summaryrefslogtreecommitdiffstats
path: root/arch/powerpc/platforms
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/platforms')
-rw-r--r--arch/powerpc/platforms/40x/Kconfig78
-rw-r--r--arch/powerpc/platforms/40x/Makefile2
-rw-r--r--arch/powerpc/platforms/40x/ppc40x_simple.c77
-rw-r--r--arch/powerpc/platforms/44x/44x.h12
-rw-r--r--arch/powerpc/platforms/44x/Kconfig319
-rw-r--r--arch/powerpc/platforms/44x/Makefile14
-rw-r--r--arch/powerpc/platforms/44x/canyonlands.c117
-rw-r--r--arch/powerpc/platforms/44x/ebony.c61
-rw-r--r--arch/powerpc/platforms/44x/fsp2.c316
-rw-r--r--arch/powerpc/platforms/44x/fsp2.h272
-rw-r--r--arch/powerpc/platforms/44x/idle.c54
-rw-r--r--arch/powerpc/platforms/44x/iss4xx.c150
-rw-r--r--arch/powerpc/platforms/44x/machine_check.c87
-rw-r--r--arch/powerpc/platforms/44x/misc_44x.S41
-rw-r--r--arch/powerpc/platforms/44x/ppc44x_simple.c85
-rw-r--r--arch/powerpc/platforms/44x/ppc476.c290
-rw-r--r--arch/powerpc/platforms/44x/ppc476_modules.lds15
-rw-r--r--arch/powerpc/platforms/44x/sam440ep.c69
-rw-r--r--arch/powerpc/platforms/44x/warp.c329
-rw-r--r--arch/powerpc/platforms/4xx/Makefile7
-rw-r--r--arch/powerpc/platforms/4xx/cpm.c332
-rw-r--r--arch/powerpc/platforms/4xx/gpio.c195
-rw-r--r--arch/powerpc/platforms/4xx/hsta_msi.c208
-rw-r--r--arch/powerpc/platforms/4xx/machine_check.c23
-rw-r--r--arch/powerpc/platforms/4xx/pci.c2182
-rw-r--r--arch/powerpc/platforms/4xx/pci.h505
-rw-r--r--arch/powerpc/platforms/4xx/soc.c218
-rw-r--r--arch/powerpc/platforms/4xx/uic.c331
-rw-r--r--arch/powerpc/platforms/512x/Kconfig42
-rw-r--r--arch/powerpc/platforms/512x/Makefile10
-rw-r--r--arch/powerpc/platforms/512x/clock-commonclk.c1224
-rw-r--r--arch/powerpc/platforms/512x/mpc5121_ads.c71
-rw-r--r--arch/powerpc/platforms/512x/mpc5121_ads.h12
-rw-r--r--arch/powerpc/platforms/512x/mpc5121_ads_cpld.c198
-rw-r--r--arch/powerpc/platforms/512x/mpc512x.h18
-rw-r--r--arch/powerpc/platforms/512x/mpc512x_generic.c51
-rw-r--r--arch/powerpc/platforms/512x/mpc512x_lpbfifo.c518
-rw-r--r--arch/powerpc/platforms/512x/mpc512x_shared.c506
-rw-r--r--arch/powerpc/platforms/512x/pdm360ng.c126
-rw-r--r--arch/powerpc/platforms/52xx/Kconfig56
-rw-r--r--arch/powerpc/platforms/52xx/Makefile16
-rw-r--r--arch/powerpc/platforms/52xx/efika.c233
-rw-r--r--arch/powerpc/platforms/52xx/lite5200.c192
-rw-r--r--arch/powerpc/platforms/52xx/lite5200_pm.c249
-rw-r--r--arch/powerpc/platforms/52xx/lite5200_sleep.S422
-rw-r--r--arch/powerpc/platforms/52xx/media5200.c239
-rw-r--r--arch/powerpc/platforms/52xx/mpc5200_simple.c79
-rw-r--r--arch/powerpc/platforms/52xx/mpc52xx_common.c306
-rw-r--r--arch/powerpc/platforms/52xx/mpc52xx_gpt.c780
-rw-r--r--arch/powerpc/platforms/52xx/mpc52xx_pci.c419
-rw-r--r--arch/powerpc/platforms/52xx/mpc52xx_pic.c519
-rw-r--r--arch/powerpc/platforms/52xx/mpc52xx_pm.c206
-rw-r--r--arch/powerpc/platforms/52xx/mpc52xx_sleep.S155
-rw-r--r--arch/powerpc/platforms/82xx/Kconfig28
-rw-r--r--arch/powerpc/platforms/82xx/Makefile7
-rw-r--r--arch/powerpc/platforms/82xx/ep8248e.c307
-rw-r--r--arch/powerpc/platforms/82xx/km82xx.c199
-rw-r--r--arch/powerpc/platforms/82xx/pq2.c34
-rw-r--r--arch/powerpc/platforms/82xx/pq2.h21
-rw-r--r--arch/powerpc/platforms/83xx/Kconfig92
-rw-r--r--arch/powerpc/platforms/83xx/Makefile18
-rw-r--r--arch/powerpc/platforms/83xx/asp834x.c45
-rw-r--r--arch/powerpc/platforms/83xx/km83xx.c188
-rw-r--r--arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c224
-rw-r--r--arch/powerpc/platforms/83xx/misc.c149
-rw-r--r--arch/powerpc/platforms/83xx/mpc830x_rdb.c57
-rw-r--r--arch/powerpc/platforms/83xx/mpc831x_rdb.c57
-rw-r--r--arch/powerpc/platforms/83xx/mpc832x_rdb.c227
-rw-r--r--arch/powerpc/platforms/83xx/mpc834x_itx.c70
-rw-r--r--arch/powerpc/platforms/83xx/mpc836x_rdk.c41
-rw-r--r--arch/powerpc/platforms/83xx/mpc837x_rdb.c82
-rw-r--r--arch/powerpc/platforms/83xx/mpc83xx.h83
-rw-r--r--arch/powerpc/platforms/83xx/suspend-asm.S551
-rw-r--r--arch/powerpc/platforms/83xx/suspend.c431
-rw-r--r--arch/powerpc/platforms/83xx/usb_831x.c128
-rw-r--r--arch/powerpc/platforms/83xx/usb_834x.c90
-rw-r--r--arch/powerpc/platforms/83xx/usb_837x.c58
-rw-r--r--arch/powerpc/platforms/85xx/Kconfig291
-rw-r--r--arch/powerpc/platforms/85xx/Makefile37
-rw-r--r--arch/powerpc/platforms/85xx/bsc913x_qds.c63
-rw-r--r--arch/powerpc/platforms/85xx/bsc913x_rdb.c50
-rw-r--r--arch/powerpc/platforms/85xx/c293pcie.c54
-rw-r--r--arch/powerpc/platforms/85xx/common.c106
-rw-r--r--arch/powerpc/platforms/85xx/corenet_generic.c203
-rw-r--r--arch/powerpc/platforms/85xx/ge_imp3a.c207
-rw-r--r--arch/powerpc/platforms/85xx/ksi8560.c184
-rw-r--r--arch/powerpc/platforms/85xx/mpc8536_ds.c66
-rw-r--r--arch/powerpc/platforms/85xx/mpc85xx.h24
-rw-r--r--arch/powerpc/platforms/85xx/mpc85xx_8259.c64
-rw-r--r--arch/powerpc/platforms/85xx/mpc85xx_ds.c98
-rw-r--r--arch/powerpc/platforms/85xx/mpc85xx_mds.c372
-rw-r--r--arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c107
-rw-r--r--arch/powerpc/platforms/85xx/mpc85xx_rdb.c208
-rw-r--r--arch/powerpc/platforms/85xx/mvme2500.c57
-rw-r--r--arch/powerpc/platforms/85xx/p1010rdb.c77
-rw-r--r--arch/powerpc/platforms/85xx/p1022_ds.c563
-rw-r--r--arch/powerpc/platforms/85xx/p1022_rdk.c143
-rw-r--r--arch/powerpc/platforms/85xx/p1023_rdb.c107
-rw-r--r--arch/powerpc/platforms/85xx/p2020.c81
-rw-r--r--arch/powerpc/platforms/85xx/ppa8548.c83
-rw-r--r--arch/powerpc/platforms/85xx/qemu_e500.c63
-rw-r--r--arch/powerpc/platforms/85xx/sgy_cts1000.c156
-rw-r--r--arch/powerpc/platforms/85xx/smp.c519
-rw-r--r--arch/powerpc/platforms/85xx/smp.h17
-rw-r--r--arch/powerpc/platforms/85xx/socrates.c79
-rw-r--r--arch/powerpc/platforms/85xx/socrates_fpga_pic.c308
-rw-r--r--arch/powerpc/platforms/85xx/socrates_fpga_pic.h11
-rw-r--r--arch/powerpc/platforms/85xx/stx_gp3.c94
-rw-r--r--arch/powerpc/platforms/85xx/t1042rdb_diu.c152
-rw-r--r--arch/powerpc/platforms/85xx/tqm85xx.c131
-rw-r--r--arch/powerpc/platforms/85xx/twr_p102x.c117
-rw-r--r--arch/powerpc/platforms/85xx/xes_mpc85xx.c176
-rw-r--r--arch/powerpc/platforms/86xx/Kconfig61
-rw-r--r--arch/powerpc/platforms/86xx/Makefile11
-rw-r--r--arch/powerpc/platforms/86xx/common.c43
-rw-r--r--arch/powerpc/platforms/86xx/gef_ppc9a.c192
-rw-r--r--arch/powerpc/platforms/86xx/gef_sbc310.c179
-rw-r--r--arch/powerpc/platforms/86xx/gef_sbc610.c169
-rw-r--r--arch/powerpc/platforms/86xx/mpc86xx.h19
-rw-r--r--arch/powerpc/platforms/86xx/mpc86xx_smp.c118
-rw-r--r--arch/powerpc/platforms/86xx/mvme7100.c114
-rw-r--r--arch/powerpc/platforms/86xx/pic.c69
-rw-r--r--arch/powerpc/platforms/8xx/Kconfig200
-rw-r--r--arch/powerpc/platforms/8xx/Makefile12
-rw-r--r--arch/powerpc/platforms/8xx/adder875.c105
-rw-r--r--arch/powerpc/platforms/8xx/cpm1-ic.c188
-rw-r--r--arch/powerpc/platforms/8xx/cpm1.c636
-rw-r--r--arch/powerpc/platforms/8xx/ep88xc.c170
-rw-r--r--arch/powerpc/platforms/8xx/m8xx_setup.c172
-rw-r--r--arch/powerpc/platforms/8xx/machine_check.c34
-rw-r--r--arch/powerpc/platforms/8xx/mpc86xads.h47
-rw-r--r--arch/powerpc/platforms/8xx/mpc86xads_setup.c145
-rw-r--r--arch/powerpc/platforms/8xx/mpc885ads.h49
-rw-r--r--arch/powerpc/platforms/8xx/mpc885ads_setup.c217
-rw-r--r--arch/powerpc/platforms/8xx/mpc8xx.h20
-rw-r--r--arch/powerpc/platforms/8xx/pic.c155
-rw-r--r--arch/powerpc/platforms/8xx/pic.h19
-rw-r--r--arch/powerpc/platforms/8xx/tqm8xx_setup.c148
-rw-r--r--arch/powerpc/platforms/Kconfig307
-rw-r--r--arch/powerpc/platforms/Kconfig.cputype646
-rw-r--r--arch/powerpc/platforms/Makefile26
-rw-r--r--arch/powerpc/platforms/amigaone/Kconfig19
-rw-r--r--arch/powerpc/platforms/amigaone/Makefile2
-rw-r--r--arch/powerpc/platforms/amigaone/setup.c168
-rw-r--r--arch/powerpc/platforms/book3s/Kconfig15
-rw-r--r--arch/powerpc/platforms/book3s/Makefile2
-rw-r--r--arch/powerpc/platforms/book3s/vas-api.c634
-rw-r--r--arch/powerpc/platforms/cell/Kconfig104
-rw-r--r--arch/powerpc/platforms/cell/Makefile27
-rw-r--r--arch/powerpc/platforms/cell/axon_msi.c481
-rw-r--r--arch/powerpc/platforms/cell/cbe_powerbutton.c105
-rw-r--r--arch/powerpc/platforms/cell/cbe_regs.c298
-rw-r--r--arch/powerpc/platforms/cell/cbe_thermal.c386
-rw-r--r--arch/powerpc/platforms/cell/cell.h15
-rw-r--r--arch/powerpc/platforms/cell/cpufreq_spudemand.c133
-rw-r--r--arch/powerpc/platforms/cell/interrupt.c390
-rw-r--r--arch/powerpc/platforms/cell/interrupt.h90
-rw-r--r--arch/powerpc/platforms/cell/iommu.c1094
-rw-r--r--arch/powerpc/platforms/cell/pervasive.c125
-rw-r--r--arch/powerpc/platforms/cell/pervasive.h26
-rw-r--r--arch/powerpc/platforms/cell/pmu.c412
-rw-r--r--arch/powerpc/platforms/cell/ras.c352
-rw-r--r--arch/powerpc/platforms/cell/ras.h13
-rw-r--r--arch/powerpc/platforms/cell/setup.c274
-rw-r--r--arch/powerpc/platforms/cell/smp.c161
-rw-r--r--arch/powerpc/platforms/cell/spider-pci.c170
-rw-r--r--arch/powerpc/platforms/cell/spider-pic.c344
-rw-r--r--arch/powerpc/platforms/cell/spu_base.c790
-rw-r--r--arch/powerpc/platforms/cell/spu_callbacks.c64
-rw-r--r--arch/powerpc/platforms/cell/spu_manage.c530
-rw-r--r--arch/powerpc/platforms/cell/spu_priv1_mmio.c167
-rw-r--r--arch/powerpc/platforms/cell/spu_priv1_mmio.h14
-rw-r--r--arch/powerpc/platforms/cell/spu_syscalls.c165
-rw-r--r--arch/powerpc/platforms/cell/spufs/.gitignore3
-rw-r--r--arch/powerpc/platforms/cell/spufs/Makefile63
-rw-r--r--arch/powerpc/platforms/cell/spufs/backing_ops.c400
-rw-r--r--arch/powerpc/platforms/cell/spufs/context.c175
-rw-r--r--arch/powerpc/platforms/cell/spufs/coredump.c182
-rw-r--r--arch/powerpc/platforms/cell/spufs/fault.c167
-rw-r--r--arch/powerpc/platforms/cell/spufs/file.c2633
-rw-r--r--arch/powerpc/platforms/cell/spufs/gang.c74
-rw-r--r--arch/powerpc/platforms/cell/spufs/hw_ops.c335
-rw-r--r--arch/powerpc/platforms/cell/spufs/inode.c826
-rw-r--r--arch/powerpc/platforms/cell/spufs/lscsa_alloc.c50
-rw-r--r--arch/powerpc/platforms/cell/spufs/run.c451
-rw-r--r--arch/powerpc/platforms/cell/spufs/sched.c1141
-rw-r--r--arch/powerpc/platforms/cell/spufs/spu_restore.c322
-rw-r--r--arch/powerpc/platforms/cell/spufs/spu_restore_crt0.S102
-rw-r--r--arch/powerpc/platforms/cell/spufs/spu_restore_dump.h_shipped935
-rw-r--r--arch/powerpc/platforms/cell/spufs/spu_save.c181
-rw-r--r--arch/powerpc/platforms/cell/spufs/spu_save_crt0.S88
-rw-r--r--arch/powerpc/platforms/cell/spufs/spu_save_dump.h_shipped743
-rw-r--r--arch/powerpc/platforms/cell/spufs/spu_utils.h147
-rw-r--r--arch/powerpc/platforms/cell/spufs/spufs.h356
-rw-r--r--arch/powerpc/platforms/cell/spufs/sputrace.h41
-rw-r--r--arch/powerpc/platforms/cell/spufs/switch.c2206
-rw-r--r--arch/powerpc/platforms/cell/spufs/syscalls.c89
-rw-r--r--arch/powerpc/platforms/chrp/Kconfig16
-rw-r--r--arch/powerpc/platforms/chrp/Makefile4
-rw-r--r--arch/powerpc/platforms/chrp/chrp.h11
-rw-r--r--arch/powerpc/platforms/chrp/gg2.h61
-rw-r--r--arch/powerpc/platforms/chrp/nvram.c95
-rw-r--r--arch/powerpc/platforms/chrp/pci.c387
-rw-r--r--arch/powerpc/platforms/chrp/pegasos_eth.c201
-rw-r--r--arch/powerpc/platforms/chrp/setup.c586
-rw-r--r--arch/powerpc/platforms/chrp/smp.c54
-rw-r--r--arch/powerpc/platforms/chrp/time.c159
-rw-r--r--arch/powerpc/platforms/embedded6xx/Kconfig94
-rw-r--r--arch/powerpc/platforms/embedded6xx/Makefile12
-rw-r--r--arch/powerpc/platforms/embedded6xx/flipper-pic.c244
-rw-r--r--arch/powerpc/platforms/embedded6xx/flipper-pic.h20
-rw-r--r--arch/powerpc/platforms/embedded6xx/gamecube.c89
-rw-r--r--arch/powerpc/platforms/embedded6xx/hlwd-pic.c235
-rw-r--r--arch/powerpc/platforms/embedded6xx/hlwd-pic.h17
-rw-r--r--arch/powerpc/platforms/embedded6xx/holly.c268
-rw-r--r--arch/powerpc/platforms/embedded6xx/linkstation.c162
-rw-r--r--arch/powerpc/platforms/embedded6xx/ls_uart.c147
-rw-r--r--arch/powerpc/platforms/embedded6xx/mpc10x.h159
-rw-r--r--arch/powerpc/platforms/embedded6xx/mvme5100.c208
-rw-r--r--arch/powerpc/platforms/embedded6xx/storcenter.c121
-rw-r--r--arch/powerpc/platforms/embedded6xx/usbgecko_udbg.c306
-rw-r--r--arch/powerpc/platforms/embedded6xx/usbgecko_udbg.h27
-rw-r--r--arch/powerpc/platforms/embedded6xx/wii.c180
-rw-r--r--arch/powerpc/platforms/fsl_uli1575.c379
-rw-r--r--arch/powerpc/platforms/maple/Kconfig19
-rw-r--r--arch/powerpc/platforms/maple/Makefile2
-rw-r--r--arch/powerpc/platforms/maple/maple.h15
-rw-r--r--arch/powerpc/platforms/maple/pci.c672
-rw-r--r--arch/powerpc/platforms/maple/setup.c363
-rw-r--r--arch/powerpc/platforms/maple/time.c170
-rw-r--r--arch/powerpc/platforms/microwatt/Kconfig11
-rw-r--r--arch/powerpc/platforms/microwatt/Makefile1
-rw-r--r--arch/powerpc/platforms/microwatt/microwatt.h7
-rw-r--r--arch/powerpc/platforms/microwatt/rng.c44
-rw-r--r--arch/powerpc/platforms/microwatt/setup.c43
-rw-r--r--arch/powerpc/platforms/pasemi/Kconfig51
-rw-r--r--arch/powerpc/platforms/pasemi/Makefile4
-rw-r--r--arch/powerpc/platforms/pasemi/dma_lib.c621
-rw-r--r--arch/powerpc/platforms/pasemi/gpio_mdio.c327
-rw-r--r--arch/powerpc/platforms/pasemi/idle.c93
-rw-r--r--arch/powerpc/platforms/pasemi/iommu.c267
-rw-r--r--arch/powerpc/platforms/pasemi/misc.c87
-rw-r--r--arch/powerpc/platforms/pasemi/msi.c162
-rw-r--r--arch/powerpc/platforms/pasemi/pasemi.h36
-rw-r--r--arch/powerpc/platforms/pasemi/pci.c294
-rw-r--r--arch/powerpc/platforms/pasemi/powersave.S76
-rw-r--r--arch/powerpc/platforms/pasemi/setup.c456
-rw-r--r--arch/powerpc/platforms/pasemi/time.c18
-rw-r--r--arch/powerpc/platforms/powermac/Kconfig34
-rw-r--r--arch/powerpc/platforms/powermac/Makefile25
-rw-r--r--arch/powerpc/platforms/powermac/backlight.c220
-rw-r--r--arch/powerpc/platforms/powermac/bootx_init.c595
-rw-r--r--arch/powerpc/platforms/powermac/cache.S356
-rw-r--r--arch/powerpc/platforms/powermac/feature.c3022
-rw-r--r--arch/powerpc/platforms/powermac/low_i2c.c1514
-rw-r--r--arch/powerpc/platforms/powermac/nvram.c656
-rw-r--r--arch/powerpc/platforms/powermac/pci.c1261
-rw-r--r--arch/powerpc/platforms/powermac/pfunc_base.c412
-rw-r--r--arch/powerpc/platforms/powermac/pfunc_core.c1022
-rw-r--r--arch/powerpc/platforms/powermac/pic.c650
-rw-r--r--arch/powerpc/platforms/powermac/pmac.h47
-rw-r--r--arch/powerpc/platforms/powermac/setup.c601
-rw-r--r--arch/powerpc/platforms/powermac/sleep.S433
-rw-r--r--arch/powerpc/platforms/powermac/smp.c1025
-rw-r--r--arch/powerpc/platforms/powermac/time.c243
-rw-r--r--arch/powerpc/platforms/powermac/udbg_adb.c220
-rw-r--r--arch/powerpc/platforms/powermac/udbg_scc.c184
-rw-r--r--arch/powerpc/platforms/powernv/Kconfig38
-rw-r--r--arch/powerpc/platforms/powernv/Makefile33
-rw-r--r--arch/powerpc/platforms/powernv/copy-paste.h42
-rw-r--r--arch/powerpc/platforms/powernv/eeh-powernv.c1696
-rw-r--r--arch/powerpc/platforms/powernv/idle.c1507
-rw-r--r--arch/powerpc/platforms/powernv/memtrace.c339
-rw-r--r--arch/powerpc/platforms/powernv/ocxl.c598
-rw-r--r--arch/powerpc/platforms/powernv/opal-async.c290
-rw-r--r--arch/powerpc/platforms/powernv/opal-call.c295
-rw-r--r--arch/powerpc/platforms/powernv/opal-core.c663
-rw-r--r--arch/powerpc/platforms/powernv/opal-dump.c459
-rw-r--r--arch/powerpc/platforms/powernv/opal-elog.c340
-rw-r--r--arch/powerpc/platforms/powernv/opal-fadump.c726
-rw-r--r--arch/powerpc/platforms/powernv/opal-fadump.h146
-rw-r--r--arch/powerpc/platforms/powernv/opal-flash.c566
-rw-r--r--arch/powerpc/platforms/powernv/opal-hmi.c381
-rw-r--r--arch/powerpc/platforms/powernv/opal-imc.c324
-rw-r--r--arch/powerpc/platforms/powernv/opal-irqchip.c312
-rw-r--r--arch/powerpc/platforms/powernv/opal-kmsg.c47
-rw-r--r--arch/powerpc/platforms/powernv/opal-lpc.c418
-rw-r--r--arch/powerpc/platforms/powernv/opal-memory-errors.c134
-rw-r--r--arch/powerpc/platforms/powernv/opal-msglog.c161
-rw-r--r--arch/powerpc/platforms/powernv/opal-nvram.c113
-rw-r--r--arch/powerpc/platforms/powernv/opal-power.c174
-rw-r--r--arch/powerpc/platforms/powernv/opal-powercap.c251
-rw-r--r--arch/powerpc/platforms/powernv/opal-prd.c452
-rw-r--r--arch/powerpc/platforms/powernv/opal-psr.c175
-rw-r--r--arch/powerpc/platforms/powernv/opal-rtc.c84
-rw-r--r--arch/powerpc/platforms/powernv/opal-secvar.c182
-rw-r--r--arch/powerpc/platforms/powernv/opal-sensor-groups.c240
-rw-r--r--arch/powerpc/platforms/powernv/opal-sensor.c132
-rw-r--r--arch/powerpc/platforms/powernv/opal-sysparam.c294
-rw-r--r--arch/powerpc/platforms/powernv/opal-tracepoints.c87
-rw-r--r--arch/powerpc/platforms/powernv/opal-wrappers.S63
-rw-r--r--arch/powerpc/platforms/powernv/opal-xscom.c210
-rw-r--r--arch/powerpc/platforms/powernv/opal.c1251
-rw-r--r--arch/powerpc/platforms/powernv/pci-cxl.c153
-rw-r--r--arch/powerpc/platforms/powernv/pci-ioda-tce.c430
-rw-r--r--arch/powerpc/platforms/powernv/pci-ioda.c2827
-rw-r--r--arch/powerpc/platforms/powernv/pci-sriov.c760
-rw-r--r--arch/powerpc/platforms/powernv/pci.c862
-rw-r--r--arch/powerpc/platforms/powernv/pci.h340
-rw-r--r--arch/powerpc/platforms/powernv/powernv.h47
-rw-r--r--arch/powerpc/platforms/powernv/rng.c200
-rw-r--r--arch/powerpc/platforms/powernv/setup.c587
-rw-r--r--arch/powerpc/platforms/powernv/smp.c441
-rw-r--r--arch/powerpc/platforms/powernv/subcore-asm.S91
-rw-r--r--arch/powerpc/platforms/powernv/subcore.c449
-rw-r--r--arch/powerpc/platforms/powernv/subcore.h21
-rw-r--r--arch/powerpc/platforms/powernv/ultravisor.c70
-rw-r--r--arch/powerpc/platforms/powernv/vas-debug.c168
-rw-r--r--arch/powerpc/platforms/powernv/vas-fault.c245
-rw-r--r--arch/powerpc/platforms/powernv/vas-trace.h113
-rw-r--r--arch/powerpc/platforms/powernv/vas-window.c1471
-rw-r--r--arch/powerpc/platforms/powernv/vas.c253
-rw-r--r--arch/powerpc/platforms/powernv/vas.h501
-rw-r--r--arch/powerpc/platforms/ps3/Kconfig182
-rw-r--r--arch/powerpc/platforms/ps3/Makefile9
-rw-r--r--arch/powerpc/platforms/ps3/device-init.c975
-rw-r--r--arch/powerpc/platforms/ps3/exports.c13
-rw-r--r--arch/powerpc/platforms/ps3/gelic_udbg.c244
-rw-r--r--arch/powerpc/platforms/ps3/htab.c195
-rw-r--r--arch/powerpc/platforms/ps3/hvcall.S792
-rw-r--r--arch/powerpc/platforms/ps3/interrupt.c783
-rw-r--r--arch/powerpc/platforms/ps3/mm.c1254
-rw-r--r--arch/powerpc/platforms/ps3/os-area.c830
-rw-r--r--arch/powerpc/platforms/ps3/platform.h253
-rw-r--r--arch/powerpc/platforms/ps3/repository.c1380
-rw-r--r--arch/powerpc/platforms/ps3/setup.c305
-rw-r--r--arch/powerpc/platforms/ps3/smp.c120
-rw-r--r--arch/powerpc/platforms/ps3/spu.c619
-rw-r--r--arch/powerpc/platforms/ps3/system-bus.c803
-rw-r--r--arch/powerpc/platforms/ps3/time.c59
-rw-r--r--arch/powerpc/platforms/pseries/Kconfig186
-rw-r--r--arch/powerpc/platforms/pseries/Makefile39
-rw-r--r--arch/powerpc/platforms/pseries/cc_platform.c26
-rw-r--r--arch/powerpc/platforms/pseries/cmm.c663
-rw-r--r--arch/powerpc/platforms/pseries/dlpar.c583
-rw-r--r--arch/powerpc/platforms/pseries/dtl.c445
-rw-r--r--arch/powerpc/platforms/pseries/eeh_pseries.c887
-rw-r--r--arch/powerpc/platforms/pseries/event_sources.c30
-rw-r--r--arch/powerpc/platforms/pseries/firmware.c191
-rw-r--r--arch/powerpc/platforms/pseries/hotplug-cpu.c901
-rw-r--r--arch/powerpc/platforms/pseries/hotplug-memory.c923
-rw-r--r--arch/powerpc/platforms/pseries/hvCall.S370
-rw-r--r--arch/powerpc/platforms/pseries/hvCall_inst.c140
-rw-r--r--arch/powerpc/platforms/pseries/hvconsole.c75
-rw-r--r--arch/powerpc/platforms/pseries/hvcserver.c239
-rw-r--r--arch/powerpc/platforms/pseries/ibmebus.c479
-rw-r--r--arch/powerpc/platforms/pseries/io_event_irq.c161
-rw-r--r--arch/powerpc/platforms/pseries/iommu.c1742
-rw-r--r--arch/powerpc/platforms/pseries/kexec.c71
-rw-r--r--arch/powerpc/platforms/pseries/lpar.c2026
-rw-r--r--arch/powerpc/platforms/pseries/lparcfg.c802
-rw-r--r--arch/powerpc/platforms/pseries/mobility.c830
-rw-r--r--arch/powerpc/platforms/pseries/msi.c698
-rw-r--r--arch/powerpc/platforms/pseries/nvram.c241
-rw-r--r--arch/powerpc/platforms/pseries/of_helpers.c97
-rw-r--r--arch/powerpc/platforms/pseries/of_helpers.h9
-rw-r--r--arch/powerpc/platforms/pseries/papr-sysparm.c151
-rw-r--r--arch/powerpc/platforms/pseries/papr_platform_attributes.c362
-rw-r--r--arch/powerpc/platforms/pseries/papr_scm.c1581
-rw-r--r--arch/powerpc/platforms/pseries/pci.c322
-rw-r--r--arch/powerpc/platforms/pseries/pci_dlpar.c111
-rw-r--r--arch/powerpc/platforms/pseries/plpks-secvar.c217
-rw-r--r--arch/powerpc/platforms/pseries/plpks.c711
-rw-r--r--arch/powerpc/platforms/pseries/pmem.c167
-rw-r--r--arch/powerpc/platforms/pseries/power.c72
-rw-r--r--arch/powerpc/platforms/pseries/pseries.h131
-rw-r--r--arch/powerpc/platforms/pseries/pseries_energy.c368
-rw-r--r--arch/powerpc/platforms/pseries/ras.c882
-rw-r--r--arch/powerpc/platforms/pseries/reconfig.c414
-rw-r--r--arch/powerpc/platforms/pseries/rng.c37
-rw-r--r--arch/powerpc/platforms/pseries/rtas-fadump.c557
-rw-r--r--arch/powerpc/platforms/pseries/rtas-fadump.h114
-rw-r--r--arch/powerpc/platforms/pseries/rtas-work-area.c209
-rw-r--r--arch/powerpc/platforms/pseries/setup.c1162
-rw-r--r--arch/powerpc/platforms/pseries/smp.c282
-rw-r--r--arch/powerpc/platforms/pseries/suspend.c189
-rw-r--r--arch/powerpc/platforms/pseries/svm.c94
-rw-r--r--arch/powerpc/platforms/pseries/vas-sysfs.c281
-rw-r--r--arch/powerpc/platforms/pseries/vas.c1121
-rw-r--r--arch/powerpc/platforms/pseries/vas.h157
-rw-r--r--arch/powerpc/platforms/pseries/vio.c1729
-rw-r--r--arch/powerpc/platforms/pseries/vphn.c90
392 files changed, 121876 insertions, 0 deletions
diff --git a/arch/powerpc/platforms/40x/Kconfig b/arch/powerpc/platforms/40x/Kconfig
new file mode 100644
index 000000000..b3c466c50
--- /dev/null
+++ b/arch/powerpc/platforms/40x/Kconfig
@@ -0,0 +1,78 @@
+# SPDX-License-Identifier: GPL-2.0
+config ACADIA
+ bool "Acadia"
+ depends on 40x
+ select PPC40x_SIMPLE
+ select 405EZ
+ help
+ This option enables support for the AMCC 405EZ Acadia evaluation board.
+
+config HOTFOOT
+ bool "Hotfoot"
+ depends on 40x
+ select PPC40x_SIMPLE
+ select FORCE_PCI
+ help
+ This option enables support for the ESTEEM 195E Hotfoot board.
+
+config KILAUEA
+ bool "Kilauea"
+ depends on 40x
+ select 405EX
+ select PPC40x_SIMPLE
+ select PPC4xx_PCI_EXPRESS
+ select FORCE_PCI
+ select PCI_MSI
+ help
+ This option enables support for the AMCC PPC405EX evaluation board.
+
+config MAKALU
+ bool "Makalu"
+ depends on 40x
+ select 405EX
+ select FORCE_PCI
+ select PPC4xx_PCI_EXPRESS
+ select PPC40x_SIMPLE
+ help
+ This option enables support for the AMCC PPC405EX board.
+
+config OBS600
+ bool "OpenBlockS 600"
+ depends on 40x
+ select 405EX
+ select PPC40x_SIMPLE
+ help
+ This option enables support for the PlatHome OpenBlockS 600 server.
+
+config PPC40x_SIMPLE
+ bool "Simple PowerPC 40x board support"
+ depends on 40x
+ help
+ This option enables the simple PowerPC 40x platform support.
+
+config 405EX
+ bool
+ select IBM_EMAC_EMAC4 if IBM_EMAC
+ select IBM_EMAC_RGMII if IBM_EMAC
+
+config 405EZ
+ bool
+ select IBM_EMAC_NO_FLOW_CTRL if IBM_EMAC
+ select IBM_EMAC_MAL_CLR_ICINTSTAT if IBM_EMAC
+ select IBM_EMAC_MAL_COMMON_ERR if IBM_EMAC
+
+config PPC4xx_GPIO
+ bool "PPC4xx GPIO support"
+ depends on 40x
+ select GPIOLIB
+ select OF_GPIO_MM_GPIOCHIP
+ help
+ Enable gpiolib support for ppc40x based boards
+
+config APM8018X
+ bool "APM8018X"
+ depends on 40x
+ select PPC40x_SIMPLE
+ help
+ This option enables support for the AppliedMicro APM8018X evaluation
+ board.
diff --git a/arch/powerpc/platforms/40x/Makefile b/arch/powerpc/platforms/40x/Makefile
new file mode 100644
index 000000000..122de9852
--- /dev/null
+++ b/arch/powerpc/platforms/40x/Makefile
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+obj-$(CONFIG_PPC40x_SIMPLE) += ppc40x_simple.o
diff --git a/arch/powerpc/platforms/40x/ppc40x_simple.c b/arch/powerpc/platforms/40x/ppc40x_simple.c
new file mode 100644
index 000000000..e454e9d2e
--- /dev/null
+++ b/arch/powerpc/platforms/40x/ppc40x_simple.c
@@ -0,0 +1,77 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Generic PowerPC 40x platform support
+ *
+ * Copyright 2008 IBM Corporation
+ *
+ * This implements simple platform support for PowerPC 40x chips. This is
+ * mostly used for eval boards or other simple and "generic" 40x boards. If
+ * your board has custom functions or hardware, then you will likely want to
+ * implement your own board.c file to accommodate it.
+ */
+
+#include <asm/machdep.h>
+#include <asm/pci-bridge.h>
+#include <asm/ppc4xx.h>
+#include <asm/time.h>
+#include <asm/udbg.h>
+#include <asm/uic.h>
+
+#include <linux/init.h>
+#include <linux/of_platform.h>
+
+static const struct of_device_id ppc40x_of_bus[] __initconst = {
+ { .compatible = "ibm,plb3", },
+ { .compatible = "ibm,plb4", },
+ { .compatible = "ibm,opb", },
+ { .compatible = "ibm,ebc", },
+ { .compatible = "simple-bus", },
+ {},
+};
+
+static int __init ppc40x_device_probe(void)
+{
+ of_platform_bus_probe(NULL, ppc40x_of_bus, NULL);
+
+ return 0;
+}
+machine_device_initcall(ppc40x_simple, ppc40x_device_probe);
+
+/* This is the list of boards that can be supported by this simple
+ * platform code. This does _not_ mean the boards are compatible,
+ * as they most certainly are not from a device tree perspective.
+ * However, their differences are handled by the device tree and the
+ * drivers and therefore they don't need custom board support files.
+ *
+ * Again, if your board needs to do things differently then create a
+ * board.c file for it rather than adding it to this list.
+ */
+static const char * const board[] __initconst = {
+ "amcc,acadia",
+ "amcc,haleakala",
+ "amcc,kilauea",
+ "amcc,makalu",
+ "apm,klondike",
+ "est,hotfoot",
+ "plathome,obs600",
+ NULL
+};
+
+static int __init ppc40x_probe(void)
+{
+ if (of_device_compatible_match(of_root, board)) {
+ pci_set_flags(PCI_REASSIGN_ALL_RSRC);
+ return 1;
+ }
+
+ return 0;
+}
+
+define_machine(ppc40x_simple) {
+ .name = "PowerPC 40x Platform",
+ .probe = ppc40x_probe,
+ .progress = udbg_progress,
+ .init_IRQ = uic_init_tree,
+ .get_irq = uic_get_irq,
+ .restart = ppc4xx_reset_system,
+};
diff --git a/arch/powerpc/platforms/44x/44x.h b/arch/powerpc/platforms/44x/44x.h
new file mode 100644
index 000000000..0e912a6a0
--- /dev/null
+++ b/arch/powerpc/platforms/44x/44x.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __POWERPC_PLATFORMS_44X_44X_H
+#define __POWERPC_PLATFORMS_44X_44X_H
+
+extern u8 as1_readb(volatile u8 __iomem *addr);
+extern void as1_writeb(u8 data, volatile u8 __iomem *addr);
+
+#define GPIO0_OSRH 0xC
+#define GPIO0_TSRH 0x14
+#define GPIO0_ISR1H 0x34
+
+#endif /* __POWERPC_PLATFORMS_44X_44X_H */
diff --git a/arch/powerpc/platforms/44x/Kconfig b/arch/powerpc/platforms/44x/Kconfig
new file mode 100644
index 000000000..35a1f4b9f
--- /dev/null
+++ b/arch/powerpc/platforms/44x/Kconfig
@@ -0,0 +1,319 @@
+# SPDX-License-Identifier: GPL-2.0
+config PPC_47x
+ bool "Support for 47x variant"
+ depends on 44x
+ select MPIC
+ help
+ This option enables support for the 47x family of processors and is
+ not currently compatible with other 44x or 46x variants
+
+config BAMBOO
+ bool "Bamboo"
+ depends on 44x
+ select PPC44x_SIMPLE
+ select 440EP
+ select FORCE_PCI
+ help
+ This option enables support for the IBM PPC440EP evaluation board.
+
+config BLUESTONE
+ bool "Bluestone"
+ depends on 44x
+ select PPC44x_SIMPLE
+ select APM821xx
+ select FORCE_PCI
+ select PCI_MSI
+ select PPC4xx_PCI_EXPRESS
+ select IBM_EMAC_RGMII if IBM_EMAC
+ help
+ This option enables support for the APM APM821xx Evaluation board.
+
+config EBONY
+ bool "Ebony"
+ depends on 44x
+ default y
+ select 440GP
+ select FORCE_PCI
+ select OF_RTC
+ help
+ This option enables support for the IBM PPC440GP evaluation board.
+
+config SAM440EP
+ bool "Sam440ep"
+ depends on 44x
+ select 440EP
+ select FORCE_PCI
+ help
+ This option enables support for the ACube Sam440ep board.
+
+config SEQUOIA
+ bool "Sequoia"
+ depends on 44x
+ select PPC44x_SIMPLE
+ select 440EPX
+ help
+ This option enables support for the AMCC PPC440EPX evaluation board.
+
+config TAISHAN
+ bool "Taishan"
+ depends on 44x
+ select PPC44x_SIMPLE
+ select 440GX
+ select FORCE_PCI
+ help
+ This option enables support for the AMCC PPC440GX "Taishan"
+ evaluation board.
+
+config KATMAI
+ bool "Katmai"
+ depends on 44x
+ select PPC44x_SIMPLE
+ select 440SPe
+ select FORCE_PCI
+ select PPC4xx_PCI_EXPRESS
+ select PCI_MSI
+ help
+ This option enables support for the AMCC PPC440SPe evaluation board.
+
+config RAINIER
+ bool "Rainier"
+ depends on 44x
+ select PPC44x_SIMPLE
+ select 440GRX
+ select FORCE_PCI
+ help
+ This option enables support for the AMCC PPC440GRX evaluation board.
+
+config WARP
+ bool "PIKA Warp"
+ depends on 44x
+ select 440EP
+ help
+ This option enables support for the PIKA Warp(tm) Appliance. The Warp
+ is a small computer replacement with up to 9 ports of FXO/FXS plus VOIP
+ stations and trunks.
+
+ See http://www.pikatechnologies.com/ and follow the "PIKA for Computer
+ Telephony Developers" link for more information.
+
+config ARCHES
+ bool "Arches"
+ depends on 44x
+ select PPC44x_SIMPLE
+ select 460EX # Odd since it uses 460GT but the effects are the same
+ select FORCE_PCI
+ select PPC4xx_PCI_EXPRESS
+ help
+ This option enables support for the AMCC Dual PPC460GT evaluation board.
+
+config CANYONLANDS
+ bool "Canyonlands"
+ depends on 44x
+ select 460EX
+ select FORCE_PCI
+ select PPC4xx_PCI_EXPRESS
+ select PCI_MSI
+ select IBM_EMAC_RGMII if IBM_EMAC
+ select IBM_EMAC_ZMII if IBM_EMAC
+ help
+ This option enables support for the AMCC PPC460EX evaluation board.
+
+config GLACIER
+ bool "Glacier"
+ depends on 44x
+ select PPC44x_SIMPLE
+ select 460EX # Odd since it uses 460GT but the effects are the same
+ select FORCE_PCI
+ select PPC4xx_PCI_EXPRESS
+ select IBM_EMAC_RGMII if IBM_EMAC
+ select IBM_EMAC_ZMII if IBM_EMAC
+ help
+ This option enables support for the AMCC PPC460GT evaluation board.
+
+config REDWOOD
+ bool "Redwood"
+ depends on 44x
+ select PPC44x_SIMPLE
+ select 460SX
+ select FORCE_PCI
+ select PPC4xx_PCI_EXPRESS
+ select PCI_MSI
+ help
+ This option enables support for the AMCC PPC460SX Redwood board.
+
+config EIGER
+ bool "Eiger"
+ depends on 44x
+ select PPC44x_SIMPLE
+ select 460SX
+ select FORCE_PCI
+ select PPC4xx_PCI_EXPRESS
+ select IBM_EMAC_RGMII if IBM_EMAC
+ help
+ This option enables support for the AMCC PPC460SX evaluation board.
+
+config YOSEMITE
+ bool "Yosemite"
+ depends on 44x
+ select PPC44x_SIMPLE
+ select 440EP
+ select FORCE_PCI
+ help
+ This option enables support for the AMCC PPC440EP evaluation board.
+
+config ISS4xx
+ bool "ISS 4xx Simulator"
+ depends on 44x
+ select 440GP if 44x && !PPC_47x
+ select PPC_FPU
+ select OF_RTC
+ help
+ This option enables support for the IBM ISS simulation environment
+
+config CURRITUCK
+ bool "IBM Currituck (476fpe) Support"
+ depends on PPC_47x
+ select I2C
+ select SWIOTLB
+ select 476FPE
+ select FORCE_PCI
+ select PPC4xx_PCI_EXPRESS
+ help
+ This option enables support for the IBM Currituck (476fpe) evaluation board
+
+config FSP2
+ bool "IBM FSP2 (476fpe) Support"
+ depends on PPC_47x
+ select 476FPE
+ select IBM_EMAC_EMAC4 if IBM_EMAC
+ select IBM_EMAC_RGMII if IBM_EMAC
+ select COMMON_CLK
+ select DEFAULT_UIMAGE
+ help
+ This option enables support for the IBM FSP2 (476fpe) board
+
+config AKEBONO
+ bool "IBM Akebono (476gtr) Support"
+ depends on PPC_47x
+ select SWIOTLB
+ select 476FPE
+ select PPC4xx_PCI_EXPRESS
+ select FORCE_PCI
+ select PCI_MSI
+ select PPC4xx_HSTA_MSI
+ select I2C
+ select I2C_IBM_IIC
+ select IBM_EMAC_EMAC4 if IBM_EMAC
+ select USB if USB_SUPPORT
+ select USB_OHCI_HCD_PLATFORM if USB_OHCI_HCD
+ select USB_EHCI_HCD_PLATFORM if USB_EHCI_HCD
+ help
+ This option enables support for the IBM Akebono (476gtr) evaluation board
+
+
+config ICON
+ bool "Icon"
+ depends on 44x
+ select PPC44x_SIMPLE
+ select 440SPe
+ select FORCE_PCI
+ select PPC4xx_PCI_EXPRESS
+ help
+ This option enables support for the AMCC PPC440SPe evaluation board.
+
+config PPC44x_SIMPLE
+ bool "Simple PowerPC 44x board support"
+ depends on 44x
+ help
+ This option enables the simple PowerPC 44x platform support.
+
+config PPC4xx_GPIO
+ bool "PPC4xx GPIO support"
+ depends on 44x
+ select GPIOLIB
+ select OF_GPIO_MM_GPIOCHIP
+ help
+ Enable gpiolib support for ppc440 based boards
+
+# 44x specific CPU modules, selected based on the board above.
+config 440EP
+ bool
+ select PPC_FPU
+ select IBM440EP_ERR42
+ select IBM_EMAC_ZMII if IBM_EMAC
+
+config 440EPX
+ bool
+ select PPC_FPU
+ select IBM_EMAC_EMAC4 if IBM_EMAC
+ select IBM_EMAC_RGMII if IBM_EMAC
+ select IBM_EMAC_ZMII if IBM_EMAC
+ select USB_EHCI_BIG_ENDIAN_MMIO
+ select USB_EHCI_BIG_ENDIAN_DESC
+
+config 440GRX
+ bool
+ select IBM_EMAC_EMAC4 if IBM_EMAC
+ select IBM_EMAC_RGMII if IBM_EMAC
+ select IBM_EMAC_ZMII if IBM_EMAC
+
+config 440GP
+ bool
+ select IBM_EMAC_ZMII if IBM_EMAC
+
+config 440GX
+ bool
+ select IBM_EMAC_EMAC4 if IBM_EMAC
+ select IBM_EMAC_RGMII if IBM_EMAC
+ select IBM_EMAC_ZMII if IBM_EMAC #test only
+ select IBM_EMAC_TAH if IBM_EMAC #test only
+
+config 440SP
+ bool
+
+config 440SPe
+ bool
+ select IBM_EMAC_EMAC4 if IBM_EMAC
+
+config 460EX
+ bool
+ select PPC_FPU
+ select IBM_EMAC_EMAC4 if IBM_EMAC
+ select IBM_EMAC_TAH if IBM_EMAC
+
+config 460SX
+ bool
+ select PPC_FPU
+ select IBM_EMAC_EMAC4 if IBM_EMAC
+ select IBM_EMAC_RGMII if IBM_EMAC
+ select IBM_EMAC_ZMII if IBM_EMAC
+ select IBM_EMAC_TAH if IBM_EMAC
+
+config 476FPE
+ bool
+ select PPC_FPU
+
+config APM821xx
+ bool
+ select PPC_FPU
+ select IBM_EMAC_EMAC4 if IBM_EMAC
+ select IBM_EMAC_TAH if IBM_EMAC
+
+config 476FPE_ERR46
+ depends on 476FPE
+ bool "Enable linker work around for PPC476FPE errata #46"
+ help
+ This option enables a work around for an icache bug on 476
+ that can cause execution of stale instructions when falling
+ through pages (IBM errata #46). It requires a recent version
+ of binutils which supports the --ppc476-workaround option.
+
+ The work around enables the appropriate linker options and
+ ensures that all module output sections are aligned to 4K
+ page boundaries. The work around is only required when
+ building modules.
+
+# 44x errata/workaround config symbols, selected by the CPU models above
+config IBM440EP_ERR42
+ bool
+
diff --git a/arch/powerpc/platforms/44x/Makefile b/arch/powerpc/platforms/44x/Makefile
new file mode 100644
index 000000000..5ba031f57
--- /dev/null
+++ b/arch/powerpc/platforms/44x/Makefile
@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: GPL-2.0
+# Objects built unconditionally for this directory.
+obj-y += misc_44x.o machine_check.o
+# idle.o is built unless CONFIG_PPC4xx_CPM is set to y.
+ifneq ($(CONFIG_PPC4xx_CPM),y)
+obj-y += idle.o
+endif
+# One object per board/platform option.
+obj-$(CONFIG_PPC44x_SIMPLE) += ppc44x_simple.o
+obj-$(CONFIG_EBONY) += ebony.o
+obj-$(CONFIG_SAM440EP) += sam440ep.o
+obj-$(CONFIG_WARP) += warp.o
+obj-$(CONFIG_ISS4xx) += iss4xx.o
+obj-$(CONFIG_CANYONLANDS)+= canyonlands.o
+# Currituck and Akebono are both served by the same ppc476.o object.
+obj-$(CONFIG_CURRITUCK) += ppc476.o
+obj-$(CONFIG_AKEBONO) += ppc476.o
+obj-$(CONFIG_FSP2) += fsp2.o
diff --git a/arch/powerpc/platforms/44x/canyonlands.c b/arch/powerpc/platforms/44x/canyonlands.c
new file mode 100644
index 000000000..8742a10d9
--- /dev/null
+++ b/arch/powerpc/platforms/44x/canyonlands.c
@@ -0,0 +1,117 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * This contains platform-specific code for the APM PPC460EX-based
+ * Canyonlands board.
+ *
+ * Copyright (c) 2010, Applied Micro Circuits Corporation
+ * Author: Rupjyoti Sarmah <rsarmah@apm.com>
+ */
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <asm/pci-bridge.h>
+#include <asm/ppc4xx.h>
+#include <asm/udbg.h>
+#include <asm/uic.h>
+#include <linux/of_address.h>
+#include <linux/of_platform.h>
+#include <linux/delay.h>
+#include "44x.h"
+
+#define BCSR_USB_EN 0x11
+
+/* Compatible strings of the bus nodes handed to of_platform_bus_probe(). */
+static const struct of_device_id ppc460ex_of_bus[] __initconst = {
+	{ .compatible = "ibm,plb4", },
+	{ .compatible = "ibm,opb", },
+	{ .compatible = "ibm,ebc", },
+	{ .compatible = "simple-bus", },
+	{ /* sentinel */ },
+};
+
+/*
+ * Device initcall for the canyonlands machine: probe the buses listed in
+ * ppc460ex_of_bus. The probe result is ignored; always returns 0.
+ */
+static int __init ppc460ex_device_probe(void)
+{
+	of_platform_bus_probe(NULL, ppc460ex_of_bus, NULL);
+	return 0;
+}
+machine_device_initcall(canyonlands, ppc460ex_device_probe);
+
+/* Using this code only for the Canyonlands board. */
+
+/*
+ * .probe hook of the canyonlands machine description: sets the
+ * PCI_REASSIGN_ALL_RSRC PCI flag and unconditionally returns 1.
+ */
+static int __init ppc460ex_probe(void)
+{
+	pci_set_flags(PCI_REASSIGN_ALL_RSRC);
+	return 1;
+}
+
+/* USB PHY fixup code on Canyonlands kit. */
+
+/*
+ * USB PHY fixup for the Canyonlands kit.
+ *
+ * Maps the board control/status registers (BCSR) and the GPIO block found
+ * in the device tree, sets and then clears the BCSR_USB_EN bits in BCSR7
+ * with a 100 ms delay in between, and finally sets bits in the GPIO0
+ * OSRH/TSRH registers to select the alternate-1 output for gpio16/gpio19.
+ *
+ * Returns 0 on success, or -ENODEV when a node is missing or cannot be
+ * mapped. Every mapping created here is released before returning, and
+ * only mappings that were actually created are unmapped.
+ */
+static int __init ppc460ex_canyonlands_fixup(void)
+{
+	u8 __iomem *bcsr;
+	void __iomem *vaddr;
+	struct device_node *np;
+	int ret = 0;
+
+	np = of_find_compatible_node(NULL, NULL, "amcc,ppc460ex-bcsr");
+	if (!np) {
+		printk(KERN_ERR "failed did not find amcc, ppc460ex bcsr node\n");
+		return -ENODEV;
+	}
+
+	bcsr = of_iomap(np, 0);
+	of_node_put(np);
+
+	if (!bcsr) {
+		/* Nothing is mapped yet, so there is nothing to undo. */
+		printk(KERN_CRIT "Could not remap bcsr\n");
+		return -ENODEV;
+	}
+
+	np = of_find_compatible_node(NULL, NULL, "ibm,ppc4xx-gpio");
+	if (!np) {
+		printk(KERN_ERR "failed did not find ibm,ppc4xx-gpio node\n");
+		/* bcsr is mapped at this point: release it instead of leaking it */
+		ret = -ENODEV;
+		goto err_bcsr;
+	}
+
+	vaddr = of_iomap(np, 0);
+	of_node_put(np);
+
+	if (!vaddr) {
+		printk(KERN_CRIT "Could not get gpio node address\n");
+		/* only bcsr is mapped; vaddr must not be unmapped */
+		ret = -ENODEV;
+		goto err_bcsr;
+	}
+
+	/* Disable USB, through the BCSR7 bits */
+	setbits8(&bcsr[7], BCSR_USB_EN);
+
+	/* Wait for a while after reset */
+	msleep(100);
+
+	/* Enable USB here */
+	clrbits8(&bcsr[7], BCSR_USB_EN);
+
+	/*
+	 * Configure multiplexed gpio16 and gpio19 as alternate1 output
+	 * source after USB reset. In this configuration gpio16 will be
+	 * USB2HStop and gpio19 will be USB2DStop. For more details refer to
+	 * table 34-7 of PPC460EX user manual.
+	 */
+	setbits32((vaddr + GPIO0_OSRH), 0x42000000);
+	setbits32((vaddr + GPIO0_TSRH), 0x42000000);
+
+	/* The fixup is one-shot: both mappings are dropped on success too. */
+	iounmap(vaddr);
+err_bcsr:
+	iounmap(bcsr);
+	return ret;
+}
+machine_device_initcall(canyonlands, ppc460ex_canyonlands_fixup);
+/* Machine description for the Canyonlands board ("amcc,canyonlands"). */
+define_machine(canyonlands) {
+	.name		= "Canyonlands",
+	.compatible	= "amcc,canyonlands",
+	.probe		= ppc460ex_probe,
+	.progress	= udbg_progress,
+	.init_IRQ	= uic_init_tree,
+	.get_irq	= uic_get_irq,
+	.restart	= ppc4xx_reset_system,
+};
diff --git a/arch/powerpc/platforms/44x/ebony.c b/arch/powerpc/platforms/44x/ebony.c
new file mode 100644
index 000000000..4861310c8
--- /dev/null
+++ b/arch/powerpc/platforms/44x/ebony.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Ebony board specific routines
+ *
+ * Matt Porter <mporter@kernel.crashing.org>
+ * Copyright 2002-2005 MontaVista Software Inc.
+ *
+ * Eugene Surovegin <eugene.surovegin@zultys.com> or <ebs@ebshome.net>
+ * Copyright (c) 2003-2005 Zultys Technologies
+ *
+ * Rewritten and ported to the merged powerpc tree:
+ * Copyright 2007 David Gibson <dwg@au1.ibm.com>, IBM Corporation.
+ */
+
+#include <linux/init.h>
+#include <linux/of_platform.h>
+#include <linux/rtc.h>
+
+#include <asm/machdep.h>
+#include <asm/prom.h>
+#include <asm/udbg.h>
+#include <asm/time.h>
+#include <asm/uic.h>
+#include <asm/pci-bridge.h>
+#include <asm/ppc4xx.h>
+
+/* Compatible strings of the bus nodes handed to of_platform_bus_probe(). */
+static const struct of_device_id ebony_of_bus[] __initconst = {
+	{ .compatible = "ibm,plb4", },
+	{ .compatible = "ibm,opb", },
+	{ .compatible = "ibm,ebc", },
+	{ /* sentinel */ },
+};
+
+/*
+ * Device initcall for the ebony machine: probe the buses listed in
+ * ebony_of_bus, then call of_instantiate_rtc(). Always returns 0.
+ */
+static int __init ebony_device_probe(void)
+{
+	of_platform_bus_probe(NULL, ebony_of_bus, NULL);
+	of_instantiate_rtc();
+	return 0;
+}
+machine_device_initcall(ebony, ebony_device_probe);
+
+/*
+ * Called very early, MMU is off, device-tree isn't unflattened
+ */
+/* Sets the PCI_REASSIGN_ALL_RSRC PCI flag and unconditionally returns 1. */
+static int __init ebony_probe(void)
+{
+	pci_set_flags(PCI_REASSIGN_ALL_RSRC);
+	return 1;
+}
+
+/* Machine description for the Ebony board ("ibm,ebony"). */
+define_machine(ebony) {
+	.name		= "Ebony",
+	.compatible	= "ibm,ebony",
+	.probe		= ebony_probe,
+	.progress	= udbg_progress,
+	.init_IRQ	= uic_init_tree,
+	.get_irq	= uic_get_irq,
+	.restart	= ppc4xx_reset_system,
+};
diff --git a/arch/powerpc/platforms/44x/fsp2.c b/arch/powerpc/platforms/44x/fsp2.c
new file mode 100644
index 000000000..f6b8d02e0
--- /dev/null
+++ b/arch/powerpc/platforms/44x/fsp2.c
@@ -0,0 +1,316 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * FSP-2 board specific routines
+ *
+ * Based on earlier code:
+ * Matt Porter <mporter@kernel.crashing.org>
+ * Copyright 2002-2005 MontaVista Software Inc.
+ *
+ * Eugene Surovegin <eugene.surovegin@zultys.com> or <ebs@ebshome.net>
+ * Copyright (c) 2003-2005 Zultys Technologies
+ *
+ * Rewritten and ported to the merged powerpc tree:
+ * Copyright 2007 David Gibson <dwg@au1.ibm.com>, IBM Corporation.
+ */
+
+#include <linux/init.h>
+#include <linux/of_fdt.h>
+#include <linux/of_platform.h>
+#include <linux/rtc.h>
+
+#include <asm/machdep.h>
+#include <asm/udbg.h>
+#include <asm/time.h>
+#include <asm/uic.h>
+#include <asm/ppc4xx.h>
+#include <asm/dcr.h>
+#include <linux/interrupt.h>
+#include <linux/of_irq.h>
+#include "fsp2.h"
+
+#define FSP2_BUS_ERR "ibm,bus-error-irq"
+#define FSP2_CMU_ERR "ibm,cmu-error-irq"
+#define FSP2_CONF_ERR "ibm,conf-error-irq"
+#define FSP2_OPBD_ERR "ibm,opbd-error-irq"
+#define FSP2_MCUE "ibm,mc-ue-irq"
+#define FSP2_RST_WRN "ibm,reset-warning-irq"
+
+/*
+ * Compatible strings of the bus nodes handed to of_platform_bus_probe().
+ * The table is never written, so it is declared const and placed in
+ * __initconst like the equivalent tables of the other 44x boards.
+ */
+static const struct of_device_id fsp2_of_bus[] __initconst = {
+	{ .compatible = "ibm,plb4", },
+	{ .compatible = "ibm,plb6", },
+	{ .compatible = "ibm,opb", },
+	{},
+};
+
+static void l2regs(void)
+{
+ pr_err("L2 Controller:\n");
+ pr_err("MCK: 0x%08x\n", mfl2(L2MCK));
+ pr_err("INT: 0x%08x\n", mfl2(L2INT));
+ pr_err("PLBSTAT0: 0x%08x\n", mfl2(L2PLBSTAT0));
+ pr_err("PLBSTAT1: 0x%08x\n", mfl2(L2PLBSTAT1));
+ pr_err("ARRSTAT0: 0x%08x\n", mfl2(L2ARRSTAT0));
+ pr_err("ARRSTAT1: 0x%08x\n", mfl2(L2ARRSTAT1));
+ pr_err("ARRSTAT2: 0x%08x\n", mfl2(L2ARRSTAT2));
+ pr_err("CPUSTAT: 0x%08x\n", mfl2(L2CPUSTAT));
+ pr_err("RACSTAT0: 0x%08x\n", mfl2(L2RACSTAT0));
+ pr_err("WACSTAT0: 0x%08x\n", mfl2(L2WACSTAT0));
+ pr_err("WACSTAT1: 0x%08x\n", mfl2(L2WACSTAT1));
+ pr_err("WACSTAT2: 0x%08x\n", mfl2(L2WACSTAT2));
+ pr_err("WDFSTAT: 0x%08x\n", mfl2(L2WDFSTAT));
+ pr_err("LOG0: 0x%08x\n", mfl2(L2LOG0));
+ pr_err("LOG1: 0x%08x\n", mfl2(L2LOG1));
+ pr_err("LOG2: 0x%08x\n", mfl2(L2LOG2));
+ pr_err("LOG3: 0x%08x\n", mfl2(L2LOG3));
+ pr_err("LOG4: 0x%08x\n", mfl2(L2LOG4));
+ pr_err("LOG5: 0x%08x\n", mfl2(L2LOG5));
+}
+
+/*
+ * Dump the error status/address DCRs of one PLB4-to-OPB bridge.
+ * @base: DCR number the PLB4OPB_* offsets are added to
+ * @num:  bridge index, used only in the heading
+ */
+static void show_plbopb_regs(u32 base, int num)
+{
+	pr_err("\nPLBOPB Bridge %d:\n", num);
+
+	pr_err("GESR0: 0x%08x\n", mfdcr(base + PLB4OPB_GESR0));
+	pr_err("GESR1: 0x%08x\n", mfdcr(base + PLB4OPB_GESR1));
+	pr_err("GESR2: 0x%08x\n", mfdcr(base + PLB4OPB_GESR2));
+	pr_err("GEARU: 0x%08x\n", mfdcr(base + PLB4OPB_GEARU));
+	pr_err("GEAR: 0x%08x\n", mfdcr(base + PLB4OPB_GEAR));
+}
+
+/*
+ * Bus error interrupt handler.
+ *
+ * Dumps the L2 controller registers and the error status/address DCRs of
+ * the PLB6 controller, the PLB6/PLB4 bridges, the PLB6-to-MCIF bridge, the
+ * PLB4 arbiter, the four PLB4-to-OPB bridges and the PLB4/AHB bridges,
+ * then panics. It never returns.
+ *
+ * @irq and @data are unused.
+ */
+static irqreturn_t bus_err_handler(int irq, void *data)
+{
+	pr_err("Bus Error\n");
+
+	l2regs();
+
+	pr_err("\nPLB6 Controller:\n");
+	pr_err("BC_SHD: 0x%08x\n", mfdcr(DCRN_PLB6_SHD));
+	pr_err("BC_ERR: 0x%08x\n", mfdcr(DCRN_PLB6_ERR));
+
+	pr_err("\nPLB6-to-PLB4 Bridge:\n");
+	pr_err("ESR: 0x%08x\n", mfdcr(DCRN_PLB6PLB4_ESR));
+	pr_err("EARH: 0x%08x\n", mfdcr(DCRN_PLB6PLB4_EARH));
+	pr_err("EARL: 0x%08x\n", mfdcr(DCRN_PLB6PLB4_EARL));
+
+	pr_err("\nPLB4-to-PLB6 Bridge:\n");
+	pr_err("ESR: 0x%08x\n", mfdcr(DCRN_PLB4PLB6_ESR));
+	pr_err("EARH: 0x%08x\n", mfdcr(DCRN_PLB4PLB6_EARH));
+	pr_err("EARL: 0x%08x\n", mfdcr(DCRN_PLB4PLB6_EARL));
+
+	pr_err("\nPLB6-to-MCIF Bridge:\n");
+	pr_err("BESR0: 0x%08x\n", mfdcr(DCRN_PLB6MCIF_BESR0));
+	pr_err("BESR1: 0x%08x\n", mfdcr(DCRN_PLB6MCIF_BESR1));
+	pr_err("BEARH: 0x%08x\n", mfdcr(DCRN_PLB6MCIF_BEARH));
+	pr_err("BEARL: 0x%08x\n", mfdcr(DCRN_PLB6MCIF_BEARL));
+
+	/*
+	 * Each PLB4 port has a high and a low error address register; dump
+	 * both halves (previously the high half was printed twice and the
+	 * low half was never shown).
+	 */
+	pr_err("\nPLB4 Arbiter:\n");
+	pr_err("P0ESRH 0x%08x\n", mfdcr(DCRN_PLB4_P0ESRH));
+	pr_err("P0ESRL 0x%08x\n", mfdcr(DCRN_PLB4_P0ESRL));
+	pr_err("P0EARH 0x%08x\n", mfdcr(DCRN_PLB4_P0EARH));
+	pr_err("P0EARL 0x%08x\n", mfdcr(DCRN_PLB4_P0EARL));
+	pr_err("P1ESRH 0x%08x\n", mfdcr(DCRN_PLB4_P1ESRH));
+	pr_err("P1ESRL 0x%08x\n", mfdcr(DCRN_PLB4_P1ESRL));
+	pr_err("P1EARH 0x%08x\n", mfdcr(DCRN_PLB4_P1EARH));
+	pr_err("P1EARL 0x%08x\n", mfdcr(DCRN_PLB4_P1EARL));
+
+	show_plbopb_regs(DCRN_PLB4OPB0_BASE, 0);
+	show_plbopb_regs(DCRN_PLB4OPB1_BASE, 1);
+	show_plbopb_regs(DCRN_PLB4OPB2_BASE, 2);
+	show_plbopb_regs(DCRN_PLB4OPB3_BASE, 3);
+
+	pr_err("\nPLB4-to-AHB Bridge:\n");
+	pr_err("ESR: 0x%08x\n", mfdcr(DCRN_PLB4AHB_ESR));
+	pr_err("SEUAR: 0x%08x\n", mfdcr(DCRN_PLB4AHB_SEUAR));
+	pr_err("SELAR: 0x%08x\n", mfdcr(DCRN_PLB4AHB_SELAR));
+
+	pr_err("\nAHB-to-PLB4 Bridge:\n");
+	pr_err("\nESR: 0x%08x\n", mfdcr(DCRN_AHBPLB4_ESR));
+	pr_err("\nEAR: 0x%08x\n", mfdcr(DCRN_AHBPLB4_EAR));
+	panic("Bus Error\n");
+}
+
+static irqreturn_t cmu_err_handler(int irq, void *data) {
+ pr_err("CMU Error\n");
+ pr_err("FIR0: 0x%08x\n", mfcmu(CMUN_FIR0));
+ panic("CMU Error\n");
+}
+
+/*
+ * Configuration logic error interrupt: log the CONF FIR and the two
+ * RPERR DCRs, then panic (never returns).
+ */
+static irqreturn_t conf_err_handler(int irq, void *data)
+{
+	pr_err("Configuration Logic Error\n");
+
+	pr_err("CONF_FIR: 0x%08x\n", mfdcr(DCRN_CONF_FIR_RWC));
+	pr_err("RPERR0: 0x%08x\n", mfdcr(DCRN_CONF_RPERR0));
+	pr_err("RPERR1: 0x%08x\n", mfdcr(DCRN_CONF_RPERR1));
+
+	panic("Configuration Logic Error\n");
+}
+
+static irqreturn_t opbd_err_handler(int irq, void *data) {
+ panic("OPBD Error\n");
+}
+
+/*
+ * DDR uncorrectable-error interrupt: log DDR3/4 controller, core wrapper
+ * and PLB6-to-MCIF bridge DCRs, then panic (never returns).
+ */
+static irqreturn_t mcue_handler(int irq, void *data)
+{
+	pr_err("DDR: Uncorrectable Error\n");
+
+	/* DDR3/4 memory controller */
+	pr_err("MCSTAT: 0x%08x\n",
+	       mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_MCSTAT));
+	pr_err("MCOPT1: 0x%08x\n",
+	       mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_MCOPT1));
+	pr_err("MCOPT2: 0x%08x\n",
+	       mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_MCOPT2));
+	pr_err("PHYSTAT: 0x%08x\n",
+	       mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_PHYSTAT));
+	pr_err("CFGR0: 0x%08x\n",
+	       mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_CFGR0));
+	pr_err("CFGR1: 0x%08x\n",
+	       mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_CFGR1));
+	pr_err("CFGR2: 0x%08x\n",
+	       mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_CFGR2));
+	pr_err("CFGR3: 0x%08x\n",
+	       mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_CFGR3));
+	pr_err("SCRUB_CNTL: 0x%08x\n",
+	       mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_SCRUB_CNTL));
+	pr_err("ECCERR_PORT0: 0x%08x\n",
+	       mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_ECCERR_PORT0));
+	pr_err("ECCERR_ADDR_PORT0: 0x%08x\n",
+	       mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_ECCERR_ADDR_PORT0));
+	pr_err("ECCERR_CNT_PORT0: 0x%08x\n",
+	       mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_ECCERR_COUNT_PORT0));
+	pr_err("ECC_CHECK_PORT0: 0x%08x\n",
+	       mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_ECC_CHECK_PORT0));
+
+	/* PLB-attached core wrapper */
+	pr_err("MCER0: 0x%08x\n",
+	       mfdcr(DCRN_CW_BASE + DCRN_CW_MCER0));
+	pr_err("MCER1: 0x%08x\n",
+	       mfdcr(DCRN_CW_BASE + DCRN_CW_MCER1));
+
+	/* PLB6-to-MCIF bridge */
+	pr_err("BESR: 0x%08x\n",
+	       mfdcr(DCRN_PLB6MCIF_BESR0));
+	pr_err("BEARL: 0x%08x\n",
+	       mfdcr(DCRN_PLB6MCIF_BEARL));
+	pr_err("BEARH: 0x%08x\n",
+	       mfdcr(DCRN_PLB6MCIF_BEARH));
+
+	panic("DDR: Uncorrectable Error\n");
+}
+
+/*
+ * Reset-warning interrupt: read the CMU CRCS register and panic with a
+ * message chosen from its status field (never returns).
+ */
+static irqreturn_t rst_wrn_handler(int irq, void *data)
+{
+	u32 reset_status = mfcmu(CMUN_CRCS);
+
+	if ((reset_status & CRCS_STAT_MASK) == CRCS_STAT_CHIP_RST_B)
+		panic("Received chassis-initiated reset request");
+
+	panic("Unknown external reset: CRCS=0x%x", reset_status);
+}
+
+/*
+ * Request the first interrupt of every device tree node compatible with
+ * @compat, installing @errirq_handler with the node itself as dev_id.
+ *
+ * Stops at the first node whose interrupt cannot be mapped or requested;
+ * in that case the node reference held by the iterator is dropped before
+ * returning. Failures are only logged.
+ */
+static void __init node_irq_request(const char *compat, irq_handler_t errirq_handler)
+{
+	struct device_node *np;
+	unsigned int irq;
+	int32_t rc;
+
+	for_each_compatible_node(np, NULL, compat) {
+		irq = irq_of_parse_and_map(np, 0);
+		if (!irq) {
+			/* messages are newline-terminated so they are not merged with later output */
+			pr_err("device tree node %pOFn is missing an interrupt\n",
+			       np);
+			of_node_put(np);
+			return;
+		}
+
+		rc = request_irq(irq, errirq_handler, 0, np->name, np);
+		if (rc) {
+			pr_err("fsp_of_probe: request_irq failed: np=%pOF rc=%d\n",
+			       np, rc);
+			of_node_put(np);
+			return;
+		}
+	}
+}
+
+static void __init critical_irq_setup(void)
+{
+ node_irq_request(FSP2_CMU_ERR, cmu_err_handler);
+ node_irq_request(FSP2_BUS_ERR, bus_err_handler);
+ node_irq_request(FSP2_CONF_ERR, conf_err_handler);
+ node_irq_request(FSP2_OPBD_ERR, opbd_err_handler);
+ node_irq_request(FSP2_MCUE, mcue_handler);
+ node_irq_request(FSP2_RST_WRN, rst_wrn_handler);
+}
+
+/*
+ * Device initcall for the fsp2 machine: probe the buses listed in
+ * fsp2_of_bus. The probe result is ignored; always returns 0.
+ */
+static int __init fsp2_device_probe(void)
+{
+	of_platform_bus_probe(NULL, fsp2_of_bus, NULL);
+
+	return 0;
+}
+machine_device_initcall(fsp2, fsp2_device_probe);
+
+/*
+ * .probe hook of the fsp2 machine description.
+ *
+ * Returns 0 without touching hardware unless the flat device tree root is
+ * compatible with "ibm,fsp2". Otherwise programs the PLB6, CMU, L2 and
+ * configuration-logic registers below (in this order) and returns 1.
+ */
+static int __init fsp2_probe(void)
+{
+	unsigned long root = of_get_flat_dt_root();
+	u32 val;
+
+	if (!of_flat_dt_is_compatible(root, "ibm,fsp2"))
+		return 0;
+
+	/*
+	 * Clear BC_ERR and mask snoopable request plb errors.
+	 * DCRN_PLB6_CR0 is defined as DCRN_PLB6_BASE, so the value read
+	 * from CR0 is written back to the same DCR.
+	 */
+	val = mfdcr(DCRN_PLB6_CR0) | 0x20000000;
+	mtdcr(DCRN_PLB6_BASE, val);
+	mtdcr(DCRN_PLB6_HD, 0xffff0000);
+	mtdcr(DCRN_PLB6_SHD, 0xffff0000);
+
+	/*
+	 * TVSENSE reset is blocked (clock gated) by the POR default of the
+	 * TVS sleep config bit. As a consequence, TVSENSE will provide
+	 * erratic sensor values, which may result in spurious (parity)
+	 * errors recorded in the CMU FIR and leading to erroneous interrupt
+	 * requests once the CMU interrupt is unmasked.
+	 */
+
+	/* 1. set TVS1[UNDOZE] */
+	val = mfcmu(CMUN_TVS1);
+	val |= 0x4;
+	mtcmu(CMUN_TVS1, val);
+
+	/* 2. clear FIR[TVS] and FIR[TVSPAR] */
+	val = mfcmu(CMUN_FIR0);
+	val |= 0x30000000;
+	mtcmu(CMUN_FIR0, val);
+
+	/* L2 machine checks */
+	mtl2(L2PLBMCKEN0, 0xffffffff);
+	mtl2(L2PLBMCKEN1, 0x0000ffff);
+	mtl2(L2ARRMCKEN0, 0xffffffff);
+	mtl2(L2ARRMCKEN1, 0xffffffff);
+	mtl2(L2ARRMCKEN2, 0xfffff000);
+	mtl2(L2CPUMCKEN, 0xffffffff);
+	mtl2(L2RACMCKEN0, 0xffffffff);
+	mtl2(L2WACMCKEN0, 0xffffffff);
+	mtl2(L2WACMCKEN1, 0xffffffff);
+	mtl2(L2WACMCKEN2, 0xffffffff);
+	mtl2(L2WDFMCKEN, 0xffffffff);
+
+	/* L2 interrupts */
+	mtl2(L2PLBINTEN1, 0xffff0000);
+
+	/*
+	 * At a global level, enable all L2 machine checks and interrupts
+	 * reported by the L2 subsystems, except for the external machine
+	 * check input (UIC0.1).
+	 */
+	mtl2(L2MCKEN, 0x000007ff);
+	mtl2(L2INTEN, 0x000004ff);
+
+	/* Enable FSP-2 configuration logic parity errors */
+	mtdcr(DCRN_CONF_EIR_RS, 0x80000000);
+
+	return 1;
+}
+
+static void __init fsp2_irq_init(void)
+{
+ uic_init_tree();
+ critical_irq_setup();
+}
+
+/*
+ * Machine description for FSP-2. There is no .compatible entry here; the
+ * device tree check is done inside fsp2_probe().
+ */
+define_machine(fsp2) {
+	.name		= "FSP-2",
+	.probe		= fsp2_probe,
+	.progress	= udbg_progress,
+	.init_IRQ	= fsp2_irq_init,
+	.get_irq	= uic_get_irq,
+	.restart	= ppc4xx_reset_system,
+};
diff --git a/arch/powerpc/platforms/44x/fsp2.h b/arch/powerpc/platforms/44x/fsp2.h
new file mode 100644
index 000000000..9e1d52754
--- /dev/null
+++ b/arch/powerpc/platforms/44x/fsp2.h
@@ -0,0 +1,272 @@
+#ifndef _ASM_POWERPC_FSP_DCR_H_
+#define _ASM_POWERPC_FSP_DCR_H_
+#ifdef __KERNEL__
+#include <asm/dcr.h>
+
+#define DCRN_CMU_ADDR 0x00C /* Chip management unit addr */
+#define DCRN_CMU_DATA 0x00D /* Chip management unit data */
+
+/* PLB4 Arbiter */
+#define DCRN_PLB4_PCBI 0x010 /* PLB Crossbar ID/Rev Register */
+#define DCRN_PLB4_P0ACR 0x011 /* PLB0 Arbiter Control Register */
+#define DCRN_PLB4_P0ESRL 0x012 /* PLB0 Error Status Register Low */
+#define DCRN_PLB4_P0ESRH 0x013 /* PLB0 Error Status Register High */
+#define DCRN_PLB4_P0EARL 0x014 /* PLB0 Error Address Register Low */
+#define DCRN_PLB4_P0EARH 0x015 /* PLB0 Error Address Register High */
+#define DCRN_PLB4_P0ESRLS 0x016 /* PLB0 Error Status Register Low Set*/
+#define DCRN_PLB4_P0ESRHS 0x017 /* PLB0 Error Status Register High */
+#define DCRN_PLB4_PCBC 0x018 /* PLB Crossbar Control Register */
+#define DCRN_PLB4_P1ACR 0x019 /* PLB1 Arbiter Control Register */
+#define DCRN_PLB4_P1ESRL 0x01A /* PLB1 Error Status Register Low */
+#define DCRN_PLB4_P1ESRH 0x01B /* PLB1 Error Status Register High */
+#define DCRN_PLB4_P1EARL 0x01C /* PLB1 Error Address Register Low */
+#define DCRN_PLB4_P1EARH 0x01D /* PLB1 Error Address Register High */
+#define DCRN_PLB4_P1ESRLS 0x01E /* PLB1 Error Status Register Low Set*/
+#define DCRN_PLB4_P1ESRHS 0x01F /*PLB1 Error Status Register High Set*/
+
+/* PLB4/OPB bridge 0, 1, 2, 3 */
+#define DCRN_PLB4OPB0_BASE 0x020
+#define DCRN_PLB4OPB1_BASE 0x030
+#define DCRN_PLB4OPB2_BASE 0x040
+#define DCRN_PLB4OPB3_BASE 0x050
+
+#define PLB4OPB_GESR0 0x0 /* Error status 0: Master Dev 0-3 */
+#define PLB4OPB_GEAR 0x2 /* Error Address Register */
+#define PLB4OPB_GEARU 0x3 /* Error Upper Address Register */
+#define PLB4OPB_GESR1 0x4 /* Error Status 1: Master Dev 4-7 */
+#define PLB4OPB_GESR2 0xC /* Error Status 2: Master Dev 8-11 */
+
+/* PLB4-to-AHB Bridge */
+#define DCRN_PLB4AHB_BASE 0x400
+#define DCRN_PLB4AHB_SEUAR (DCRN_PLB4AHB_BASE + 1)
+#define DCRN_PLB4AHB_SELAR (DCRN_PLB4AHB_BASE + 2)
+#define DCRN_PLB4AHB_ESR (DCRN_PLB4AHB_BASE + 3)
+#define DCRN_AHBPLB4_ESR (DCRN_PLB4AHB_BASE + 8)
+#define DCRN_AHBPLB4_EAR (DCRN_PLB4AHB_BASE + 9)
+
+/* PLB6 Controller */
+#define DCRN_PLB6_BASE 0x11111300
+#define DCRN_PLB6_CR0 (DCRN_PLB6_BASE)
+#define DCRN_PLB6_ERR (DCRN_PLB6_BASE + 0x0B)
+#define DCRN_PLB6_HD (DCRN_PLB6_BASE + 0x0E)
+#define DCRN_PLB6_SHD (DCRN_PLB6_BASE + 0x10)
+
+/* PLB4-to-PLB6 Bridge */
+#define DCRN_PLB4PLB6_BASE 0x11111320
+#define DCRN_PLB4PLB6_ESR (DCRN_PLB4PLB6_BASE + 1)
+#define DCRN_PLB4PLB6_EARH (DCRN_PLB4PLB6_BASE + 3)
+#define DCRN_PLB4PLB6_EARL (DCRN_PLB4PLB6_BASE + 4)
+
+/* PLB6-to-PLB4 Bridge */
+#define DCRN_PLB6PLB4_BASE 0x11111350
+#define DCRN_PLB6PLB4_ESR (DCRN_PLB6PLB4_BASE + 1)
+#define DCRN_PLB6PLB4_EARH (DCRN_PLB6PLB4_BASE + 3)
+#define DCRN_PLB6PLB4_EARL (DCRN_PLB6PLB4_BASE + 4)
+
+/* PLB6-to-MCIF Bridge */
+#define DCRN_PLB6MCIF_BASE 0x11111380
+#define DCRN_PLB6MCIF_BESR0 (DCRN_PLB6MCIF_BASE + 0)
+#define DCRN_PLB6MCIF_BESR1 (DCRN_PLB6MCIF_BASE + 1)
+#define DCRN_PLB6MCIF_BEARL (DCRN_PLB6MCIF_BASE + 2)
+#define DCRN_PLB6MCIF_BEARH (DCRN_PLB6MCIF_BASE + 3)
+
+/* Configuration Logic Registers */
+#define DCRN_CONF_BASE 0x11111400
+#define DCRN_CONF_FIR_RWC (DCRN_CONF_BASE + 0x3A)
+#define DCRN_CONF_EIR_RS (DCRN_CONF_BASE + 0x3E)
+#define DCRN_CONF_RPERR0 (DCRN_CONF_BASE + 0x4D)
+#define DCRN_CONF_RPERR1 (DCRN_CONF_BASE + 0x4E)
+
+#define DCRN_L2CDCRAI 0x11111100
+#define DCRN_L2CDCRDI 0x11111104
+/* L2 indirect addresses */
+#define L2MCK 0x120
+#define L2MCKEN 0x130
+#define L2INT 0x150
+#define L2INTEN 0x160
+#define L2LOG0 0x180
+#define L2LOG1 0x184
+#define L2LOG2 0x188
+#define L2LOG3 0x18C
+#define L2LOG4 0x190
+#define L2LOG5 0x194
+#define L2PLBSTAT0 0x300
+#define L2PLBSTAT1 0x304
+#define L2PLBMCKEN0 0x330
+#define L2PLBMCKEN1 0x334
+#define L2PLBINTEN0 0x360
+#define L2PLBINTEN1 0x364
+#define L2ARRSTAT0 0x500
+#define L2ARRSTAT1 0x504
+#define L2ARRSTAT2 0x508
+#define L2ARRMCKEN0 0x530
+#define L2ARRMCKEN1 0x534
+#define L2ARRMCKEN2 0x538
+#define L2ARRINTEN0 0x560
+#define L2ARRINTEN1 0x564
+#define L2ARRINTEN2 0x568
+#define L2CPUSTAT 0x700
+#define L2CPUMCKEN 0x730
+#define L2CPUINTEN 0x760
+#define L2RACSTAT0 0x900
+#define L2RACMCKEN0 0x930
+#define L2RACINTEN0 0x960
+#define L2WACSTAT0 0xD00
+#define L2WACSTAT1 0xD04
+#define L2WACSTAT2 0xD08
+#define L2WACMCKEN0 0xD30
+#define L2WACMCKEN1 0xD34
+#define L2WACMCKEN2 0xD38
+#define L2WACINTEN0 0xD60
+#define L2WACINTEN1 0xD64
+#define L2WACINTEN2 0xD68
+#define L2WDFSTAT 0xF00
+#define L2WDFMCKEN 0xF30
+#define L2WDFINTEN 0xF60
+
+/* DDR3/4 Memory Controller */
+#define DCRN_DDR34_BASE 0x11120000
+#define DCRN_DDR34_MCSTAT 0x10
+#define DCRN_DDR34_MCOPT1 0x20
+#define DCRN_DDR34_MCOPT2 0x21
+#define DCRN_DDR34_PHYSTAT 0x32
+#define DCRN_DDR34_CFGR0 0x40
+#define DCRN_DDR34_CFGR1 0x41
+#define DCRN_DDR34_CFGR2 0x42
+#define DCRN_DDR34_CFGR3 0x43
+#define DCRN_DDR34_SCRUB_CNTL 0xAA
+#define DCRN_DDR34_SCRUB_INT 0xAB
+#define DCRN_DDR34_SCRUB_START_ADDR 0xB0
+#define DCRN_DDR34_SCRUB_END_ADDR 0xD0
+#define DCRN_DDR34_ECCERR_ADDR_PORT0 0xE0
+#define DCRN_DDR34_ECCERR_ADDR_PORT1 0xE1
+#define DCRN_DDR34_ECCERR_ADDR_PORT2 0xE2
+#define DCRN_DDR34_ECCERR_ADDR_PORT3 0xE3
+#define DCRN_DDR34_ECCERR_COUNT_PORT0 0xE4
+#define DCRN_DDR34_ECCERR_COUNT_PORT1 0xE5
+#define DCRN_DDR34_ECCERR_COUNT_PORT2 0xE6
+#define DCRN_DDR34_ECCERR_COUNT_PORT3 0xE7
+#define DCRN_DDR34_ECCERR_PORT0 0xF0
+#define DCRN_DDR34_ECCERR_PORT1 0xF2
+#define DCRN_DDR34_ECCERR_PORT2 0xF4
+#define DCRN_DDR34_ECCERR_PORT3 0xF6
+#define DCRN_DDR34_ECC_CHECK_PORT0 0xF8
+#define DCRN_DDR34_ECC_CHECK_PORT1 0xF9
+/*
+ * NOTE(review): PORT2 has the same value as PORT1. The 0xF8, 0xF9, ..., 0xFB
+ * sequence suggests 0xFA may have been intended -- confirm against the
+ * hardware documentation before relying on this offset.
+ */
+#define DCRN_DDR34_ECC_CHECK_PORT2 0xF9
+#define DCRN_DDR34_ECC_CHECK_PORT3 0xFB
+
+#define DDR34_SCRUB_CNTL_STOP 0x00000000
+#define DDR34_SCRUB_CNTL_SCRUB 0x80000000
+#define DDR34_SCRUB_CNTL_UE_STOP 0x20000000
+#define DDR34_SCRUB_CNTL_CE_STOP 0x10000000
+#define DDR34_SCRUB_CNTL_RANK_EN 0x00008000
+
+/* PLB-Attached DDR3/4 Core Wrapper */
+#define DCRN_CW_BASE 0x11111800
+#define DCRN_CW_MCER0 0x00
+#define DCRN_CW_MCER1 0x01
+#define DCRN_CW_MCER_AND0 0x02
+#define DCRN_CW_MCER_AND1 0x03
+#define DCRN_CW_MCER_OR0 0x04
+#define DCRN_CW_MCER_OR1 0x05
+#define DCRN_CW_MCER_MASK0 0x06
+#define DCRN_CW_MCER_MASK1 0x07
+#define DCRN_CW_MCER_MASK_AND0 0x08
+#define DCRN_CW_MCER_MASK_AND1 0x09
+#define DCRN_CW_MCER_MASK_OR0 0x0A
+#define DCRN_CW_MCER_MASK_OR1 0x0B
+#define DCRN_CW_MCER_ACTION0 0x0C
+#define DCRN_CW_MCER_ACTION1 0x0D
+#define DCRN_CW_MCER_WOF0 0x0E
+#define DCRN_CW_MCER_WOF1 0x0F
+#define DCRN_CW_LFIR 0x10
+#define DCRN_CW_LFIR_AND 0x11
+#define DCRN_CW_LFIR_OR 0x12
+#define DCRN_CW_LFIR_MASK 0x13
+#define DCRN_CW_LFIR_MASK_AND 0x14
+#define DCRN_CW_LFIR_MASK_OR 0x15
+
+#define CW_MCER0_MEM_CE 0x00020000
+/* CMU addresses */
+#define CMUN_CRCS 0x00 /* Chip Reset Control/Status */
+#define CMUN_CONFFIR0 0x20 /* Config Reg Parity FIR 0 */
+#define CMUN_CONFFIR1 0x21 /* Config Reg Parity FIR 1 */
+#define CMUN_CONFFIR2 0x22 /* Config Reg Parity FIR 2 */
+#define CMUN_CONFFIR3 0x23 /* Config Reg Parity FIR 3 */
+#define CMUN_URCR3_RS 0x24 /* Unit Reset Control Reg 3 Set */
+#define CMUN_URCR3_C 0x25 /* Unit Reset Control Reg 3 Clear */
+#define CMUN_URCR3_P 0x26 /* Unit Reset Control Reg 3 Pulse */
+#define CMUN_PW0 0x2C /* Pulse Width Register */
+#define CMUN_URCR0_P 0x2D /* Unit Reset Control Reg 0 Pulse */
+#define CMUN_URCR1_P 0x2E /* Unit Reset Control Reg 1 Pulse */
+#define CMUN_URCR2_P 0x2F /* Unit Reset Control Reg 2 Pulse */
+#define CMUN_CLS_RW 0x30 /* Code Load Status (Read/Write) */
+#define CMUN_CLS_S 0x31 /* Code Load Status (Set) */
+#define CMUN_CLS_C 0x32 /* Code Load Status (Clear) */
+#define CMUN_URCR2_RS 0x33 /* Unit Reset Control Reg 2 Set */
+#define CMUN_URCR2_C 0x34 /* Unit Reset Control Reg 2 Clear */
+#define CMUN_CLKEN0 0x35 /* Clock Enable 0 */
+#define CMUN_CLKEN1 0x36 /* Clock Enable 1 */
+#define CMUN_PCD0 0x37 /* PSI clock divider 0 */
+#define CMUN_PCD1 0x38 /* PSI clock divider 1 */
+#define CMUN_TMR0 0x39 /* Reset Timer */
+#define CMUN_TVS0 0x3A /* TV Sense Reg 0 */
+#define CMUN_TVS1 0x3B /* TV Sense Reg 1 */
+#define CMUN_MCCR 0x3C /* DRAM Configuration Reg */
+#define CMUN_FIR0 0x3D /* Fault Isolation Reg 0 */
+#define CMUN_FMR0 0x3E /* FIR Mask Reg 0 */
+#define CMUN_ETDRB 0x3F /* ETDR Backdoor */
+
+/* CRCS bit fields */
+#define CRCS_STAT_MASK 0xF0000000
+#define CRCS_STAT_POR 0x10000000
+#define CRCS_STAT_PHR 0x20000000
+#define CRCS_STAT_PCIE 0x30000000
+#define CRCS_STAT_CRCS_SYS 0x40000000
+#define CRCS_STAT_DBCR_SYS 0x50000000
+#define CRCS_STAT_HOST_SYS 0x60000000
+#define CRCS_STAT_CHIP_RST_B 0x70000000
+#define CRCS_STAT_CRCS_CHIP 0x80000000
+#define CRCS_STAT_DBCR_CHIP 0x90000000
+#define CRCS_STAT_HOST_CHIP 0xA0000000
+#define CRCS_STAT_PSI_CHIP 0xB0000000
+#define CRCS_STAT_CRCS_CORE 0xC0000000
+#define CRCS_STAT_DBCR_CORE 0xD0000000
+#define CRCS_STAT_HOST_CORE 0xE0000000
+#define CRCS_STAT_PCIE_HOT 0xF0000000
+#define CRCS_STAT_SELF_CORE 0x40000000
+#define CRCS_STAT_SELF_CHIP 0x50000000
+#define CRCS_WATCHE 0x08000000
+#define CRCS_CORE 0x04000000 /* Reset PPC440 core */
+#define CRCS_CHIP 0x02000000 /* Chip Reset */
+#define CRCS_SYS 0x01000000 /* System Reset */
+#define CRCS_WRCR 0x00800000 /* Watchdog reset on core reset */
+#define CRCS_EXTCR 0x00080000 /* CHIP_RST_B triggers chip reset */
+#define CRCS_PLOCK 0x00000002 /* PLL Locked */
+
+/*
+ * Indirect CMU register write: store the register number in the
+ * DCRN_CMU_ADDR DCR, then the value in the DCRN_CMU_DATA DCR.
+ */
+#define mtcmu(reg, data) \
+do { \
+ mtdcr(DCRN_CMU_ADDR, reg); \
+ mtdcr(DCRN_CMU_DATA, data); \
+} while (0)
+
+/*
+ * Indirect CMU register read: store the register number in the
+ * DCRN_CMU_ADDR DCR and evaluate to the value read from DCRN_CMU_DATA.
+ * The expansion declares a local named "data", so an argument expression
+ * that itself refers to a variable called "data" would be shadowed.
+ */
+#define mfcmu(reg)\
+ ({u32 data; \
+ mtdcr(DCRN_CMU_ADDR, reg); \
+ data = mfdcr(DCRN_CMU_DATA); \
+ data; })
+
+/*
+ * Indirect L2 register write: store the register number in the
+ * DCRN_L2CDCRAI DCR, then the value in the DCRN_L2CDCRDI DCR.
+ */
+#define mtl2(reg, data) \
+do { \
+ mtdcr(DCRN_L2CDCRAI, reg); \
+ mtdcr(DCRN_L2CDCRDI, data); \
+} while (0)
+
+/*
+ * Indirect L2 register read: store the register number in the
+ * DCRN_L2CDCRAI DCR and evaluate to the value read from DCRN_L2CDCRDI.
+ * Like mfcmu(), the expansion declares a local named "data".
+ */
+#define mfl2(reg) \
+ ({u32 data; \
+ mtdcr(DCRN_L2CDCRAI, reg); \
+ data = mfdcr(DCRN_L2CDCRDI); \
+ data; })
+
+#endif /* __KERNEL__ */
+#endif /* _ASM_POWERPC_FSP_DCR_H_ */
diff --git a/arch/powerpc/platforms/44x/idle.c b/arch/powerpc/platforms/44x/idle.c
new file mode 100644
index 000000000..f533b495e
--- /dev/null
+++ b/arch/powerpc/platforms/44x/idle.c
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2008 IBM Corp.
+ *
+ * Based on arch/powerpc/platforms/pasemi/idle.c:
+ * Copyright (C) 2006-2007 PA Semi, Inc
+ *
+ * Added by: Jerone Young <jyoung5@us.ibm.com>
+ */
+
+#include <linux/of.h>
+#include <linux/kernel.h>
+#include <asm/machdep.h>
+
+static int mode_spin;
+
+/*
+ * power_save hook: write the MSR with WE, EE, CE and DE additionally set,
+ * then write the previously read MSR value back. Each MSR write is
+ * followed by isync().
+ */
+static void ppc44x_idle(void)
+{
+	unsigned long saved_msr = mfmsr();
+
+	/* set wait state MSR */
+	mtmsr(saved_msr|MSR_WE|MSR_EE|MSR_CE|MSR_DE);
+	isync();
+
+	/* return to initial state */
+	mtmsr(saved_msr);
+	isync();
+}
+
+/*
+ * arch initcall: install ppc44x_idle as ppc_md.power_save unless spin
+ * mode was selected (mode_spin set). Always returns 0.
+ */
+int __init ppc44x_idle_init(void)
+{
+	/* spin mode requested: leave ppc_md.power_save as it is */
+	if (mode_spin)
+		return 0;
+
+	/* otherwise use wait mode */
+	ppc_md.power_save = &ppc44x_idle;
+
+	return 0;
+}
+
+arch_initcall(ppc44x_idle_init);
+
+/*
+ * Handler for the "idle=" early parameter.
+ *
+ * "idle=spin" sets mode_spin and clears ppc_md.power_save; any other value
+ * is ignored. @p is checked for NULL before being compared, because an
+ * early parameter given without "=value" is passed to its handler as a
+ * NULL pointer.
+ *
+ * Always returns 0.
+ */
+static int __init idle_param(char *p)
+{
+	if (p && !strcmp("spin", p)) {
+		mode_spin = 1;
+		ppc_md.power_save = NULL;
+	}
+
+	return 0;
+}
+
+early_param("idle", idle_param);
diff --git a/arch/powerpc/platforms/44x/iss4xx.c b/arch/powerpc/platforms/44x/iss4xx.c
new file mode 100644
index 000000000..ef883d97f
--- /dev/null
+++ b/arch/powerpc/platforms/44x/iss4xx.c
@@ -0,0 +1,150 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * PPC476 board specific routines
+ *
+ * Copyright 2010 Torez Smith, IBM Corporation.
+ *
+ * Based on earlier code:
+ * Matt Porter <mporter@kernel.crashing.org>
+ * Copyright 2002-2005 MontaVista Software Inc.
+ *
+ * Eugene Surovegin <eugene.surovegin@zultys.com> or <ebs@ebshome.net>
+ * Copyright (c) 2003-2005 Zultys Technologies
+ *
+ * Rewritten and ported to the merged powerpc tree:
+ * Copyright 2007 David Gibson <dwg@au1.ibm.com>, IBM Corporation.
+ */
+
+#include <linux/init.h>
+#include <linux/of_platform.h>
+#include <linux/rtc.h>
+
+#include <asm/machdep.h>
+#include <asm/prom.h>
+#include <asm/udbg.h>
+#include <asm/time.h>
+#include <asm/uic.h>
+#include <asm/ppc4xx.h>
+#include <asm/mpic.h>
+#include <asm/mmu.h>
+
+/* Compatible strings of the bus nodes handed to of_platform_bus_probe(). */
+static const struct of_device_id iss4xx_of_bus[] __initconst = {
+	{ .compatible = "ibm,plb4", },
+	{ .compatible = "ibm,plb6", },
+	{ .compatible = "ibm,opb", },
+	{ .compatible = "ibm,ebc", },
+	{ /* sentinel */ },
+};
+
+/*
+ * Device initcall for the iss4xx machine: probe the buses listed in
+ * iss4xx_of_bus, then call of_instantiate_rtc(). Always returns 0.
+ */
+static int __init iss4xx_device_probe(void)
+{
+	of_platform_bus_probe(NULL, iss4xx_of_bus, NULL);
+	of_instantiate_rtc();
+	return 0;
+}
+machine_device_initcall(iss4xx, iss4xx_device_probe);
+
+/* We can have either UICs or MPICs */
+/*
+ * .init_IRQ hook: locate the top level interrupt controller -- the first
+ * "interrupt-controller" node that has no "interrupts" property -- and
+ * initialize it as either a UIC or, when CONFIG_MPIC is enabled, an MPIC.
+ * Panics when no such node exists or its type is not recognized.
+ */
+static void __init iss4xx_init_irq(void)
+{
+	struct device_node *np;
+
+	/* Find top level interrupt controller */
+	for_each_node_with_property(np, "interrupt-controller") {
+		if (!of_property_present(np, "interrupts"))
+			break;
+	}
+	if (np == NULL)
+		panic("Can't find top level interrupt controller");
+
+	/* Check type and do appropriate initialization */
+	if (of_device_is_compatible(np, "ibm,uic")) {
+		uic_init_tree();
+		ppc_md.get_irq = uic_get_irq;
+#ifdef CONFIG_MPIC
+	} else if (of_device_is_compatible(np, "chrp,open-pic")) {
+		/* The MPIC driver will get everything it needs from the
+		 * device-tree, just pass 0 to all arguments
+		 */
+		struct mpic *mpic = mpic_alloc(np, 0, MPIC_NO_RESET, 0, 0, " MPIC ");
+		BUG_ON(mpic == NULL);
+		mpic_init(mpic);
+		ppc_md.get_irq = mpic_get_irq;
+#endif
+	} else
+		panic("Unrecognized top level interrupt controller");
+
+	/*
+	 * Leaving the iterator with "break" keeps a reference on np; drop it
+	 * now that the node is no longer used here.
+	 * NOTE(review): assumes mpic_alloc() takes its own reference if it
+	 * keeps the node -- confirm.
+	 */
+	of_node_put(np);
+}
+
+#ifdef CONFIG_SMP
+/* .setup_cpu hook: @cpu is unused; only mpic_setup_this_cpu() is called. */
+static void smp_iss4xx_setup_cpu(int cpu)
+{
+	mpic_setup_this_cpu();
+}
+
+/*
+ * .kick_cpu hook: start secondary CPU @cpu through its spin table.
+ *
+ * Reads the spin table physical address from the CPU node's
+ * "cpu-release-addr" property, writes the CPU number into entry 3 and the
+ * physical address of start_secondary_47x into entry 1, with a write
+ * barrier between the two stores and a full barrier after.
+ *
+ * Returns 0 on success or -ENOENT when the property is missing. The CPU
+ * node reference obtained here is dropped on both paths.
+ */
+static int smp_iss4xx_kick_cpu(int cpu)
+{
+	struct device_node *cpunode = of_get_cpu_node(cpu, NULL);
+	const u64 *spin_table_addr_prop;
+	u32 *spin_table;
+	extern void start_secondary_47x(void);
+
+	BUG_ON(cpunode == NULL);
+
+	/* Assume spin table. We could test for the enable-method in
+	 * the device-tree but currently there's little point as it's
+	 * our only supported method
+	 */
+	spin_table_addr_prop = of_get_property(cpunode, "cpu-release-addr",
+					       NULL);
+	if (spin_table_addr_prop == NULL) {
+		pr_err("CPU%d: Can't start, missing cpu-release-addr !\n", cpu);
+		of_node_put(cpunode);
+		return -ENOENT;
+	}
+
+	/* Assume it's mapped as part of the linear mapping. This is a bit
+	 * fishy but will work fine for now
+	 */
+	spin_table = (u32 *)__va(*spin_table_addr_prop);
+
+	/* The property value has been read; the node is not needed anymore. */
+	of_node_put(cpunode);
+
+	pr_debug("CPU%d: Spin table mapped at %p\n", cpu, spin_table);
+
+	spin_table[3] = cpu;
+	smp_wmb();
+	spin_table[1] = __pa(start_secondary_47x);
+	mb();
+
+	return 0;
+}
+
+/* SMP operations installed by iss4xx_smp_init(). */
+static struct smp_ops_t iss_smp_ops = {
+	.probe		= smp_mpic_probe,
+	.message_pass	= smp_mpic_message_pass,
+	.setup_cpu	= smp_iss4xx_setup_cpu,
+	.kick_cpu	= smp_iss4xx_kick_cpu,
+	.give_timebase	= smp_generic_give_timebase,
+	.take_timebase	= smp_generic_take_timebase,
+};
+
+/* Install iss_smp_ops as smp_ops, but only when the MMU is of the 47x type. */
+static void __init iss4xx_smp_init(void)
+{
+	if (!mmu_has_feature(MMU_FTR_TYPE_47x))
+		return;
+
+	smp_ops = &iss_smp_ops;
+}
+
+#else /* CONFIG_SMP */
+static void __init iss4xx_smp_init(void) { }
+#endif /* CONFIG_SMP */
+
+/* .setup_arch hook: only calls iss4xx_smp_init(), an empty stub when !CONFIG_SMP. */
+static void __init iss4xx_setup_arch(void)
+{
+	iss4xx_smp_init();
+}
+
+/*
+ * Machine description for ISS-4xx ("ibm,iss-4xx"). No .get_irq is set
+ * here; iss4xx_init_irq() assigns ppc_md.get_irq itself.
+ */
+define_machine(iss4xx) {
+	.name		= "ISS-4xx",
+	.compatible	= "ibm,iss-4xx",
+	.progress	= udbg_progress,
+	.init_IRQ	= iss4xx_init_irq,
+	.setup_arch	= iss4xx_setup_arch,
+	.restart	= ppc4xx_reset_system,
+};
diff --git a/arch/powerpc/platforms/44x/machine_check.c b/arch/powerpc/platforms/44x/machine_check.c
new file mode 100644
index 000000000..5d19daacd
--- /dev/null
+++ b/arch/powerpc/platforms/44x/machine_check.c
@@ -0,0 +1,87 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ */
+
+#include <linux/kernel.h>
+#include <linux/printk.h>
+#include <linux/ptrace.h>
+
+#include <asm/reg.h>
+#include <asm/cacheflush.h>
+
+/*
+ * Machine check handler for 440A cores.
+ *
+ * Prints the cause of the machine check, clears the status bit(s) that were
+ * reported (ESR[IMCP] or the latched MCSR bits) and returns 0.
+ */
+int machine_check_440A(struct pt_regs *regs)
+{
+	unsigned long esr = regs->esr;
+	u32 status;
+
+	printk("Machine check in kernel mode.\n");
+
+	/* Synchronous instruction machine check: only ESR needs clearing */
+	if (esr & ESR_IMCP) {
+		printk("Instruction Synchronous Machine Check exception\n");
+		mtspr(SPRN_ESR, esr & ~ESR_IMCP);
+		return 0;
+	}
+
+	status = mfspr(SPRN_MCSR);
+
+	if (status & MCSR_IB)
+		printk("Instruction Read PLB Error\n");
+	if (status & MCSR_DRB)
+		printk("Data Read PLB Error\n");
+	if (status & MCSR_DWB)
+		printk("Data Write PLB Error\n");
+	if (status & MCSR_TLBP)
+		printk("TLB Parity Error\n");
+	if (status & MCSR_ICP) {
+		flush_instruction_cache();
+		printk("I-Cache Parity Error\n");
+	}
+	if (status & MCSR_DCSP)
+		printk("D-Cache Search Parity Error\n");
+	if (status & MCSR_DCFP)
+		printk("D-Cache Flush Parity Error\n");
+	if (status & MCSR_IMPE)
+		printk("Machine Check exception is imprecise\n");
+
+	/* Write the bits we read back to MCSR to clear them */
+	mtspr(SPRN_MCSR, status);
+
+	return 0;
+}
+
+#ifdef CONFIG_PPC_47x
+/*
+ * Machine check handler for 47x cores.
+ *
+ * Logs the cause at KERN_ERR level. If ESR[IMCP] is set in the saved ESR,
+ * that bit is cleared in SPRN_ESR and the function returns. Otherwise MCSR
+ * is read, each set cause bit is reported, and the value read is written
+ * back to MCSR. Always returns 0.
+ */
+int machine_check_47x(struct pt_regs *regs)
+{
+ unsigned long reason = regs->esr;
+ u32 mcsr;
+
+ printk(KERN_ERR "Machine check in kernel mode.\n");
+ if (reason & ESR_IMCP) {
+ printk(KERN_ERR "Instruction Synchronous Machine Check exception\n");
+ mtspr(SPRN_ESR, reason & ~ESR_IMCP);
+ return 0;
+ }
+ mcsr = mfspr(SPRN_MCSR);
+ if (mcsr & MCSR_IB)
+ printk(KERN_ERR "Instruction Read PLB Error\n");
+ if (mcsr & MCSR_DRB)
+ printk(KERN_ERR "Data Read PLB Error\n");
+ if (mcsr & MCSR_DWB)
+ printk(KERN_ERR "Data Write PLB Error\n");
+ if (mcsr & MCSR_TLBP)
+ printk(KERN_ERR "TLB Parity Error\n");
+ if (mcsr & MCSR_ICP) {
+ /* Flush the instruction cache before reporting the parity error */
+ flush_instruction_cache();
+ printk(KERN_ERR "I-Cache Parity Error\n");
+ }
+ if (mcsr & MCSR_DCSP)
+ printk(KERN_ERR "D-Cache Search Parity Error\n");
+ if (mcsr & PPC47x_MCSR_GPR)
+ printk(KERN_ERR "GPR Parity Error\n");
+ if (mcsr & PPC47x_MCSR_FPR)
+ printk(KERN_ERR "FPR Parity Error\n");
+ if (mcsr & PPC47x_MCSR_IPR)
+ printk(KERN_ERR "Machine Check exception is imprecise\n");
+
+ /* Clear MCSR */
+ mtspr(SPRN_MCSR, mcsr);
+
+ return 0;
+}
+#endif /* CONFIG_PPC_47x */
diff --git a/arch/powerpc/platforms/44x/misc_44x.S b/arch/powerpc/platforms/44x/misc_44x.S
new file mode 100644
index 000000000..3a0c4bd3d
--- /dev/null
+++ b/arch/powerpc/platforms/44x/misc_44x.S
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * This file contains miscellaneous low-level functions for PPC 44x.
+ * Copyright 2007 David Gibson <dwg@au1.ibm.com>, IBM Corporation.
+ */
+
+#include <asm/reg.h>
+#include <asm/ppc_asm.h>
+
+ .text
+
+/*
+ * Do an IO access in AS1
+ */
+/* Byte load from the address in r3 with MSR_DS set; result returned in r3 */
+_GLOBAL(as1_readb)
+ mfmsr r7 /* keep the caller's MSR for the restore below */
+ ori r0,r7,MSR_DS /* r0 = MSR with MSR_DS set */
+ sync
+ mtmsr r0
+ sync
+ isync
+ lbz r3,0(r3) /* the actual access, done while MSR_DS is set */
+ sync
+ mtmsr r7 /* restore the original MSR */
+ sync
+ isync
+ blr
+
+/* Byte store of r3 to the address in r4 with MSR_DS set */
+_GLOBAL(as1_writeb)
+ mfmsr r7 /* keep the caller's MSR for the restore below */
+ ori r0,r7,MSR_DS /* r0 = MSR with MSR_DS set */
+ sync
+ mtmsr r0
+ sync
+ isync
+ stb r3,0(r4) /* the actual access, done while MSR_DS is set */
+ sync
+ mtmsr r7 /* restore the original MSR */
+ sync
+ isync
+ blr
diff --git a/arch/powerpc/platforms/44x/ppc44x_simple.c b/arch/powerpc/platforms/44x/ppc44x_simple.c
new file mode 100644
index 000000000..971786ff1
--- /dev/null
+++ b/arch/powerpc/platforms/44x/ppc44x_simple.c
@@ -0,0 +1,85 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Generic PowerPC 44x platform support
+ *
+ * Copyright 2008 IBM Corporation
+ *
+ * This implements simple platform support for PowerPC 44x chips. This is
+ * mostly used for eval boards or other simple and "generic" 44x boards. If
+ * your board has custom functions or hardware, then you will likely want to
+ * implement your own board.c file to accommodate it.
+ */
+
+#include <asm/machdep.h>
+#include <asm/pci-bridge.h>
+#include <asm/ppc4xx.h>
+#include <asm/time.h>
+#include <asm/udbg.h>
+#include <asm/uic.h>
+
+#include <linux/init.h>
+#include <linux/of_platform.h>
+
+static const struct of_device_id ppc44x_of_bus[] __initconst = {
+ { .compatible = "ibm,plb4", },
+ { .compatible = "ibm,opb", },
+ { .compatible = "ibm,ebc", },
+ { .compatible = "simple-bus", },
+ {},
+};
+
+static int __init ppc44x_device_probe(void)
+{
+ of_platform_bus_probe(NULL, ppc44x_of_bus, NULL);
+
+ return 0;
+}
+machine_device_initcall(ppc44x_simple, ppc44x_device_probe);
+
+/* This is the list of boards that can be supported by this simple
+ * platform code. This does _not_ mean the boards are compatible,
+ * as they most certainly are not from a device tree perspective.
+ * However, their differences are handled by the device tree and the
+ * drivers and therefore they don't need custom board support files.
+ *
+ * Again, if your board needs to do things differently then create a
+ * board.c file for it rather than adding it to this list.
+ */
+static char *board[] __initdata = {
+ "amcc,arches",
+ "amcc,bamboo",
+ "apm,bluestone",
+ "amcc,glacier",
+ "ibm,ebony",
+ "amcc,eiger",
+ "amcc,katmai",
+ "amcc,rainier",
+ "amcc,redwood",
+ "amcc,sequoia",
+ "amcc,taishan",
+ "amcc,yosemite",
+ "mosaixtech,icon"
+};
+
+/*
+ * Machine probe: returns 1 if the running machine is compatible with any
+ * entry of board[], 0 otherwise. On a match PCI is told to reassign all
+ * resources.
+ */
+static int __init ppc44x_probe(void)
+{
+	int idx;
+
+	for (idx = 0; idx < ARRAY_SIZE(board); idx++) {
+		if (!of_machine_is_compatible(board[idx]))
+			continue;
+
+		pci_set_flags(PCI_REASSIGN_ALL_RSRC);
+		return 1;
+	}
+
+	return 0;
+}
+
+define_machine(ppc44x_simple) {
+ .name = "PowerPC 44x Platform",
+ .probe = ppc44x_probe,
+ .progress = udbg_progress,
+ .init_IRQ = uic_init_tree,
+ .get_irq = uic_get_irq,
+ .restart = ppc4xx_reset_system,
+};
diff --git a/arch/powerpc/platforms/44x/ppc476.c b/arch/powerpc/platforms/44x/ppc476.c
new file mode 100644
index 000000000..164cbcd45
--- /dev/null
+++ b/arch/powerpc/platforms/44x/ppc476.c
@@ -0,0 +1,290 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * PowerPC 476FPE board specific routines
+ *
+ * Copyright © 2013 Tony Breeds IBM Corporation
+ * Copyright © 2013 Alistair Popple IBM Corporation
+ *
+ * Based on earlier code:
+ * Matt Porter <mporter@kernel.crashing.org>
+ * Copyright 2002-2005 MontaVista Software Inc.
+ *
+ * Eugene Surovegin <eugene.surovegin@zultys.com> or <ebs@ebshome.net>
+ * Copyright (c) 2003-2005 Zultys Technologies
+ *
+ * Rewritten and ported to the merged powerpc tree:
+ * Copyright 2007 David Gibson <dwg@au1.ibm.com>, IBM Corporation.
+ * Copyright © 2011 David Kliekamp IBM Corporation
+ */
+
+#include <linux/init.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_platform.h>
+#include <linux/rtc.h>
+
+#include <asm/machdep.h>
+#include <asm/udbg.h>
+#include <asm/time.h>
+#include <asm/uic.h>
+#include <asm/ppc4xx.h>
+#include <asm/mpic.h>
+#include <asm/mmu.h>
+#include <asm/swiotlb.h>
+
+#include <linux/pci.h>
+#include <linux/i2c.h>
+
+static const struct of_device_id ppc47x_of_bus[] __initconst = {
+ { .compatible = "ibm,plb4", },
+ { .compatible = "ibm,plb6", },
+ { .compatible = "ibm,opb", },
+ { .compatible = "ibm,ebc", },
+ {},
+};
+
+/*
+ * PCI header fixup for device 1033:0035 on Currituck boards. The EEPROM is
+ * missing and the default values are bogus, so write known values to config
+ * registers 0xe0 and 0xe4; this forces USB into EHCI mode.
+ */
+static void quirk_ppc_currituck_usb_fixup(struct pci_dev *dev)
+{
+	/* Nothing to do on any other machine */
+	if (!of_machine_is_compatible("ibm,currituck"))
+		return;
+
+	pci_write_config_dword(dev, 0xe0, 0x0114231f);
+	pci_write_config_dword(dev, 0xe4, 0x00006c40);
+}
+DECLARE_PCI_FIXUP_HEADER(0x1033, 0x0035, quirk_ppc_currituck_usb_fixup);
+
+/* Akebono has an AVR microcontroller attached to the I2C bus
+ * which is used to power off/reset the system. */
+
+/* AVR I2C Commands */
+#define AVR_PWRCTL_CMD (0x26)
+
+/* Flags for the power control I2C commands */
+#define AVR_PWRCTL_PWROFF (0x01)
+#define AVR_PWRCTL_RESET (0x02)
+
+static struct i2c_client *avr_i2c_client;
+/*
+ * Send the power-control command with @pwrctl_flags to the AVR over I2C,
+ * then spin forever; this function never returns.
+ */
+static void __noreturn avr_halt_system(int pwrctl_flags)
+{
+	/* Ask the AVR to act on the requested power-control flags */
+	i2c_smbus_write_byte_data(avr_i2c_client, AVR_PWRCTL_CMD,
+				  pwrctl_flags);
+
+	/* Nothing left to do but wait for the AVR to take effect */
+	for (;;)
+		;
+}
+
+static void avr_power_off_system(void)
+{
+ avr_halt_system(AVR_PWRCTL_PWROFF);
+}
+
+static void __noreturn avr_reset_system(char *cmd)
+{
+ avr_halt_system(AVR_PWRCTL_RESET);
+}
+
+static int avr_probe(struct i2c_client *client)
+{
+ avr_i2c_client = client;
+ ppc_md.restart = avr_reset_system;
+ pm_power_off = avr_power_off_system;
+ return 0;
+}
+
+static const struct i2c_device_id avr_id[] = {
+ { "akebono-avr", 0 },
+ { }
+};
+
+static struct i2c_driver avr_driver = {
+ .driver = {
+ .name = "akebono-avr",
+ },
+ .probe = avr_probe,
+ .id_table = avr_id,
+};
+
+/*
+ * Device initcall: register the AVR I2C driver and probe the platform buses
+ * listed in ppc47x_of_bus. Always returns 0.
+ */
+static int __init ppc47x_device_probe(void)
+{
+	int err;
+
+	/* Best effort: without the AVR driver the default restart hook stays
+	 * in place, so report the failure but keep probing the buses.
+	 */
+	err = i2c_add_driver(&avr_driver);
+	if (err)
+		pr_warn("%s: failed to register akebono-avr driver: %d\n",
+			__func__, err);
+
+	of_platform_bus_probe(NULL, ppc47x_of_bus, NULL);
+
+	return 0;
+}
+machine_device_initcall(ppc47x_akebono, ppc47x_device_probe);
+machine_device_initcall(ppc47x_currituck, ppc47x_device_probe);
+
+/*
+ * Locate the top level interrupt controller (an "interrupt-controller" node
+ * that has no "interrupts" property) and initialise it. Only
+ * "chrp,open-pic" compatible controllers are handled; anything else panics.
+ */
+static void __init ppc47x_init_irq(void)
+{
+	struct device_node *ctrl;
+	struct mpic *mpic;
+
+	/* Find top level interrupt controller */
+	for_each_node_with_property(ctrl, "interrupt-controller") {
+		if (!of_property_present(ctrl, "interrupts"))
+			break;
+	}
+	if (!ctrl)
+		panic("Can't find top level interrupt controller");
+
+	/* Check type and do appropriate initialization */
+	if (!of_device_is_compatible(ctrl, "chrp,open-pic"))
+		panic("Unrecognized top level interrupt controller");
+
+	/* The MPIC driver will get everything it needs from the
+	 * device-tree, just pass 0 to all arguments
+	 */
+	mpic = mpic_alloc(ctrl, 0, MPIC_NO_RESET, 0, 0, " MPIC ");
+	BUG_ON(mpic == NULL);
+	mpic_init(mpic);
+	ppc_md.get_irq = mpic_get_irq;
+
+	of_node_put(ctrl);
+}
+
+#ifdef CONFIG_SMP
+static void smp_ppc47x_setup_cpu(int cpu)
+{
+ mpic_setup_this_cpu();
+}
+
+/*
+ * Release secondary CPU @cpu from the spin table.
+ *
+ * Reads "cpu-release-addr" from the CPU's device-tree node, then stores the
+ * CPU number in spin_table[3] and the physical address of
+ * start_secondary_47x in spin_table[1].
+ *
+ * Returns 0 on success or -ENOENT if the property is missing.
+ */
+static int smp_ppc47x_kick_cpu(int cpu)
+{
+	struct device_node *cpunode = of_get_cpu_node(cpu, NULL);
+	const u64 *spin_table_addr_prop;
+	u64 spin_table_addr;
+	u32 *spin_table;
+	extern void start_secondary_47x(void);
+
+	BUG_ON(cpunode == NULL);
+
+	/* Assume spin table. We could test for the enable-method in
+	 * the device-tree but currently there's little point as it's
+	 * our only supported method
+	 */
+	spin_table_addr_prop =
+		of_get_property(cpunode, "cpu-release-addr", NULL);
+
+	if (spin_table_addr_prop == NULL) {
+		pr_err("CPU%d: Can't start, missing cpu-release-addr !\n",
+		       cpu);
+		of_node_put(cpunode);
+		/* Report a proper negative errno rather than a bare 1 */
+		return -ENOENT;
+	}
+
+	/* Copy the value out, then drop the reference taken by
+	 * of_get_cpu_node(); the node is not needed past this point.
+	 */
+	spin_table_addr = *spin_table_addr_prop;
+	of_node_put(cpunode);
+
+	/* Assume it's mapped as part of the linear mapping. This is a bit
+	 * fishy but will work fine for now
+	 *
+	 * XXX: Is there any reason to assume differently?
+	 */
+	spin_table = (u32 *)__va(spin_table_addr);
+	pr_debug("CPU%d: Spin table mapped at %p\n", cpu, spin_table);
+
+	/* The CPU number must be visible before the entry point is written */
+	spin_table[3] = cpu;
+	smp_wmb();
+	spin_table[1] = __pa(start_secondary_47x);
+	mb();
+
+	return 0;
+}
+
+static struct smp_ops_t ppc47x_smp_ops = {
+ .probe = smp_mpic_probe,
+ .message_pass = smp_mpic_message_pass,
+ .setup_cpu = smp_ppc47x_setup_cpu,
+ .kick_cpu = smp_ppc47x_kick_cpu,
+ .give_timebase = smp_generic_give_timebase,
+ .take_timebase = smp_generic_take_timebase,
+};
+
+static void __init ppc47x_smp_init(void)
+{
+ if (mmu_has_feature(MMU_FTR_TYPE_47x))
+ smp_ops = &ppc47x_smp_ops;
+}
+
+#else /* CONFIG_SMP */
+static void __init ppc47x_smp_init(void) { }
+#endif /* CONFIG_SMP */
+
+static void __init ppc47x_setup_arch(void)
+{
+
+ /* No need to check the DMA config as we /know/ our windows are all of
+ * RAM. Lets hope that doesn't change */
+ swiotlb_detect_4g();
+
+ ppc47x_smp_init();
+}
+
+static int board_rev = -1;
+/*
+ * Arch initcall: read the board revision from the board FPGA into board_rev.
+ *
+ * The FPGA node and the byte offset of the revision register depend on the
+ * machine (offset 0 on currituck, 2 on akebono). Only the low two bits of
+ * the register are kept. If the node is missing or cannot be mapped,
+ * board_rev keeps its initial value of -1. Always returns 0.
+ */
+static int __init ppc47x_get_board_rev(void)
+{
+ int reg;
+ u8 __iomem *fpga;
+ struct device_node *np = NULL;
+
+ if (of_machine_is_compatible("ibm,currituck")) {
+ np = of_find_compatible_node(NULL, NULL, "ibm,currituck-fpga");
+ reg = 0;
+ } else if (of_machine_is_compatible("ibm,akebono")) {
+ np = of_find_compatible_node(NULL, NULL, "ibm,akebono-fpga");
+ reg = 2;
+ }
+
+ /* np is still NULL here on any other machine, so reg is never read unset */
+ if (!np)
+ goto fail;
+
+ fpga = of_iomap(np, 0);
+ of_node_put(np);
+ if (!fpga)
+ goto fail;
+
+ board_rev = ioread8(fpga + reg) & 0x03;
+ pr_info("%s: Found board revision %d\n", __func__, board_rev);
+ iounmap(fpga);
+ return 0;
+
+fail:
+ pr_info("%s: Unable to find board revision\n", __func__);
+ return 0;
+}
+machine_arch_initcall(ppc47x_akebono, ppc47x_get_board_rev);
+machine_arch_initcall(ppc47x_currituck, ppc47x_get_board_rev);
+
+/* The USB controller should have been hardware swizzled but it wasn't :( */
+/*
+ * PCI irq fixup for devices 1033:0035 and 1033:00e0: map the interrupt
+ * according to board_rev (47 for revision 0, 49 for revision 2). Any other
+ * revision only raises an alert.
+ */
+static void ppc47x_pci_irq_fixup(struct pci_dev *dev)
+{
+	unsigned int hwirq;
+
+	if (dev->vendor != 0x1033)
+		return;
+	if (dev->device != 0x0035 && dev->device != 0x00e0)
+		return;
+
+	switch (board_rev) {
+	case 0:
+		hwirq = 47;
+		break;
+	case 2:
+		hwirq = 49;
+		break;
+	default:
+		pr_alert("%s: Unknown board revision\n", __func__);
+		return;
+	}
+
+	dev->irq = irq_create_mapping(NULL, hwirq);
+	pr_info("%s: Mapping irq %d\n", __func__, dev->irq);
+}
+
+define_machine(ppc47x_akebono) {
+ .name = "PowerPC 47x (akebono)",
+ .compatible = "ibm,akebono",
+ .progress = udbg_progress,
+ .init_IRQ = ppc47x_init_irq,
+ .setup_arch = ppc47x_setup_arch,
+ .restart = ppc4xx_reset_system,
+};
+
+define_machine(ppc47x_currituck) {
+ .name = "PowerPC 47x (currituck)",
+ .compatible = "ibm,currituck",
+ .progress = udbg_progress,
+ .init_IRQ = ppc47x_init_irq,
+ .pci_irq_fixup = ppc47x_pci_irq_fixup,
+ .setup_arch = ppc47x_setup_arch,
+ .restart = ppc4xx_reset_system,
+};
diff --git a/arch/powerpc/platforms/44x/ppc476_modules.lds b/arch/powerpc/platforms/44x/ppc476_modules.lds
new file mode 100644
index 000000000..9fec5d34b
--- /dev/null
+++ b/arch/powerpc/platforms/44x/ppc476_modules.lds
@@ -0,0 +1,15 @@
+SECTIONS
+{
+ .text : ALIGN(4096)
+ {
+ *(.text .text.* .fixup)
+ }
+ .init.text : ALIGN(4096)
+ {
+ *(.init.text .init.text.*)
+ }
+ .exit.text : ALIGN(4096)
+ {
+ *(.exit.text .exit.text.*)
+ }
+}
diff --git a/arch/powerpc/platforms/44x/sam440ep.c b/arch/powerpc/platforms/44x/sam440ep.c
new file mode 100644
index 000000000..5cdaa4068
--- /dev/null
+++ b/arch/powerpc/platforms/44x/sam440ep.c
@@ -0,0 +1,69 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Sam440ep board specific routines based off bamboo.c code
+ * original copyrights below
+ *
+ * Wade Farnsworth <wfarnsworth@mvista.com>
+ * Copyright 2004 MontaVista Software Inc.
+ *
+ * Rewritten and ported to the merged powerpc tree:
+ * Josh Boyer <jwboyer@linux.vnet.ibm.com>
+ * Copyright 2007 IBM Corporation
+ *
+ * Modified from bamboo.c for sam440ep:
+ * Copyright 2008 Giuseppe Coviello <gicoviello@gmail.com>
+ */
+#include <linux/init.h>
+#include <linux/of_platform.h>
+
+#include <asm/machdep.h>
+#include <asm/udbg.h>
+#include <asm/time.h>
+#include <asm/uic.h>
+#include <asm/pci-bridge.h>
+#include <asm/ppc4xx.h>
+#include <linux/i2c.h>
+
+static const struct of_device_id sam440ep_of_bus[] __initconst = {
+ { .compatible = "ibm,plb4", },
+ { .compatible = "ibm,opb", },
+ { .compatible = "ibm,ebc", },
+ {},
+};
+
+static int __init sam440ep_device_probe(void)
+{
+ of_platform_bus_probe(NULL, sam440ep_of_bus, NULL);
+
+ return 0;
+}
+machine_device_initcall(sam440ep, sam440ep_device_probe);
+
+static int __init sam440ep_probe(void)
+{
+ pci_set_flags(PCI_REASSIGN_ALL_RSRC);
+
+ return 1;
+}
+
+define_machine(sam440ep) {
+ .name = "Sam440ep",
+ .compatible = "acube,sam440ep",
+ .probe = sam440ep_probe,
+ .progress = udbg_progress,
+ .init_IRQ = uic_init_tree,
+ .get_irq = uic_get_irq,
+ .restart = ppc4xx_reset_system,
+};
+
+static struct i2c_board_info sam440ep_rtc_info = {
+ .type = "m41st85",
+ .addr = 0x68,
+ .irq = -1,
+};
+
+static int __init sam440ep_setup_rtc(void)
+{
+ return i2c_register_board_info(0, &sam440ep_rtc_info, 1);
+}
+machine_device_initcall(sam440ep, sam440ep_setup_rtc);
diff --git a/arch/powerpc/platforms/44x/warp.c b/arch/powerpc/platforms/44x/warp.c
new file mode 100644
index 000000000..bf0188dcb
--- /dev/null
+++ b/arch/powerpc/platforms/44x/warp.c
@@ -0,0 +1,329 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * PIKA Warp(tm) board specific routines
+ *
+ * Copyright (c) 2008-2009 PIKA Technologies
+ * Sean MacLennan <smaclennan@pikatech.com>
+ */
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/of_platform.h>
+#include <linux/kthread.h>
+#include <linux/leds.h>
+#include <linux/i2c.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/gpio/consumer.h>
+#include <linux/slab.h>
+#include <linux/export.h>
+
+#include <asm/machdep.h>
+#include <asm/udbg.h>
+#include <asm/time.h>
+#include <asm/uic.h>
+#include <asm/ppc4xx.h>
+#include <asm/dma.h>
+
+
+static const struct of_device_id warp_of_bus[] __initconst = {
+ { .compatible = "ibm,plb4", },
+ { .compatible = "ibm,opb", },
+ { .compatible = "ibm,ebc", },
+ {},
+};
+
+static int __init warp_device_probe(void)
+{
+ of_platform_bus_probe(NULL, warp_of_bus, NULL);
+ return 0;
+}
+machine_device_initcall(warp, warp_device_probe);
+
+define_machine(warp) {
+ .name = "Warp",
+ .compatible = "pika,warp",
+ .progress = udbg_progress,
+ .init_IRQ = uic_init_tree,
+ .get_irq = uic_get_irq,
+ .restart = ppc4xx_reset_system,
+};
+
+
+/*
+ * Print the two POST words held at offsets 0x40 and 0x44 of the
+ * "pika,fpga-sd" region, or "Warp POST OK" when both are zero.
+ *
+ * Returns 0 on success, -ENOENT if the node is missing or cannot be mapped.
+ */
+static int __init warp_post_info(void)
+{
+	struct device_node *sd_node;
+	void __iomem *sd_regs;
+	u32 code1, code2;
+
+	/* Sighhhh... POST information is in the sd area. */
+	sd_node = of_find_compatible_node(NULL, NULL, "pika,fpga-sd");
+	if (!sd_node)
+		return -ENOENT;
+
+	sd_regs = of_iomap(sd_node, 0);
+	of_node_put(sd_node);
+	if (!sd_regs)
+		return -ENOENT;
+
+	code1 = in_be32(sd_regs + 0x40);
+	code2 = in_be32(sd_regs + 0x44);
+
+	iounmap(sd_regs);
+
+	if (!code1 && !code2)
+		printk(KERN_INFO "Warp POST OK\n");
+	else
+		printk(KERN_INFO "Warp POST %08x %08x\n", code1, code2);
+
+	return 0;
+}
+
+
+#ifdef CONFIG_SENSORS_AD7414
+
+static void __iomem *dtm_fpga;
+
+#define WARP_GREEN_LED 0
+#define WARP_RED_LED 1
+
+static struct gpio_led warp_gpio_led_pins[] = {
+ [WARP_GREEN_LED] = {
+ .name = "green",
+ .default_state = LEDS_DEFSTATE_KEEP,
+ .gpiod = NULL, /* to be filled by pika_setup_leds() */
+ },
+ [WARP_RED_LED] = {
+ .name = "red",
+ .default_state = LEDS_DEFSTATE_KEEP,
+ .gpiod = NULL, /* to be filled by pika_setup_leds() */
+ },
+};
+
+static struct gpio_led_platform_data warp_gpio_led_data = {
+ .leds = warp_gpio_led_pins,
+ .num_leds = ARRAY_SIZE(warp_gpio_led_pins),
+};
+
+static struct platform_device warp_gpio_leds = {
+ .name = "leds-gpio",
+ .id = -1,
+ .dev = {
+ .platform_data = &warp_gpio_led_data,
+ },
+};
+
+/*
+ * Interrupt handler installed by pika_setup_critical_temp() for the ad7414
+ * irq. It does not return: with local interrupts disabled it switches the
+ * green LED off, logs the shutdown message, and then loops forever toggling
+ * the red LED every 500 ms.
+ */
+static irqreturn_t temp_isr(int irq, void *context)
+{
+ int value = 1;
+
+ local_irq_disable();
+
+ gpiod_set_value(warp_gpio_led_pins[WARP_GREEN_LED].gpiod, 0);
+
+ printk(KERN_EMERG "\n\nCritical Temperature Shutdown\n\n");
+
+ while (1) {
+ if (dtm_fpga) {
+ /* Read FPGA register 0x14 and write the same value back.
+ * NOTE(review): presumably this acknowledges/holds a reset
+ * condition in the FPGA - confirm against the FPGA docs.
+ */
+ unsigned reset = in_be32(dtm_fpga + 0x14);
+ out_be32(dtm_fpga + 0x14, reset);
+ }
+
+ gpiod_set_value(warp_gpio_led_pins[WARP_RED_LED].gpiod, value);
+ value ^= 1;
+ mdelay(500);
+ }
+
+ /* Not reached */
+ return IRQ_HANDLED;
+}
+
+/*
+ * Acquire the power LED GPIOs and register the leds-gpio platform device.
+ *
+ * The green and red power LEDs are normally driven by the leds-gpio driver,
+ * but in case of critical temperature shutdown we want to drive them
+ * ourselves. So we acquire both here and create the leds-gpio platform
+ * device ourselves instead of doing it through the device tree. This way we
+ * can still keep access to the gpios and use them when needed.
+ *
+ * Returns 0 on success (also when no LED was found under the node), -ENOENT
+ * if the "warp-power-leds" node is missing, or the error reported by the
+ * GPIO lookup or the platform device registration. On those errors every
+ * GPIO acquired so far is released again.
+ */
+static int pika_setup_leds(void)
+{
+	struct device_node *np, *child;
+	struct gpio_desc *gpio;
+	struct gpio_led *led;
+	int led_count = 0;
+	int error;
+	int i;
+
+	np = of_find_compatible_node(NULL, NULL, "warp-power-leds");
+	if (!np) {
+		printk(KERN_ERR __FILE__ ": Unable to find leds\n");
+		return -ENOENT;
+	}
+
+	for_each_child_of_node(np, child) {
+		for (i = 0; i < ARRAY_SIZE(warp_gpio_led_pins); i++) {
+			led = &warp_gpio_led_pins[i];
+
+			if (!of_node_name_eq(child, led->name))
+				continue;
+
+			if (led->gpiod) {
+				printk(KERN_ERR __FILE__ ": %s led has already been defined\n",
+				       led->name);
+				continue;
+			}
+
+			gpio = fwnode_gpiod_get_index(of_fwnode_handle(child),
+						      NULL, 0, GPIOD_ASIS,
+						      led->name);
+			error = PTR_ERR_OR_ZERO(gpio);
+			if (error) {
+				printk(KERN_ERR __FILE__ ": Failed to get %s led gpio: %d\n",
+				       led->name, error);
+				/* Leaving the iterator early: drop the child
+				 * reference it holds and the parent reference
+				 * taken by of_find_compatible_node().
+				 */
+				of_node_put(child);
+				of_node_put(np);
+				goto err_cleanup_pins;
+			}
+
+			led->gpiod = gpio;
+			led_count++;
+		}
+	}
+
+	of_node_put(np);
+
+	/* Skip device registration if no leds have been defined */
+	if (led_count) {
+		error = platform_device_register(&warp_gpio_leds);
+		if (error) {
+			printk(KERN_ERR __FILE__ ": Unable to add leds-gpio: %d\n",
+			       error);
+			goto err_cleanup_pins;
+		}
+	}
+
+	return 0;
+
+err_cleanup_pins:
+	for (i = 0; i < ARRAY_SIZE(warp_gpio_led_pins); i++) {
+		led = &warp_gpio_led_pins[i];
+		gpiod_put(led->gpiod);
+		led->gpiod = NULL;
+	}
+	return error;
+}
+
+/*
+ * Program the ad7414 limit registers through @client and hook temp_isr to
+ * the interrupt described by @np. Failures are only logged.
+ */
+static void pika_setup_critical_temp(struct device_node *np,
+				     struct i2c_client *client)
+{
+	int virq, err;
+
+	/* Do this before enabling critical temp interrupt since we
+	 * may immediately interrupt.
+	 */
+	pika_setup_leds();
+
+	/* These registers are in 1 degree increments. */
+	i2c_smbus_write_byte_data(client, 2, 65);	/* Thigh */
+	i2c_smbus_write_byte_data(client, 3, 0);	/* Tlow */
+
+	virq = irq_of_parse_and_map(np, 0);
+	if (!virq) {
+		printk(KERN_ERR __FILE__ ": Unable to get ad7414 irq\n");
+		return;
+	}
+
+	err = request_irq(virq, temp_isr, 0, "ad7414", NULL);
+	if (err)
+		printk(KERN_ERR __FILE__
+		       ": Unable to request ad7414 irq %d = %d\n", virq, err);
+}
+
+/*
+ * Sample bit 14 of the FPGA register at offset 0x34 and warn when it changes
+ * from clear to set. The previous sample is kept across calls so that the
+ * warning is printed once per transition.
+ */
+static inline void pika_dtm_check_fan(void __iomem *fpga)
+{
+	static int last_fan;
+	u32 now = in_be32(fpga + 0x34) & (1 << 14);
+
+	if (last_fan == now)
+		return;
+
+	last_fan = now;
+	if (now)
+		printk(KERN_WARNING "Fan rotation error detected."
+		       " Please check hardware.\n");
+}
+
+/*
+ * Kernel thread body for the Warp DTM.
+ *
+ * Looks up the "adi,ad7414" sensor, sets up the critical temperature
+ * handling, then once per second reads word register 0 of the sensor,
+ * byte-swaps it, writes it to @fpga + 0x20 and checks the fan status. Runs
+ * until kthread_should_stop().
+ *
+ * Returns 0 when stopped, -ENOENT if the sensor node or its i2c device
+ * cannot be found.
+ */
+static int pika_dtm_thread(void __iomem *fpga)
+{
+	struct device_node *np;
+	struct i2c_client *client;
+
+	np = of_find_compatible_node(NULL, NULL, "adi,ad7414");
+	if (np == NULL)
+		return -ENOENT;
+
+	client = of_find_i2c_device_by_node(np);
+	if (client == NULL) {
+		of_node_put(np);
+		return -ENOENT;
+	}
+
+	pika_setup_critical_temp(np, client);
+
+	of_node_put(np);
+
+	printk(KERN_INFO "Warp DTM thread running.\n");
+
+	while (!kthread_should_stop()) {
+		int val;
+
+		val = i2c_smbus_read_word_data(client, 0);
+		if (val < 0)
+			dev_dbg(&client->dev, "DTM read temp failed.\n");
+		else {
+			s16 temp = swab16(val);
+			out_be32(fpga + 0x20, temp);
+		}
+
+		pika_dtm_check_fan(fpga);
+
+		set_current_state(TASK_INTERRUPTIBLE);
+		schedule_timeout(HZ);
+	}
+
+	/* Drop the device reference taken by of_find_i2c_device_by_node() */
+	put_device(&client->dev);
+
+	return 0;
+}
+
+static int __init pika_dtm_start(void)
+{
+ struct task_struct *dtm_thread;
+ struct device_node *np;
+
+ np = of_find_compatible_node(NULL, NULL, "pika,fpga");
+ if (np == NULL)
+ return -ENOENT;
+
+ dtm_fpga = of_iomap(np, 0);
+ of_node_put(np);
+ if (dtm_fpga == NULL)
+ return -ENOENT;
+
+ /* Must get post info before thread starts. */
+ warp_post_info();
+
+ dtm_thread = kthread_run(pika_dtm_thread, dtm_fpga, "pika-dtm");
+ if (IS_ERR(dtm_thread)) {
+ iounmap(dtm_fpga);
+ return PTR_ERR(dtm_thread);
+ }
+
+ return 0;
+}
+machine_late_initcall(warp, pika_dtm_start);
+
+#else /* !CONFIG_SENSORS_AD7414 */
+
+machine_late_initcall(warp, warp_post_info);
+
+#endif
diff --git a/arch/powerpc/platforms/4xx/Makefile b/arch/powerpc/platforms/4xx/Makefile
new file mode 100644
index 000000000..2071a0abe
--- /dev/null
+++ b/arch/powerpc/platforms/4xx/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0-only
+obj-y += uic.o machine_check.o
+obj-$(CONFIG_4xx_SOC) += soc.o
+obj-$(CONFIG_PCI) += pci.o
+obj-$(CONFIG_PPC4xx_HSTA_MSI) += hsta_msi.o
+obj-$(CONFIG_PPC4xx_CPM) += cpm.o
+obj-$(CONFIG_PPC4xx_GPIO) += gpio.o
diff --git a/arch/powerpc/platforms/4xx/cpm.c b/arch/powerpc/platforms/4xx/cpm.c
new file mode 100644
index 000000000..670f8ad44
--- /dev/null
+++ b/arch/powerpc/platforms/4xx/cpm.c
@@ -0,0 +1,332 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * PowerPC 4xx Clock and Power Management
+ *
+ * Copyright (C) 2010, Applied Micro Circuits Corporation
+ * Victor Gallardo (vgallardo@apm.com)
+ *
+ * Based on arch/powerpc/platforms/44x/idle.c:
+ * Jerone Young <jyoung5@us.ibm.com>
+ * Copyright 2008 IBM Corp.
+ *
+ * Based on arch/powerpc/sysdev/fsl_pmc.c:
+ * Anton Vorontsov <avorontsov@ru.mvista.com>
+ * Copyright 2009 MontaVista Software, Inc.
+ *
+ * See file CREDITS for list of people who contributed to this
+ * project.
+ */
+
+#include <linux/kernel.h>
+#include <linux/of.h>
+#include <linux/sysfs.h>
+#include <linux/cpu.h>
+#include <linux/suspend.h>
+#include <asm/dcr.h>
+#include <asm/dcr-native.h>
+#include <asm/machdep.h>
+
+#define CPM_ER 0
+#define CPM_FR 1
+#define CPM_SR 2
+
+#define CPM_IDLE_WAIT 0
+#define CPM_IDLE_DOZE 1
+
+struct cpm {
+ dcr_host_t dcr_host;
+ unsigned int dcr_offset[3];
+ unsigned int powersave_off;
+ unsigned int unused;
+ unsigned int idle_doze;
+ unsigned int standby;
+ unsigned int suspend;
+};
+
+static struct cpm cpm;
+
+struct cpm_idle_mode {
+ unsigned int enabled;
+ const char *name;
+};
+
+static struct cpm_idle_mode idle_mode[] = {
+ [CPM_IDLE_WAIT] = { 1, "wait" }, /* default */
+ [CPM_IDLE_DOZE] = { 0, "doze" },
+};
+
+/*
+ * OR @mask into the CPM register selected by @cpm_reg (CPM_ER, CPM_FR or
+ * CPM_SR) and return the value the register held before the update, so the
+ * caller can restore it later if needed.
+ */
+static unsigned int cpm_set(unsigned int cpm_reg, unsigned int mask)
+{
+	unsigned int dcrn = cpm.dcr_offset[cpm_reg];
+	unsigned int old;
+
+	/* CPM controller supports 3 different types of sleep interface
+	 * known as class 1, 2 and 3. For class 1 units, they are
+	 * unconditionally put to sleep when the corresponding CPM bit is
+	 * set. For class 2 and 3 units this is not case; if they can be
+	 * put to sleep, they will. Here we do not verify, we just
+	 * set them and expect them to eventually go off when they can.
+	 */
+	old = dcr_read(cpm.dcr_host, dcrn);
+	dcr_write(cpm.dcr_host, dcrn, old | mask);
+
+	return old;
+}
+
+/*
+ * Enter the wait state: set MSR_WE together with MSR_EE, MSR_CE and MSR_DE
+ * in the MSR, then put the saved MSR value back afterwards.
+ */
+static void cpm_idle_wait(void)
+{
+ unsigned long msr_save;
+
+ /* save off initial state */
+ msr_save = mfmsr();
+ /* sync required when CPM0_ER[CPU] is set */
+ mb();
+ /* set wait state MSR */
+ mtmsr(msr_save|MSR_WE|MSR_EE|MSR_CE|MSR_DE);
+ isync();
+ /* return to initial state */
+ mtmsr(msr_save);
+ isync();
+}
+
+static void cpm_idle_sleep(unsigned int mask)
+{
+ unsigned int er_save;
+
+ /* update CPM_ER state */
+ er_save = cpm_set(CPM_ER, mask);
+
+ /* go to wait state so that CPM0_ER[CPU] can take effect */
+ cpm_idle_wait();
+
+ /* restore CPM_ER state */
+ dcr_write(cpm.dcr_host, cpm.dcr_offset[CPM_ER], er_save);
+}
+
+static void cpm_idle_doze(void)
+{
+ cpm_idle_sleep(cpm.idle_doze);
+}
+
+/*
+ * Make @mode the only enabled entry of idle_mode[]. Does nothing when it is
+ * already enabled.
+ */
+static void cpm_idle_config(int mode)
+{
+	struct cpm_idle_mode *entry;
+
+	if (idle_mode[mode].enabled)
+		return;
+
+	/* Clear every entry first, then switch the requested one on */
+	for (entry = idle_mode; entry < idle_mode + ARRAY_SIZE(idle_mode);
+	     entry++)
+		entry->enabled = 0;
+
+	idle_mode[mode].enabled = 1;
+}
+
+/*
+ * sysfs show callback for the "idle" attribute.
+ *
+ * Emits the names of all idle modes separated by spaces, with the enabled
+ * one in square brackets, followed by a newline. Returns the number of
+ * bytes written to @buf.
+ */
+static ssize_t cpm_idle_show(struct kobject *kobj,
+			     struct kobj_attribute *attr, char *buf)
+{
+	int len = 0;
+	int i;
+
+	/* sysfs_emit_at() bounds every write to the sysfs page buffer */
+	for (i = 0; i < ARRAY_SIZE(idle_mode); i++) {
+		if (idle_mode[i].enabled)
+			len += sysfs_emit_at(buf, len, "[%s] ",
+					     idle_mode[i].name);
+		else
+			len += sysfs_emit_at(buf, len, "%s ",
+					     idle_mode[i].name);
+	}
+
+	buf[len - 1] = '\n'; /* convert the last space to a newline */
+
+	return len;
+}
+
+/*
+ * sysfs store callback for the "idle" attribute.
+ *
+ * Takes the text up to the first newline (or all @n bytes if there is none)
+ * and enables the idle mode whose name matches it exactly. Returns @n on
+ * success or -EINVAL when no mode has that name.
+ */
+static ssize_t cpm_idle_store(struct kobject *kobj,
+			      struct kobj_attribute *attr,
+			      const char *buf, size_t n)
+{
+	int i;
+	const char *p;
+	size_t len;
+
+	p = memchr(buf, '\n', n);
+	len = p ? (size_t)(p - buf) : n;
+
+	for (i = 0; i < ARRAY_SIZE(idle_mode); i++) {
+		const char *name = idle_mode[i].name;
+
+		/* Compare lengths too: strncmp() alone would accept an empty
+		 * string or any prefix of a mode name.
+		 */
+		if (strlen(name) == len && strncmp(buf, name, len) == 0) {
+			cpm_idle_config(i);
+			return n;
+		}
+	}
+
+	return -EINVAL;
+}
+
+static struct kobj_attribute cpm_idle_attr =
+ __ATTR(idle, 0644, cpm_idle_show, cpm_idle_store);
+
+/*
+ * Create the "idle" sysfs attribute on the CPU 0 device. A failure is only
+ * logged.
+ */
+static void __init cpm_idle_config_sysfs(void)
+{
+	struct device *dev;
+	int ret;
+
+	dev = get_cpu_device(0);
+
+	/* get_cpu_device() can return NULL; treat that as a creation failure
+	 * instead of dereferencing it.
+	 */
+	ret = dev ? sysfs_create_file(&dev->kobj, &cpm_idle_attr.attr)
+		  : -ENODEV;
+	if (ret)
+		printk(KERN_WARNING
+		       "cpm: failed to create idle sysfs entry\n");
+}
+
+static void cpm_idle(void)
+{
+ if (idle_mode[CPM_IDLE_DOZE].enabled)
+ cpm_idle_doze();
+ else
+ cpm_idle_wait();
+}
+
+/*
+ * Suspend ops .valid callback: a state is supported (returns 1) only when a
+ * non-zero unit mask has been configured for it; otherwise returns 0.
+ */
+static int cpm_suspend_valid(suspend_state_t state)
+{
+	if (state == PM_SUSPEND_STANDBY)
+		return cpm.standby != 0;
+
+	if (state == PM_SUSPEND_MEM)
+		return cpm.suspend != 0;
+
+	return 0;
+}
+
+static void cpm_suspend_standby(unsigned int mask)
+{
+ unsigned long tcr_save;
+
+ /* disable decrement interrupt */
+ tcr_save = mfspr(SPRN_TCR);
+ mtspr(SPRN_TCR, tcr_save & ~TCR_DIE);
+
+ /* go to sleep state */
+ cpm_idle_sleep(mask);
+
+ /* restore decrement interrupt */
+ mtspr(SPRN_TCR, tcr_save);
+}
+
+static int cpm_suspend_enter(suspend_state_t state)
+{
+ switch (state) {
+ case PM_SUSPEND_STANDBY:
+ cpm_suspend_standby(cpm.standby);
+ break;
+ case PM_SUSPEND_MEM:
+ cpm_suspend_standby(cpm.suspend);
+ break;
+ }
+
+ return 0;
+}
+
+static const struct platform_suspend_ops cpm_suspend_ops = {
+ .valid = cpm_suspend_valid,
+ .enter = cpm_suspend_enter,
+};
+
+/*
+ * Return the first cell of property @name of @np, or 0 when the property is
+ * absent or shorter than one u32.
+ */
+static int __init cpm_get_uint_property(struct device_node *np,
+					const char *name)
+{
+	int size;
+	const unsigned int *cell = of_get_property(np, name, &size);
+
+	if (!cell)
+		return 0;
+	if (size < sizeof(u32))
+		return 0;
+
+	return *cell;
+}
+
+/*
+ * Late initcall: set up clock and power management from the "ibm,cpm" node.
+ *
+ * Unless "powersave=off" was given, cpm_idle is installed as
+ * ppc_md.power_save. The CPM DCR range is then mapped, the register order
+ * is chosen from the "er-offset" property, the unit masks for the different
+ * modes are read, unused units are switched off, and the idle sysfs
+ * attribute and the suspend ops are registered when the matching masks are
+ * non-zero.
+ *
+ * Returns 0 on success, -EINVAL if the node is missing or its DCR resource
+ * cannot be parsed or mapped.
+ */
+static int __init cpm_init(void)
+{
+ struct device_node *np;
+ int dcr_base, dcr_len;
+ int ret = 0;
+
+ /* Done before the node lookup, so it also applies when no node exists */
+ if (!cpm.powersave_off) {
+ cpm_idle_config(CPM_IDLE_WAIT);
+ ppc_md.power_save = &cpm_idle;
+ }
+
+ np = of_find_compatible_node(NULL, NULL, "ibm,cpm");
+ if (!np) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ dcr_base = dcr_resource_start(np, 0);
+ dcr_len = dcr_resource_len(np, 0);
+
+ if (dcr_base == 0 || dcr_len == 0) {
+ printk(KERN_ERR "cpm: could not parse dcr property for %pOF\n",
+ np);
+ ret = -EINVAL;
+ goto node_put;
+ }
+
+ cpm.dcr_host = dcr_map(np, dcr_base, dcr_len);
+
+ if (!DCR_MAP_OK(cpm.dcr_host)) {
+ printk(KERN_ERR "cpm: failed to map dcr property for %pOF\n",
+ np);
+ ret = -EINVAL;
+ goto node_put;
+ }
+
+ /* All 4xx SoCs with a CPM controller have one of two
+ * different order for the CPM registers. Some have the
+ * CPM registers in the following order (ER,FR,SR). The
+ * others have them in the following order (SR,ER,FR).
+ */
+
+ if (cpm_get_uint_property(np, "er-offset") == 0) {
+ cpm.dcr_offset[CPM_ER] = 0;
+ cpm.dcr_offset[CPM_FR] = 1;
+ cpm.dcr_offset[CPM_SR] = 2;
+ } else {
+ cpm.dcr_offset[CPM_ER] = 1;
+ cpm.dcr_offset[CPM_FR] = 2;
+ cpm.dcr_offset[CPM_SR] = 0;
+ }
+
+ /* Now let's see what IPs to turn off for the following modes */
+
+ cpm.unused = cpm_get_uint_property(np, "unused-units");
+ cpm.idle_doze = cpm_get_uint_property(np, "idle-doze");
+ cpm.standby = cpm_get_uint_property(np, "standby");
+ cpm.suspend = cpm_get_uint_property(np, "suspend");
+
+ /* If some IPs are unused let's turn them off now */
+
+ if (cpm.unused) {
+ cpm_set(CPM_ER, cpm.unused);
+ cpm_set(CPM_FR, cpm.unused);
+ }
+
+ /* Now let's export interfaces */
+
+ if (!cpm.powersave_off && cpm.idle_doze)
+ cpm_idle_config_sysfs();
+
+ if (cpm.standby || cpm.suspend)
+ suspend_set_ops(&cpm_suspend_ops);
+ /* The success path falls through: the node reference is always dropped */
+node_put:
+ of_node_put(np);
+out:
+ return ret;
+}
+
+late_initcall(cpm_init);
+
+static int __init cpm_powersave_off(char *arg)
+{
+ cpm.powersave_off = 1;
+ return 1;
+}
+__setup("powersave=off", cpm_powersave_off);
diff --git a/arch/powerpc/platforms/4xx/gpio.c b/arch/powerpc/platforms/4xx/gpio.c
new file mode 100644
index 000000000..e5f2319e5
--- /dev/null
+++ b/arch/powerpc/platforms/4xx/gpio.c
@@ -0,0 +1,195 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * PPC4xx gpio driver
+ *
+ * Copyright (c) 2008 Harris Corporation
+ * Copyright (c) 2008 Sascha Hauer <s.hauer@pengutronix.de>, Pengutronix
+ * Copyright (c) MontaVista Software, Inc. 2008.
+ *
+ * Author: Steve Falco <sfalco@harris.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/gpio/legacy-of-mm-gpiochip.h>
+#include <linux/gpio/driver.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+
+/*
+ * Register bit masks, counted from the most significant bit.
+ * GPIO_MASK selects the single bit for @gpio; GPIO_MASK2 selects the 2-bit
+ * field for @gpio.  GPIO_MASK2 shifts a 32-bit constant by gpio * 2, so it is
+ * only well defined for arguments in the range 0..15.
+ */
+#define GPIO_MASK(gpio) (0x80000000 >> (gpio))
+#define GPIO_MASK2(gpio) (0xc0000000 >> ((gpio) * 2))
+
+/*
+ * Physical GPIO register layout
+ *
+ * Overlay for the memory-mapped register block; every register is a 32-bit
+ * big-endian word and the field order must match the hardware.  Only the
+ * registers annotated below are touched by this driver.
+ */
+struct ppc4xx_gpio {
+ __be32 or; /* output value, one bit per GPIO (written by the set path) */
+ __be32 tcr; /* bit set: pin is driven; bit clear: pin floats */
+ __be32 osrl; /* 2-bit fields, GPIOs 0-15; cleared on direction change */
+ __be32 osrh; /* 2-bit fields, GPIOs 16-31; cleared on direction change */
+ __be32 tsrl; /* 2-bit fields, GPIOs 0-15; cleared on direction change */
+ __be32 tsrh; /* 2-bit fields, GPIOs 16-31; cleared on direction change */
+ __be32 odr; /* open-drain enable, one bit per GPIO */
+ __be32 ir; /* input value, one bit per GPIO (read by the get path) */
+ __be32 rr1;
+ __be32 rr2;
+ __be32 rr3;
+ __be32 reserved1;
+ __be32 isr1l;
+ __be32 isr1h;
+ __be32 isr2l;
+ __be32 isr2h;
+ __be32 isr3l;
+ __be32 isr3h;
+};
+
+/* Per-controller driver state, passed to gpiolib as the chip's data pointer. */
+struct ppc4xx_gpio_chip {
+ struct of_mm_gpio_chip mm_gc; /* memory-mapped gpio chip (holds regs and the gpio_chip) */
+ spinlock_t lock; /* serialises read-modify-write of the GPIO registers */
+};
+
+/*
+ * GPIO LIB API implementation for GPIOs
+ *
+ * There are a maximum of 32 gpios in each gpio controller.
+ */
+
+/* Sample the input register and report the level of @gpio as 0 or 1. */
+static int ppc4xx_gpio_get(struct gpio_chip *gc, unsigned int gpio)
+{
+	struct ppc4xx_gpio __iomem *regs = to_of_mm_gpio_chip(gc)->regs;
+	u32 level = in_be32(&regs->ir) & GPIO_MASK(gpio);
+
+	return level != 0;
+}
+
+/*
+ * Update the output register bit for @gpio: set it when @val is non-zero,
+ * clear it otherwise.  Takes no lock itself; the callers in this file hold
+ * the chip lock around it.
+ */
+static inline void
+__ppc4xx_gpio_set(struct gpio_chip *gc, unsigned int gpio, int val)
+{
+	struct ppc4xx_gpio __iomem *regs = to_of_mm_gpio_chip(gc)->regs;
+
+	if (!val) {
+		clrbits32(&regs->or, GPIO_MASK(gpio));
+		return;
+	}
+
+	setbits32(&regs->or, GPIO_MASK(gpio));
+}
+
+/* gpio_chip .set callback: change the output value of @gpio under the chip lock. */
+static void
+ppc4xx_gpio_set(struct gpio_chip *gc, unsigned int gpio, int val)
+{
+	struct ppc4xx_gpio_chip *priv = gpiochip_get_data(gc);
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&priv->lock, irqflags);
+	__ppc4xx_gpio_set(gc, gpio, val);
+	spin_unlock_irqrestore(&priv->lock, irqflags);
+
+	pr_debug("%s: gpio: %d val: %d\n", __func__, gpio, val);
+}
+
+/*
+ * gpio_chip .direction_input callback.
+ *
+ * Under the chip lock: disables open-drain for @gpio, stops driving the pin
+ * and clears the pin's 2-bit fields in the OSR/TSR select registers.
+ * Always returns 0.
+ */
+static int ppc4xx_gpio_dir_in(struct gpio_chip *gc, unsigned int gpio)
+{
+	struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc);
+	struct ppc4xx_gpio_chip *chip = gpiochip_get_data(gc);
+	struct ppc4xx_gpio __iomem *regs = mm_gc->regs;
+	unsigned long flags;
+
+	spin_lock_irqsave(&chip->lock, flags);
+
+	/* Disable open-drain function */
+	clrbits32(&regs->odr, GPIO_MASK(gpio));
+
+	/* Float the pin */
+	clrbits32(&regs->tcr, GPIO_MASK(gpio));
+
+	/*
+	 * Bits 0-15 use TSRL/OSRL, bits 16-31 use TSRH/OSRH.  Each register
+	 * holds sixteen 2-bit fields, so the high registers are indexed from
+	 * gpio 16; passing the raw gpio number would shift the 32-bit mask
+	 * by 32 bits or more.
+	 */
+	if (gpio < 16) {
+		clrbits32(&regs->osrl, GPIO_MASK2(gpio));
+		clrbits32(&regs->tsrl, GPIO_MASK2(gpio));
+	} else {
+		clrbits32(&regs->osrh, GPIO_MASK2(gpio - 16));
+		clrbits32(&regs->tsrh, GPIO_MASK2(gpio - 16));
+	}
+
+	spin_unlock_irqrestore(&chip->lock, flags);
+
+	return 0;
+}
+
+/*
+ * gpio_chip .direction_output callback.
+ *
+ * Under the chip lock: latches the initial output value @val, disables
+ * open-drain, starts driving the pin and clears the pin's 2-bit fields in
+ * the OSR/TSR select registers.  Always returns 0.
+ */
+static int
+ppc4xx_gpio_dir_out(struct gpio_chip *gc, unsigned int gpio, int val)
+{
+	struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc);
+	struct ppc4xx_gpio_chip *chip = gpiochip_get_data(gc);
+	struct ppc4xx_gpio __iomem *regs = mm_gc->regs;
+	unsigned long flags;
+
+	spin_lock_irqsave(&chip->lock, flags);
+
+	/* First set initial value */
+	__ppc4xx_gpio_set(gc, gpio, val);
+
+	/* Disable open-drain function */
+	clrbits32(&regs->odr, GPIO_MASK(gpio));
+
+	/* Drive the pin */
+	setbits32(&regs->tcr, GPIO_MASK(gpio));
+
+	/*
+	 * Bits 0-15 use TSRL/OSRL, bits 16-31 use TSRH/OSRH.  Each register
+	 * holds sixteen 2-bit fields, so the high registers are indexed from
+	 * gpio 16; passing the raw gpio number would shift the 32-bit mask
+	 * by 32 bits or more.
+	 */
+	if (gpio < 16) {
+		clrbits32(&regs->osrl, GPIO_MASK2(gpio));
+		clrbits32(&regs->tsrl, GPIO_MASK2(gpio));
+	} else {
+		clrbits32(&regs->osrh, GPIO_MASK2(gpio - 16));
+		clrbits32(&regs->tsrh, GPIO_MASK2(gpio - 16));
+	}
+
+	spin_unlock_irqrestore(&chip->lock, flags);
+
+	pr_debug("%s: gpio: %d val: %d\n", __func__, gpio, val);
+
+	return 0;
+}
+
+/*
+ * Register a 32-line gpio chip for every "ibm,ppc4xx-gpio" node in the
+ * device tree.  A failure on one node is logged and its allocation freed,
+ * then the remaining nodes are still tried.  Always returns 0.
+ */
+static int __init ppc4xx_add_gpiochips(void)
+{
+	struct device_node *np;
+
+	for_each_compatible_node(np, NULL, "ibm,ppc4xx-gpio") {
+		struct ppc4xx_gpio_chip *ppc4xx_gc;
+		struct gpio_chip *gc;
+		int ret;
+
+		ppc4xx_gc = kzalloc(sizeof(*ppc4xx_gc), GFP_KERNEL);
+		if (ppc4xx_gc) {
+			spin_lock_init(&ppc4xx_gc->lock);
+
+			gc = &ppc4xx_gc->mm_gc.gc;
+			gc->ngpio = 32;
+			gc->direction_input = ppc4xx_gpio_dir_in;
+			gc->direction_output = ppc4xx_gpio_dir_out;
+			gc->get = ppc4xx_gpio_get;
+			gc->set = ppc4xx_gpio_set;
+
+			ret = of_mm_gpiochip_add_data(np, &ppc4xx_gc->mm_gc,
+						      ppc4xx_gc);
+			if (!ret)
+				continue;
+		} else {
+			ret = -ENOMEM;
+		}
+
+		pr_err("%pOF: registration failed with status %d\n", np, ret);
+		kfree(ppc4xx_gc);
+		/* try others anyway */
+	}
+	return 0;
+}
+arch_initcall(ppc4xx_add_gpiochips);
diff --git a/arch/powerpc/platforms/4xx/hsta_msi.c b/arch/powerpc/platforms/4xx/hsta_msi.c
new file mode 100644
index 000000000..c6bd846b0
--- /dev/null
+++ b/arch/powerpc/platforms/4xx/hsta_msi.c
@@ -0,0 +1,208 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * MSI support for PPC4xx SoCs using High Speed Transfer Assist (HSTA) for
+ * generation of the interrupt.
+ *
+ * Copyright © 2013 Alistair Popple <alistair@popple.id.au> IBM Corporation
+ */
+
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/msi.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
+#include <linux/platform_device.h>
+#include <linux/pci.h>
+#include <linux/semaphore.h>
+#include <asm/msi_bitmap.h>
+#include <asm/ppc-pci.h>
+
+/*
+ * Driver state for the HSTA MSI block.  A single static instance is used,
+ * filled in by hsta_msi_probe().
+ */
+struct ppc4xx_hsta_msi {
+ /* Platform device's struct device, recorded at probe time */
+ struct device *dev;
+
+ /* The ioremapped HSTA MSI IO space */
+ u32 __iomem *data;
+
+ /* Physical address of HSTA MSI IO space */
+ u64 address;
+ /* Allocation bitmap of MSI slots; one slot per entry in irq_map */
+ struct msi_bitmap bmp;
+
+ /* An array mapping offsets to hardware IRQs */
+ int *irq_map;
+
+ /* Number of hwirqs supported */
+ int irq_count;
+};
+static struct ppc4xx_hsta_msi ppc4xx_hsta_msi;
+
+/*
+ * controller_ops.setup_msi_irqs callback.
+ *
+ * For every not-yet-associated MSI descriptor of @dev, allocates one slot in
+ * the HSTA bitmap, looks up the interrupt mapped to that slot, attaches the
+ * descriptor to it and writes the MSI message (address = HSTA base +
+ * slot * 16, data = 0).
+ *
+ * Returns 0 on success, -EINVAL for MSI-X or for a slot with no usable
+ * interrupt, or the negative value from the bitmap allocator when no slot
+ * is free.  A slot allocated for a descriptor that then fails is released
+ * before returning.
+ */
+static int hsta_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
+{
+	struct msi_msg msg;
+	struct msi_desc *entry;
+	int irq, hwirq;
+	u64 addr;
+
+	/* We don't support MSI-X */
+	if (type == PCI_CAP_ID_MSIX) {
+		pr_debug("%s: MSI-X not supported.\n", __func__);
+		return -EINVAL;
+	}
+
+	msi_for_each_desc(entry, &dev->dev, MSI_DESC_NOTASSOCIATED) {
+		irq = msi_bitmap_alloc_hwirqs(&ppc4xx_hsta_msi.bmp, 1);
+		if (irq < 0) {
+			pr_debug("%s: Failed to allocate msi interrupt\n",
+				 __func__);
+			return irq;
+		}
+
+		hwirq = ppc4xx_hsta_msi.irq_map[irq];
+		if (!hwirq) {
+			pr_err("%s: Failed mapping irq %d\n", __func__, irq);
+			/* Give the slot back instead of leaking it */
+			msi_bitmap_free_hwirqs(&ppc4xx_hsta_msi.bmp, irq, 1);
+			return -EINVAL;
+		}
+
+		/*
+		 * HSTA generates interrupts on writes to 128-bit aligned
+		 * addresses.
+		 */
+		addr = ppc4xx_hsta_msi.address + irq*0x10;
+		msg.address_hi = upper_32_bits(addr);
+		msg.address_lo = lower_32_bits(addr);
+
+		/* Data is not used by the HSTA. */
+		msg.data = 0;
+
+		pr_debug("%s: Setup irq %d (0x%0llx)\n", __func__, hwirq,
+			 (((u64) msg.address_hi) << 32) | msg.address_lo);
+
+		if (irq_set_msi_desc(hwirq, entry)) {
+			pr_err(
+			"%s: Invalid hwirq %d specified in device tree\n",
+			__func__, hwirq);
+			msi_bitmap_free_hwirqs(&ppc4xx_hsta_msi.bmp, irq, 1);
+			return -EINVAL;
+		}
+		pci_write_msi_msg(hwirq, &msg);
+	}
+
+	return 0;
+}
+
+/*
+ * Reverse lookup in irq_map: return the slot index whose entry equals
+ * @hwirq, or -EINVAL when no entry matches.
+ */
+static int hsta_find_hwirq_offset(int hwirq)
+{
+	int slot = 0;
+
+	while (slot < ppc4xx_hsta_msi.irq_count) {
+		if (ppc4xx_hsta_msi.irq_map[slot] == hwirq)
+			return slot;
+		slot++;
+	}
+
+	return -EINVAL;
+}
+
+/*
+ * controller_ops.teardown_msi_irqs callback: for every associated MSI
+ * descriptor of @dev, detach the descriptor from its interrupt, release the
+ * matching HSTA bitmap slot and reset the descriptor's irq to 0.
+ */
+static void hsta_teardown_msi_irqs(struct pci_dev *dev)
+{
+	struct msi_desc *desc;
+	int slot;
+
+	msi_for_each_desc(desc, &dev->dev, MSI_DESC_ASSOCIATED) {
+		slot = hsta_find_hwirq_offset(desc->irq);
+
+		/* desc->irq should always be in irq_map */
+		BUG_ON(slot < 0);
+
+		irq_set_msi_desc(desc->irq, NULL);
+		msi_bitmap_free_hwirqs(&ppc4xx_hsta_msi.bmp, slot, 1);
+		pr_debug("%s: Teardown IRQ %u (index %u)\n", __func__,
+			 desc->irq, slot);
+		desc->irq = 0;
+	}
+}
+
+/*
+ * Platform driver probe for the HSTA MSI block.
+ *
+ * Maps the block's MMIO resource, allocates an MSI bitmap with one slot per
+ * interrupt listed in the device-tree node, builds irq_map by mapping each
+ * of those interrupts, and finally installs the HSTA setup/teardown
+ * callbacks on every PCI host controller in hose_list.
+ *
+ * Returns 0 on success.  On failure everything acquired so far is released
+ * (including interrupt mappings already created) and a negative errno is
+ * returned.
+ */
+static int hsta_msi_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct resource *mem;
+	int irq, ret, irq_count;
+	struct pci_controller *phb;
+
+	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!mem) {
+		dev_err(dev, "Unable to get mmio space\n");
+		return -EINVAL;
+	}
+
+	irq_count = of_irq_count(dev->of_node);
+	if (!irq_count) {
+		dev_err(dev, "Unable to find IRQ range\n");
+		return -EINVAL;
+	}
+
+	ppc4xx_hsta_msi.dev = dev;
+	ppc4xx_hsta_msi.address = mem->start;
+	ppc4xx_hsta_msi.data = ioremap(mem->start, resource_size(mem));
+	ppc4xx_hsta_msi.irq_count = irq_count;
+	if (!ppc4xx_hsta_msi.data) {
+		dev_err(dev, "Unable to map memory\n");
+		return -ENOMEM;
+	}
+
+	ret = msi_bitmap_alloc(&ppc4xx_hsta_msi.bmp, irq_count, dev->of_node);
+	if (ret)
+		goto out;
+
+	ppc4xx_hsta_msi.irq_map = kmalloc_array(irq_count, sizeof(int),
+						GFP_KERNEL);
+	if (!ppc4xx_hsta_msi.irq_map) {
+		ret = -ENOMEM;
+		goto out1;
+	}
+
+	/* Setup a mapping from irq offsets to hardware irq numbers */
+	for (irq = 0; irq < irq_count; irq++) {
+		ppc4xx_hsta_msi.irq_map[irq] =
+			irq_of_parse_and_map(dev->of_node, irq);
+		if (!ppc4xx_hsta_msi.irq_map[irq]) {
+			dev_err(dev, "Unable to map IRQ\n");
+			ret = -EINVAL;
+			goto out2;
+		}
+	}
+
+	list_for_each_entry(phb, &hose_list, list_node) {
+		phb->controller_ops.setup_msi_irqs = hsta_setup_msi_irqs;
+		phb->controller_ops.teardown_msi_irqs = hsta_teardown_msi_irqs;
+	}
+	return 0;
+
+out2:
+	/* irq is the index that failed; undo the mappings made before it */
+	while (irq--)
+		irq_dispose_mapping(ppc4xx_hsta_msi.irq_map[irq]);
+	kfree(ppc4xx_hsta_msi.irq_map);
+	ppc4xx_hsta_msi.irq_map = NULL;
+
+out1:
+	msi_bitmap_free(&ppc4xx_hsta_msi.bmp);
+
+out:
+	iounmap(ppc4xx_hsta_msi.data);
+	return ret;
+}
+
+/* Device-tree match table: binds to nodes compatible with "ibm,hsta-msi". */
+static const struct of_device_id hsta_msi_ids[] = {
+ {
+ .compatible = "ibm,hsta-msi",
+ },
+ {}
+};
+
+/* Platform driver definition; only a probe callback is provided. */
+static struct platform_driver hsta_msi_driver = {
+ .probe = hsta_msi_probe,
+ .driver = {
+ .name = "hsta-msi",
+ .of_match_table = hsta_msi_ids,
+ },
+};
+
+/*
+ * Register the HSTA MSI platform driver at subsys_initcall time.  Only ever
+ * called through the initcall, so it is marked __init.
+ */
+static int __init hsta_msi_init(void)
+{
+	return platform_driver_register(&hsta_msi_driver);
+}
+subsys_initcall(hsta_msi_init);
diff --git a/arch/powerpc/platforms/4xx/machine_check.c b/arch/powerpc/platforms/4xx/machine_check.c
new file mode 100644
index 000000000..a905da1d6
--- /dev/null
+++ b/arch/powerpc/platforms/4xx/machine_check.c
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ */
+
+#include <linux/kernel.h>
+#include <linux/printk.h>
+#include <linux/ptrace.h>
+
+#include <asm/reg.h>
+
+/*
+ * Report a 4xx machine check.
+ *
+ * Reads the saved ESR from @regs.  When ESR_IMCP is set the event is
+ * reported as an instruction machine check and ESR is written back with
+ * that bit cleared; otherwise it is reported as a data machine check.
+ * The message is emitted with a single printk() so it cannot be split
+ * across log lines.  Always returns 0.
+ */
+int machine_check_4xx(struct pt_regs *regs)
+{
+	unsigned long reason = regs->esr;
+	const char *kind = "Data";
+
+	if (reason & ESR_IMCP) {
+		kind = "Instruction";
+		mtspr(SPRN_ESR, reason & ~ESR_IMCP);
+	}
+
+	printk("%s machine check in kernel mode.\n", kind);
+
+	return 0;
+}
diff --git a/arch/powerpc/platforms/4xx/pci.c b/arch/powerpc/platforms/4xx/pci.c
new file mode 100644
index 000000000..48626615b
--- /dev/null
+++ b/arch/powerpc/platforms/4xx/pci.c
@@ -0,0 +1,2182 @@
+/*
+ * PCI / PCI-X / PCI-Express support for 4xx parts
+ *
+ * Copyright 2007 Ben. Herrenschmidt <benh@kernel.crashing.org>, IBM Corp.
+ *
+ * Most PCI Express code is coming from Stefan Roese implementation for
+ * arch/ppc in the Denx tree, slightly reworked by me.
+ *
+ * Copyright 2007 DENX Software Engineering, Stefan Roese <sr@denx.de>
+ *
+ * Some of that comes itself from a previous implementation for 440SPE only
+ * by Roland Dreier:
+ *
+ * Copyright (c) 2005 Cisco Systems. All rights reserved.
+ * Roland Dreier <rolandd@cisco.com>
+ *
+ */
+
+#undef DEBUG
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/init.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+
+#include <asm/io.h>
+#include <asm/pci-bridge.h>
+#include <asm/machdep.h>
+#include <asm/dcr.h>
+#include <asm/dcr-regs.h>
+#include <mm/mmu_decl.h>
+
+#include "pci.h"
+
+static int dma_offset_set;
+
+/* Split a 64-bit value into its low and high 32-bit halves. */
+#define U64_TO_U32_LOW(val) ((u32)((val) & 0x00000000ffffffffULL))
+#define U64_TO_U32_HIGH(val) ((u32)((val) >> 32))
+
+/*
+ * Same split, but keyed on the width of resource_size_t: when it is no wider
+ * than u32 the low half is the value itself and the high half is 0.
+ */
+#define RES_TO_U32_LOW(val) \
+ ((sizeof(resource_size_t) > sizeof(u32)) ? U64_TO_U32_LOW(val) : (val))
+#define RES_TO_U32_HIGH(val) \
+ ((sizeof(resource_size_t) > sizeof(u32)) ? U64_TO_U32_HIGH(val) : (0))
+
+/*
+ * Return 1 when the PVR, with bit 0x00100000 masked out, equals 0x53421890,
+ * and 0 otherwise.  The mask makes both 440SPe variants (with and without
+ * RAID6 support) match.
+ */
+static inline int ppc440spe_revA(void)
+{
+	const u32 pvr = mfspr(SPRN_PVR) & 0xffefffff;
+
+	return pvr == 0x53421890 ? 1 : 0;
+}
+
+/*
+ * PCI header fixup, registered for every vendor/device ID.
+ *
+ * Acts only on devfn 0 of a bus that has no upstream bridge device and whose
+ * host controller node is one of the 4xx PLB PCI / PCI-X / PCIe bridges.
+ * For such a device the resources are zeroed so the host BARs are hidden
+ * from resource management; 440EPx/440GRx bridges additionally get the
+ * PPC_INDIRECT_TYPE_BROKEN_MRM quirk flag.
+ */
+static void fixup_ppc4xx_pci_bridge(struct pci_dev *dev)
+{
+	struct pci_controller *hose;
+	struct device_node *dn;
+	struct resource *r;
+
+	if (dev->devfn || dev->bus->self)
+		return;
+
+	hose = pci_bus_to_host(dev->bus);
+	if (!hose)
+		return;
+
+	dn = hose->dn;
+	if (!(of_device_is_compatible(dn, "ibm,plb-pciex") ||
+	      of_device_is_compatible(dn, "ibm,plb-pcix") ||
+	      of_device_is_compatible(dn, "ibm,plb-pci")))
+		return;
+
+	if (of_device_is_compatible(dn, "ibm,plb440epx-pci") ||
+	    of_device_is_compatible(dn, "ibm,plb440grx-pci"))
+		hose->indirect_type |= PPC_INDIRECT_TYPE_BROKEN_MRM;
+
+	/* Hide the PCI host BARs from the kernel as their content doesn't
+	 * fit well in the resource management
+	 */
+	pci_dev_for_each_resource(dev, r) {
+		r->start = 0;
+		r->end = 0;
+		r->flags = 0;
+	}
+
+	printk(KERN_INFO "PCI: Hiding 4xx host bridge resources %s\n",
+	       pci_name(dev));
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, fixup_ppc4xx_pci_bridge);
+
+/*
+ * Work out the inbound (DMA) window for @hose.
+ *
+ * Starts from a default 2GB prefetchable window at PCI address 0, then walks
+ * the node's "dma-ranges" property and takes the first memory-space entry
+ * that maps CPU address 0 and has a PCI address within 32 bits.  The result
+ * is written to @res and recorded in the hose and in pci_dram_offset.
+ *
+ * Only the size of the entry that is actually accepted replaces the default
+ * size, so the sanity checks below always describe the window left in @res,
+ * never an entry that was skipped.
+ *
+ * @reg is not used.  Returns 0 on success or -ENXIO when the window
+ * conflicts with an earlier bridge's offset, is smaller than total memory,
+ * is not naturally aligned, or lies outside 32-bit space on a part that
+ * does not support that.
+ */
+static int __init ppc4xx_parse_dma_ranges(struct pci_controller *hose,
+					  void __iomem *reg,
+					  struct resource *res)
+{
+	u64 size;
+	const u32 *ranges;
+	int rlen;
+	int pna = of_n_addr_cells(hose->dn);
+	int np = pna + 5;
+
+	/* Default */
+	res->start = 0;
+	size = 0x80000000;
+	res->end = size - 1;
+	res->flags = IORESOURCE_MEM | IORESOURCE_PREFETCH;
+
+	/* Get dma-ranges property */
+	ranges = of_get_property(hose->dn, "dma-ranges", &rlen);
+	if (ranges == NULL)
+		goto out;
+
+	/* Walk it; each entry is np cells of 4 bytes */
+	while ((rlen -= np * 4) >= 0) {
+		u32 pci_space = ranges[0];
+		u64 pci_addr = of_read_number(ranges + 1, 2);
+		u64 cpu_addr = of_translate_dma_address(hose->dn, ranges + 3);
+		u64 range_size = of_read_number(ranges + pna + 3, 2);
+
+		ranges += np;
+		if (cpu_addr == OF_BAD_ADDR || range_size == 0)
+			continue;
+
+		/* We only care about memory */
+		if ((pci_space & 0x03000000) != 0x02000000)
+			continue;
+
+		/* We currently only support memory at 0, and pci_addr
+		 * within 32 bits space
+		 */
+		if (cpu_addr != 0 || pci_addr > 0xffffffff) {
+			printk(KERN_WARNING "%pOF: Ignored unsupported dma range"
+			       " 0x%016llx...0x%016llx -> 0x%016llx\n",
+			       hose->dn,
+			       pci_addr, pci_addr + range_size - 1, cpu_addr);
+			continue;
+		}
+
+		/* Check if not prefetchable */
+		if (!(pci_space & 0x40000000))
+			res->flags &= ~IORESOURCE_PREFETCH;
+
+		/* Use that */
+		size = range_size;
+		res->start = pci_addr;
+		/* Beware of 32 bits resources */
+		if (sizeof(resource_size_t) == sizeof(u32) &&
+		    (pci_addr + size) > 0x100000000ull)
+			res->end = 0xffffffff;
+		else
+			res->end = res->start + size - 1;
+		break;
+	}
+
+	/* We only support one global DMA offset */
+	if (dma_offset_set && pci_dram_offset != res->start) {
+		printk(KERN_ERR "%pOF: dma-ranges(s) mismatch\n", hose->dn);
+		return -ENXIO;
+	}
+
+	/* Check that we can fit all of memory as we don't support
+	 * DMA bounce buffers
+	 */
+	if (size < total_memory) {
+		printk(KERN_ERR "%pOF: dma-ranges too small "
+		       "(size=%llx total_memory=%llx)\n",
+		       hose->dn, size, (u64)total_memory);
+		return -ENXIO;
+	}
+
+	/* Check we are a power of 2 size and that base is a multiple of size*/
+	if ((size & (size - 1)) != 0  ||
+	    (res->start & (size - 1)) != 0) {
+		printk(KERN_ERR "%pOF: dma-ranges unaligned\n", hose->dn);
+		return -ENXIO;
+	}
+
+	/* Check that we are fully contained within 32 bits space if we are not
+	 * running on a 460sx or 476fpe which have 64 bit bus addresses.
+	 */
+	if (res->end > 0xffffffff &&
+	    !(of_device_is_compatible(hose->dn, "ibm,plb-pciex-460sx")
+	      || of_device_is_compatible(hose->dn, "ibm,plb-pciex-476fpe"))) {
+		printk(KERN_ERR "%pOF: dma-ranges outside of 32 bits space\n",
+		       hose->dn);
+		return -ENXIO;
+	}
+ out:
+	dma_offset_set = 1;
+	pci_dram_offset = res->start;
+	hose->dma_window_base_cur = res->start;
+	hose->dma_window_size = resource_size(res);
+
+	printk(KERN_INFO "4xx PCI DMA offset set to 0x%08lx\n",
+	       pci_dram_offset);
+	printk(KERN_INFO "4xx PCI DMA window base to 0x%016llx\n",
+	       (unsigned long long)hose->dma_window_base_cur);
+	printk(KERN_INFO "DMA window size 0x%016llx\n",
+	       (unsigned long long)hose->dma_window_size);
+	return 0;
+}
+
+/*
+ * 4xx PCI 2.x part
+ */
+
+/*
+ * Program outbound window @index (PMM) of the PCI 2.x bridge so that PLB
+ * range [@plb_addr, @plb_addr + @size) maps to PCI address @pci_addr.
+ *
+ * Returns 0 on success, or -1 (with a warning) when the range is not a
+ * power-of-two size of at least 4KB, is not aligned on its size, or does not
+ * pass the 4G containment test below.
+ */
+static int __init ppc4xx_setup_one_pci_PMM(struct pci_controller *hose,
+					   void __iomem *reg,
+					   u64 plb_addr,
+					   u64 pci_addr,
+					   u64 size,
+					   unsigned int flags,
+					   int index)
+{
+	/* Each window's register set sits 0x10 bytes after the previous one */
+	int win = 0x10 * index;
+	u32 mask_attr, pci_lo, pci_hi;
+
+	/* Hack warning ! The "old" PCI 2.x cell only lets us configure the
+	 * low 32 bits of incoming PLB addresses. The top 4 bits of the 36-bit
+	 * address are hard wired to a value that appears to depend on the
+	 * specific SoC (for example 0 on 440EP and 1 on 440EPx).
+	 *
+	 * So we simply crop those top bits and ignore them when programming
+	 * the chip. The device-tree therefore has to be right for the part
+	 * in use (no warning is printed if it is not, but the crash will
+	 * come quickly), and this code works whatever the hard-wired value.
+	 */
+	plb_addr &= 0xffffffffull;
+
+	/* Note: because of the cropping above, the first test does not check
+	 * for an address above 4G; it checks that address and
+	 * (address + size) are both contained in the same 4G
+	 */
+	if ((plb_addr + size) > 0xffffffffull)
+		goto bad_range;
+	if (!is_power_of_2(size) || size < 0x1000)
+		goto bad_range;
+	if ((plb_addr & (size - 1)) != 0)
+		goto bad_range;
+
+	/* Size mask with the enable bit, plus the prefetch bit if requested */
+	mask_attr = (0xffffffffu << ilog2(size)) | 1;
+	if (flags & IORESOURCE_PREFETCH)
+		mask_attr |= 2;
+
+	pci_hi = RES_TO_U32_HIGH(pci_addr);
+	pci_lo = RES_TO_U32_LOW(pci_addr);
+
+	writel(plb_addr, reg + PCIL0_PMM0LA + win);
+	writel(pci_lo, reg + PCIL0_PMM0PCILA + win);
+	writel(pci_hi, reg + PCIL0_PMM0PCIHA + win);
+	writel(mask_attr, reg + PCIL0_PMM0MA + win);
+
+	return 0;
+
+bad_range:
+	printk(KERN_WARNING "%pOF: Resource out of range\n", hose->dn);
+	return -1;
+}
+
+/*
+ * Program the outbound windows (PMMs) of the PCI 2.x bridge from the hose's
+ * memory resources, using at most three windows.  If none of the programmed
+ * windows starts at PCI address 0, a window is still free and the hose has
+ * an ISA memory size, one more window is set up for the legacy ISA hole.
+ */
+static void __init ppc4xx_configure_pci_PMMs(struct pci_controller *hose,
+					     void __iomem *reg)
+{
+	int idx, win = 0, found_isa_hole = 0;
+
+	/* Setup outbound memory windows */
+	for (idx = 0; idx < 3; idx++) {
+		struct resource *res = &hose->mem_resources[idx];
+		resource_size_t offset = hose->mem_offset[idx];
+
+		/* we only care about memory windows */
+		if (!(res->flags & IORESOURCE_MEM))
+			continue;
+		if (win > 2) {
+			printk(KERN_WARNING "%pOF: Too many ranges\n", hose->dn);
+			break;
+		}
+
+		/* Configure the resource; a rejected one uses up no window */
+		if (ppc4xx_setup_one_pci_PMM(hose, reg,
+					     res->start,
+					     res->start - offset,
+					     resource_size(res),
+					     res->flags,
+					     win) != 0)
+			continue;
+		win++;
+
+		/* If the resource PCI address is 0 then we have our
+		 * ISA memory hole
+		 */
+		if (res->start == offset)
+			found_isa_hole = 1;
+	}
+
+	/* Handle ISA memory hole if not already covered */
+	if (win <= 2 && !found_isa_hole && hose->isa_mem_size &&
+	    ppc4xx_setup_one_pci_PMM(hose, reg, hose->isa_mem_phys, 0,
+				     hose->isa_mem_size, 0, win) == 0)
+		printk(KERN_INFO "%pOF: Legacy ISA memory support enabled\n",
+		       hose->dn);
+}
+
+/*
+ * Program inbound window PTM1 of the PCI 2.x bridge for the DMA window
+ * @res: local address 0, size mask derived from the window size with bit 0
+ * set, then the matching PCI-side BARs and command register of the bridge's
+ * own config space.
+ */
+static void __init ppc4xx_configure_pci_PTMs(struct pci_controller *hose,
+					     void __iomem *reg,
+					     const struct resource *res)
+{
+	/* Calculate window size */
+	u32 sa = (0xffffffffu << ilog2(resource_size(res))) | 0x1;
+
+	/* RAM is always at 0 local for now */
+	writel(0, reg + PCIL0_PTM1LA);
+	writel(sa, reg + PCIL0_PTM1MS);
+
+	/* Map on PCI side */
+	early_write_config_dword(hose, hose->first_busno, 0,
+				 PCI_BASE_ADDRESS_1, res->start);
+	early_write_config_dword(hose, hose->first_busno, 0,
+				 PCI_BASE_ADDRESS_2, 0x00000000);
+	early_write_config_word(hose, hose->first_busno, 0,
+				PCI_COMMAND, 0x0006);
+}
+
+/*
+ * Probe and set up one 4xx PCI 2.x host bridge described by @np.
+ *
+ * Skips nodes disabled in the device tree.  Otherwise: fetches the config
+ * (reg index 0) and internal register (reg index 3) resources, maps the
+ * internal registers, allocates the host controller, sets up indirect config
+ * access, disables all windows, parses the outbound ranges and the inbound
+ * DMA window, and programs the PMMs and PTMs.  The register mapping is
+ * dropped before returning; on failure the controller is freed as well.
+ */
+static void __init ppc4xx_probe_pci_bridge(struct device_node *np)
+{
+	struct resource rsrc_cfg;
+	struct resource rsrc_reg;
+	struct resource dma_window;
+	struct pci_controller *hose = NULL;
+	void __iomem *reg = NULL;
+	const int *bus_range;
+	int primary = 0;
+
+	/* Check if device is enabled */
+	if (!of_device_is_available(np)) {
+		printk(KERN_INFO "%pOF: Port disabled via device-tree\n", np);
+		return;
+	}
+
+	/* Fetch config space registers address */
+	if (of_address_to_resource(np, 0, &rsrc_cfg)) {
+		printk(KERN_ERR "%pOF: Can't get PCI config register base !\n",
+		       np);
+		return;
+	}
+	/* Fetch host bridge internal registers address */
+	if (of_address_to_resource(np, 3, &rsrc_reg)) {
+		printk(KERN_ERR "%pOF: Can't get PCI internal register base !\n",
+		       np);
+		return;
+	}
+
+	/* Check if primary bridge */
+	if (of_property_read_bool(np, "primary"))
+		primary = 1;
+
+	/* Get bus range if any */
+	bus_range = of_get_property(np, "bus-range", NULL);
+
+	/* Map registers */
+	reg = ioremap(rsrc_reg.start, resource_size(&rsrc_reg));
+	if (reg == NULL) {
+		printk(KERN_ERR "%pOF: Can't map registers !\n", np);
+		goto fail;
+	}
+
+	/* Allocate the host controller data structure */
+	hose = pcibios_alloc_controller(np);
+	if (!hose)
+		goto fail;
+
+	/* Without a bus-range property, cover buses 0x00..0xff */
+	hose->first_busno = bus_range ? bus_range[0] : 0x0;
+	hose->last_busno = bus_range ? bus_range[1] : 0xff;
+
+	/* Setup config space */
+	setup_indirect_pci(hose, rsrc_cfg.start, rsrc_cfg.start + 0x4, 0);
+
+	/* Disable all windows */
+	writel(0, reg + PCIL0_PMM0MA);
+	writel(0, reg + PCIL0_PMM1MA);
+	writel(0, reg + PCIL0_PMM2MA);
+	writel(0, reg + PCIL0_PTM1MS);
+	writel(0, reg + PCIL0_PTM2MS);
+
+	/* Parse outbound mapping resources */
+	pci_process_bridge_OF_ranges(hose, np, primary);
+
+	/* Parse inbound mapping resources */
+	if (ppc4xx_parse_dma_ranges(hose, reg, &dma_window) != 0)
+		goto fail;
+
+	/* Configure outbound ranges POMs */
+	ppc4xx_configure_pci_PMMs(hose, reg);
+
+	/* Configure inbound ranges PIMs */
+	ppc4xx_configure_pci_PTMs(hose, reg, &dma_window);
+
+	/* We don't need the registers anymore */
+	iounmap(reg);
+	return;
+
+ fail:
+	if (hose)
+		pcibios_free_controller(hose);
+	if (reg)
+		iounmap(reg);
+}
+
+/*
+ * 4xx PCI-X part
+ */
+
+/*
+ * Program outbound window (POM) @index of the PCI-X bridge so that PLB range
+ * [@plb_addr, @plb_addr + @size) maps to PCI address @pci_addr.  Index 0
+ * selects the POM0 registers, any other value the POM1 registers.  @flags
+ * is not used.
+ *
+ * Returns 0 on success, or -1 (with a warning) when @size is not a power of
+ * two of at least 4KB or @plb_addr is not aligned on @size.
+ */
+static int __init ppc4xx_setup_one_pcix_POM(struct pci_controller *hose,
+					    void __iomem *reg,
+					    u64 plb_addr,
+					    u64 pci_addr,
+					    u64 size,
+					    unsigned int flags,
+					    int index)
+{
+	u32 plb_hi, plb_lo, pci_hi, pci_lo, size_attr;
+
+	if (!is_power_of_2(size) || size < 0x1000 ||
+	    (plb_addr & (size - 1)) != 0) {
+		printk(KERN_WARNING "%pOF: Resource out of range\n",
+		       hose->dn);
+		return -1;
+	}
+
+	/* Calculate register values */
+	plb_hi = RES_TO_U32_HIGH(plb_addr);
+	plb_lo = RES_TO_U32_LOW(plb_addr);
+	pci_hi = RES_TO_U32_HIGH(pci_addr);
+	pci_lo = RES_TO_U32_LOW(pci_addr);
+	size_attr = (0xffffffffu << ilog2(size)) | 0x1;
+
+	/* Program register values */
+	if (index != 0) {
+		writel(plb_hi, reg + PCIX0_POM1LAH);
+		writel(plb_lo, reg + PCIX0_POM1LAL);
+		writel(pci_hi, reg + PCIX0_POM1PCIAH);
+		writel(pci_lo, reg + PCIX0_POM1PCIAL);
+		writel(size_attr, reg + PCIX0_POM1SA);
+		return 0;
+	}
+
+	writel(plb_hi, reg + PCIX0_POM0LAH);
+	writel(plb_lo, reg + PCIX0_POM0LAL);
+	writel(pci_hi, reg + PCIX0_POM0PCIAH);
+	writel(pci_lo, reg + PCIX0_POM0PCIAL);
+	writel(size_attr, reg + PCIX0_POM0SA);
+
+	return 0;
+}
+
+/*
+ * Program the outbound windows (POMs) of the PCI-X bridge from the hose's
+ * memory resources, using at most two windows.  If none of the programmed
+ * windows starts at PCI address 0, a window is still free and the hose has
+ * an ISA memory size, one more window is set up for the legacy ISA hole.
+ */
+static void __init ppc4xx_configure_pcix_POMs(struct pci_controller *hose,
+					      void __iomem *reg)
+{
+	int idx, win = 0, found_isa_hole = 0;
+
+	/* Setup outbound memory windows */
+	for (idx = 0; idx < 3; idx++) {
+		struct resource *res = &hose->mem_resources[idx];
+		resource_size_t offset = hose->mem_offset[idx];
+
+		/* we only care about memory windows */
+		if (!(res->flags & IORESOURCE_MEM))
+			continue;
+		if (win > 1) {
+			printk(KERN_WARNING "%pOF: Too many ranges\n", hose->dn);
+			break;
+		}
+
+		/* Configure the resource; a rejected one uses up no window */
+		if (ppc4xx_setup_one_pcix_POM(hose, reg,
+					      res->start,
+					      res->start - offset,
+					      resource_size(res),
+					      res->flags,
+					      win) != 0)
+			continue;
+		win++;
+
+		/* If the resource PCI address is 0 then we have our
+		 * ISA memory hole
+		 */
+		if (res->start == offset)
+			found_isa_hole = 1;
+	}
+
+	/* Handle ISA memory hole if not already covered */
+	if (win <= 1 && !found_isa_hole && hose->isa_mem_size &&
+	    ppc4xx_setup_one_pcix_POM(hose, reg, hose->isa_mem_phys, 0,
+				      hose->isa_mem_size, 0, win) == 0)
+		printk(KERN_INFO "%pOF: Legacy ISA memory support enabled\n",
+		       hose->dn);
+}
+
+/*
+ * Program inbound window PIM0 of the PCI-X bridge for the DMA window @res.
+ *
+ * The size/attribute word is the size mask with bit 0 set, bit 1 added for a
+ * prefetchable window and bit 2 added when @enable_msi_hole is set.  With
+ * @big_pim the upper size register is written as all ones.  BAR0 and the
+ * command register are then written through the register window.
+ */
+static void __init ppc4xx_configure_pcix_PIMs(struct pci_controller *hose,
+					      void __iomem *reg,
+					      const struct resource *res,
+					      int big_pim,
+					      int enable_msi_hole)
+{
+	u32 sa;
+
+	/* RAM is always at 0 */
+	writel(0x00000000, reg + PCIX0_PIM0LAH);
+	writel(0x00000000, reg + PCIX0_PIM0LAL);
+
+	/* Calculate window size */
+	sa = (0xffffffffu << ilog2(resource_size(res))) | 0x1;
+	if (res->flags & IORESOURCE_PREFETCH)
+		sa |= 0x2;
+	if (enable_msi_hole)
+		sa |= 0x4;
+
+	writel(sa, reg + PCIX0_PIM0SA);
+	if (big_pim)
+		writel(0xffffffff, reg + PCIX0_PIM0SAH);
+
+	/* Map on PCI side */
+	writel(0x00000000, reg + PCIX0_BAR0H);
+	writel(res->start, reg + PCIX0_BAR0L);
+	writew(0x0006, reg + PCIX0_COMMAND);
+}
+
+/*
+ * Probe and set up one 4xx PCI-X host bridge described by @np.
+ *
+ * Fetches the config (reg index 0) and internal register (reg index 3)
+ * resources, reads the "large-inbound-windows", "enable-msi-hole" and
+ * "primary" properties, maps the internal registers, allocates the host
+ * controller, sets up indirect config access, disables all windows, parses
+ * the outbound ranges and the inbound DMA window, and programs the POMs and
+ * PIMs.  The register mapping is dropped before returning; on failure the
+ * controller is freed as well.
+ */
+static void __init ppc4xx_probe_pcix_bridge(struct device_node *np)
+{
+	struct resource rsrc_cfg;
+	struct resource rsrc_reg;
+	struct resource dma_window;
+	struct pci_controller *hose = NULL;
+	void __iomem *reg = NULL;
+	const int *bus_range;
+	int big_pim, msi, primary;
+
+	/* Fetch config space registers address */
+	if (of_address_to_resource(np, 0, &rsrc_cfg)) {
+		printk(KERN_ERR "%pOF: Can't get PCI-X config register base !\n",
+		       np);
+		return;
+	}
+	/* Fetch host bridge internal registers address */
+	if (of_address_to_resource(np, 3, &rsrc_reg)) {
+		printk(KERN_ERR "%pOF: Can't get PCI-X internal register base !\n",
+		       np);
+		return;
+	}
+
+	/* Check if it supports large PIMs (440GX) */
+	big_pim = of_property_read_bool(np, "large-inbound-windows");
+
+	/* Check if we should enable MSIs inbound hole */
+	msi = of_property_read_bool(np, "enable-msi-hole");
+
+	/* Check if primary bridge */
+	primary = of_property_read_bool(np, "primary");
+
+	/* Get bus range if any */
+	bus_range = of_get_property(np, "bus-range", NULL);
+
+	/* Map registers */
+	reg = ioremap(rsrc_reg.start, resource_size(&rsrc_reg));
+	if (reg == NULL) {
+		printk(KERN_ERR "%pOF: Can't map registers !\n", np);
+		goto fail;
+	}
+
+	/* Allocate the host controller data structure */
+	hose = pcibios_alloc_controller(np);
+	if (!hose)
+		goto fail;
+
+	/* Without a bus-range property, cover buses 0x00..0xff */
+	hose->first_busno = bus_range ? bus_range[0] : 0x0;
+	hose->last_busno = bus_range ? bus_range[1] : 0xff;
+
+	/* Setup config space */
+	setup_indirect_pci(hose, rsrc_cfg.start, rsrc_cfg.start + 0x4,
+			   PPC_INDIRECT_TYPE_SET_CFG_TYPE);
+
+	/* Disable all windows */
+	writel(0, reg + PCIX0_POM0SA);
+	writel(0, reg + PCIX0_POM1SA);
+	writel(0, reg + PCIX0_POM2SA);
+	writel(0, reg + PCIX0_PIM0SA);
+	writel(0, reg + PCIX0_PIM1SA);
+	writel(0, reg + PCIX0_PIM2SA);
+	if (big_pim) {
+		writel(0, reg + PCIX0_PIM0SAH);
+		writel(0, reg + PCIX0_PIM2SAH);
+	}
+
+	/* Parse outbound mapping resources */
+	pci_process_bridge_OF_ranges(hose, np, primary);
+
+	/* Parse inbound mapping resources */
+	if (ppc4xx_parse_dma_ranges(hose, reg, &dma_window) != 0)
+		goto fail;
+
+	/* Configure outbound ranges POMs */
+	ppc4xx_configure_pcix_POMs(hose, reg);
+
+	/* Configure inbound ranges PIMs */
+	ppc4xx_configure_pcix_PIMs(hose, reg, &dma_window, big_pim, msi);
+
+	/* We don't need the registers anymore */
+	iounmap(reg);
+	return;
+
+ fail:
+	if (hose)
+		pcibios_free_controller(hose);
+	if (reg)
+		iounmap(reg);
+}
+
+#ifdef CONFIG_PPC4xx_PCI_EXPRESS
+
+/*
+ * 4xx PCI-Express part
+ *
+ * We support 3 parts currently based on the compatible property:
+ *
+ * ibm,plb-pciex-440spe
+ * ibm,plb-pciex-405ex
+ * ibm,plb-pciex-460ex
+ *
+ * Anything else will be rejected for now as they are all subtly
+ * different unfortunately.
+ *
+ */
+
+#define MAX_PCIE_BUS_MAPPED 0x40
+
+/* Per-port state for a 4xx PCI-Express port. */
+struct ppc4xx_pciex_port
+{
+ struct pci_controller *hose;
+ struct device_node *node;
+ unsigned int index; /* port number, printed as "PCIE%d" in messages */
+ int endpoint; /* non-zero: configure as endpoint rather than root port */
+ int link; /* set to 1 once the link has been seen up */
+ int has_ibpre; /* non-zero: check presence detect before waiting for link */
+ unsigned int sdr_base; /* base added to the PESDRn_* offsets for SDR0 accesses */
+ dcr_host_t dcrs;
+ struct resource cfg_space;
+ struct resource utl_regs;
+ void __iomem *utl_base;
+};
+
+static struct ppc4xx_pciex_port *ppc4xx_pciex_ports;
+static unsigned int ppc4xx_pciex_port_count;
+
+/*
+ * SoC-specific hooks for the PCI-Express core; one implementation is
+ * selected through the ppc4xx_pciex_hwops pointer.
+ */
+struct ppc4xx_pciex_hwops
+{
+ bool want_sdr;
+ int (*core_init)(struct device_node *np); /* global core setup */
+ int (*port_init_hw)(struct ppc4xx_pciex_port *port); /* per-port hardware init */
+ int (*setup_utl)(struct ppc4xx_pciex_port *port);
+ void (*check_link)(struct ppc4xx_pciex_port *port); /* link detection for one port */
+};
+
+static struct ppc4xx_pciex_hwops *ppc4xx_pciex_hwops;
+
+/*
+ * Poll the port's SDR0 register at @sdr_offset until (value & @mask) equals
+ * @value, sleeping 1ms between reads, for at most @timeout_ms reads.
+ *
+ * Returns 0 as soon as the condition holds, -1 if it never did.
+ */
+static int __init ppc4xx_pciex_wait_on_sdr(struct ppc4xx_pciex_port *port,
+					   unsigned int sdr_offset,
+					   unsigned int mask,
+					   unsigned int value,
+					   int timeout_ms)
+{
+	while (timeout_ms--) {
+		u32 status = mfdcri(SDR0, port->sdr_base + sdr_offset);
+
+		if ((status & mask) != value) {
+			msleep(1);
+			continue;
+		}
+
+		pr_debug("PCIE%d: Wait on SDR %x success with tm %d (%08x)\n",
+			 port->index, sdr_offset, timeout_ms, status);
+		return 0;
+	}
+	return -1;
+}
+
+/*
+ * Wait up to 10ms for bit 20 of the port's RCSSTS register to read as zero,
+ * which marks the end of the reset.  Returns 0 on success, -1 (with a
+ * warning) on timeout.
+ */
+static int __init ppc4xx_pciex_port_reset_sdr(struct ppc4xx_pciex_port *port)
+{
+	/* Wait for reset to complete */
+	int timed_out = ppc4xx_pciex_wait_on_sdr(port, PESDRn_RCSSTS,
+						 1 << 20, 0, 10);
+
+	if (!timed_out)
+		return 0;
+
+	printk(KERN_WARNING "PCIE%d: PGRST failed\n",
+	       port->index);
+	return -1;
+}
+
+
+/*
+ * Detect whether the port has a link, using the PESDRn_LOOP register.
+ *
+ * When the port supports presence detect (has_ibpre), first wait up to
+ * 100ms for bit 28; if it never shows up, report that no device is present.
+ * Otherwise wait up to 2s for bit 0x1000 and set port->link on success.
+ *
+ * A missing link is not treated as a failure: config space accesses are
+ * simply filtered out, which keeps the door open for hotplug later on.
+ */
+static void __init ppc4xx_pciex_check_link_sdr(struct ppc4xx_pciex_port *port)
+{
+	printk(KERN_INFO "PCIE%d: Checking link...\n", port->index);
+
+	if (port->has_ibpre &&
+	    ppc4xx_pciex_wait_on_sdr(port, PESDRn_LOOP,
+				     1 << 28, 1 << 28, 100)) {
+		printk(KERN_INFO "PCIE%d: No device detected.\n", port->index);
+		return;
+	}
+
+	printk(KERN_INFO
+	       "PCIE%d: Device detected, waiting for link...\n",
+	       port->index);
+
+	if (ppc4xx_pciex_wait_on_sdr(port, PESDRn_LOOP,
+				     0x1000, 0x1000, 2000)) {
+		printk(KERN_WARNING
+		       "PCIE%d: Link up failed\n", port->index);
+		return;
+	}
+
+	printk(KERN_INFO
+	       "PCIE%d: link is up !\n", port->index);
+	port->link = 1;
+}
+
+#ifdef CONFIG_44x
+
+/*
+ * Check various reset bits of the 440SPe PCIe core
+ *
+ * If the PLLLCT1 reset bit is already clear, the three RCSSET registers are
+ * first rewritten with 0x01010000.  They are then read back and checked:
+ * rstgu and rstdl must be set on every port, while rstpyn, hldplb, rdy and
+ * shutdown must be clear on every port.  Each violated condition is logged.
+ *
+ * @np is not used.  Returns 0 when all checks pass, -1 otherwise.
+ */
+static int __init ppc440spe_pciex_check_reset(struct device_node *np)
+{
+	u32 pe0, pe1, pe2;
+	u32 all_set, any_set;
+	int err = 0;
+
+	/* SDR0_PEGPLLLCT1 reset */
+	if (!(mfdcri(SDR0, PESDR0_PLLLCT1) & 0x01000000)) {
+		/*
+		 * the PCIe core was probably already initialised
+		 * by firmware - let's re-reset RCSSET regs
+		 *
+		 * -- Shouldn't we also re-reset the whole thing ? -- BenH
+		 */
+		pr_debug("PCIE: SDR0_PLLLCT1 already reset.\n");
+		mtdcri(SDR0, PESDR0_440SPE_RCSSET, 0x01010000);
+		mtdcri(SDR0, PESDR1_440SPE_RCSSET, 0x01010000);
+		mtdcri(SDR0, PESDR2_440SPE_RCSSET, 0x01010000);
+	}
+
+	pe0 = mfdcri(SDR0, PESDR0_440SPE_RCSSET);
+	pe1 = mfdcri(SDR0, PESDR1_440SPE_RCSSET);
+	pe2 = mfdcri(SDR0, PESDR2_440SPE_RCSSET);
+
+	/* Bits set on every port / bits set on at least one port */
+	all_set = pe0 & pe1 & pe2;
+	any_set = pe0 | pe1 | pe2;
+
+	/* SDR0_PExRCSSET rstgu */
+	if (!(all_set & 0x01000000)) {
+		printk(KERN_INFO "PCIE: SDR0_PExRCSSET rstgu error\n");
+		err = -1;
+	}
+
+	/* SDR0_PExRCSSET rstdl */
+	if (!(all_set & 0x00010000)) {
+		printk(KERN_INFO "PCIE: SDR0_PExRCSSET rstdl error\n");
+		err = -1;
+	}
+
+	/* SDR0_PExRCSSET rstpyn */
+	if (any_set & 0x00001000) {
+		printk(KERN_INFO "PCIE: SDR0_PExRCSSET rstpyn error\n");
+		err = -1;
+	}
+
+	/* SDR0_PExRCSSET hldplb */
+	if (any_set & 0x10000000) {
+		printk(KERN_INFO "PCIE: SDR0_PExRCSSET hldplb error\n");
+		err = -1;
+	}
+
+	/* SDR0_PExRCSSET rdy */
+	if (any_set & 0x00100000) {
+		printk(KERN_INFO "PCIE: SDR0_PExRCSSET rdy error\n");
+		err = -1;
+	}
+
+	/* SDR0_PExRCSSET shutdown */
+	if (any_set & 0x00000100) {
+		printk(KERN_INFO "PCIE: SDR0_PExRCSSET shutdown error\n");
+		err = -1;
+	}
+
+	return err;
+}
+
+/*
+ * Global PCIe core initializations for 440SPe core
+ *
+ * Sets bit 28 of PLLLCT1 (PLL clock receiver to LVPECL), verifies the reset
+ * state and the resistance calibration bit, releases the PLL reset and then
+ * polls PLLLCT3 for the lock bit, up to 20 times with a 1us delay.
+ *
+ * Returns 3 on success, -ENXIO when the reset check fails, or -1 when
+ * calibration failed or the VCO did not lock.
+ */
+static int __init ppc440spe_pciex_core_init(struct device_node *np)
+{
+	int tries_left;
+
+	/* Set PLL clock receiver to LVPECL */
+	dcri_clrset(SDR0, PESDR0_PLLLCT1, 0, 1 << 28);
+
+	/* Shouldn't we do all the calibration stuff etc... here ? */
+	if (ppc440spe_pciex_check_reset(np))
+		return -ENXIO;
+
+	if (!(mfdcri(SDR0, PESDR0_PLLLCT2) & 0x10000)) {
+		printk(KERN_INFO "PCIE: PESDR_PLLCT2 resistance calibration "
+		       "failed (0x%08x)\n",
+		       mfdcri(SDR0, PESDR0_PLLLCT2));
+		return -1;
+	}
+
+	/* De-assert reset of PCIe PLL, wait for lock */
+	dcri_clrset(SDR0, PESDR0_PLLLCT1, 1 << 24, 0);
+	udelay(3);
+
+	for (tries_left = 20; tries_left; tries_left--) {
+		if (mfdcri(SDR0, PESDR0_PLLLCT3) & 0x10000000)
+			break;
+		udelay(1);
+	}
+	if (!tries_left) {
+		printk(KERN_INFO "PCIE: VCO output not locked\n");
+		return -1;
+	}
+
+	pr_debug("PCIE initialization OK\n");
+
+	return 3;
+}
+
+/*
+ * Per-port hardware setup shared by both 440SPe revisions.
+ *
+ * Writes the port type (legacy endpoint or root port) and link width (x8
+ * for port index 0, x4 otherwise) to DLPSET, loads the UTLSET and HSSLnSET1
+ * values, updates RCSSET and then returns whatever
+ * ppc4xx_pciex_port_reset_sdr() reports.
+ */
+static int __init ppc440spe_pciex_init_port_hw(struct ppc4xx_pciex_port *port)
+{
+	/*
+	 * NOTE(review): this variable used to be initialised to 1 << 24 and
+	 * was then unconditionally overwritten below, so that bit never
+	 * reached DLPSET.  The dead store is removed; the value written is
+	 * unchanged.  Whether bit 24 was meant to be OR-ed in (the 405EX
+	 * code in this file does so) should be confirmed against the manual.
+	 */
+	u32 val;
+
+	if (port->endpoint)
+		val = PTYPE_LEGACY_ENDPOINT << 20;
+	else
+		val = PTYPE_ROOT_PORT << 20;
+
+	if (port->index == 0)
+		val |= LNKW_X8 << 12;
+	else
+		val |= LNKW_X4 << 12;
+
+	mtdcri(SDR0, port->sdr_base + PESDRn_DLPSET, val);
+	mtdcri(SDR0, port->sdr_base + PESDRn_UTLSET1, 0x20222222);
+	/* UTLSET2 is only written on rev A silicon */
+	if (ppc440spe_revA())
+		mtdcri(SDR0, port->sdr_base + PESDRn_UTLSET2, 0x11000000);
+	mtdcri(SDR0, port->sdr_base + PESDRn_440SPE_HSSL0SET1, 0x35000000);
+	mtdcri(SDR0, port->sdr_base + PESDRn_440SPE_HSSL1SET1, 0x35000000);
+	mtdcri(SDR0, port->sdr_base + PESDRn_440SPE_HSSL2SET1, 0x35000000);
+	mtdcri(SDR0, port->sdr_base + PESDRn_440SPE_HSSL3SET1, 0x35000000);
+	/* Port 0 is the x8 port and gets lanes 4-7 programmed as well */
+	if (port->index == 0) {
+		mtdcri(SDR0, port->sdr_base + PESDRn_440SPE_HSSL4SET1,
+		       0x35000000);
+		mtdcri(SDR0, port->sdr_base + PESDRn_440SPE_HSSL5SET1,
+		       0x35000000);
+		mtdcri(SDR0, port->sdr_base + PESDRn_440SPE_HSSL6SET1,
+		       0x35000000);
+		mtdcri(SDR0, port->sdr_base + PESDRn_440SPE_HSSL7SET1,
+		       0x35000000);
+	}
+	/* Clear bits 24 and 16 of RCSSET, set bit 12 */
+	dcri_clrset(SDR0, port->sdr_base + PESDRn_RCSSET,
+		    (1 << 24) | (1 << 16), 1 << 12);
+
+	return ppc4xx_pciex_port_reset_sdr(port);
+}
+
+/* 440SPe rev A: nothing beyond the common per-port setup is done. */
+static int __init ppc440speA_pciex_init_port_hw(struct ppc4xx_pciex_port *port)
+{
+	int rc;
+
+	rc = ppc440spe_pciex_init_port_hw(port);
+
+	return rc;
+}
+
+/*
+ * 440SPe rev B: run the common per-port setup, then set has_ibpre on the
+ * port whatever the outcome was.  The setup result is returned.
+ */
+static int __init ppc440speB_pciex_init_port_hw(struct ppc4xx_pciex_port *port)
+{
+	int status;
+
+	status = ppc440spe_pciex_init_port_hw(port);
+	port->has_ibpre = 1;
+
+	return status;
+}
+
+static int ppc440speA_pciex_init_utl(struct ppc4xx_pciex_port *port)
+{
+ /*
+  * UTL setup installed in the 440SPe rev A hwops: one write to the
+  * port's DCRO_PEGPL_SPECIAL DCR followed by the UTL registers below,
+  * in this order.  Always returns 0.
+  *
+  * XXX Check what that value means... I hate magic
+  */
+ dcr_write(port->dcrs, DCRO_PEGPL_SPECIAL, 0x68782800);
+
+ /*
+ * Set buffer allocations and then assert VRB and TXE.
+ * (PEUTL_PCTL is the last register written.)
+ */
+ out_be32(port->utl_base + PEUTL_OUTTR, 0x08000000);
+ out_be32(port->utl_base + PEUTL_INTR, 0x02000000);
+ out_be32(port->utl_base + PEUTL_OPDBSZ, 0x10000000);
+ out_be32(port->utl_base + PEUTL_PBBSZ, 0x53000000);
+ out_be32(port->utl_base + PEUTL_IPHBSZ, 0x08000000);
+ out_be32(port->utl_base + PEUTL_IPDBSZ, 0x10000000);
+ out_be32(port->utl_base + PEUTL_RCIRQEN, 0x00f00000);
+ out_be32(port->utl_base + PEUTL_PCTL, 0x80800066);
+
+ return 0;
+}
+
+static int ppc440speB_pciex_init_utl(struct ppc4xx_pciex_port *port)
+{
+ /*
+  * UTL setup installed in the 440SPe rev B hwops: a single PEUTL_PBCTL
+  * write.  Always returns 0.
+  *
+  * Report CRS to the operating system
+  */
+ out_be32(port->utl_base + PEUTL_PBCTL, 0x08000000);
+
+ return 0;
+}
+
+static struct ppc4xx_pciex_hwops ppc440speA_pcie_hwops __initdata = /* selected when ppc440spe_revA() is true */
+{
+ .want_sdr = true,
+ .core_init = ppc440spe_pciex_core_init, /* its positive return value becomes the port count */
+ .port_init_hw = ppc440speA_pciex_init_port_hw, /* first step of port init; non-zero aborts it */
+ .setup_utl = ppc440speA_pciex_init_utl, /* called once the UTL registers are mapped */
+ .check_link = ppc4xx_pciex_check_link_sdr,
+};
+
+static struct ppc4xx_pciex_hwops ppc440speB_pcie_hwops __initdata = /* selected when ppc440spe_revA() is false */
+{
+ .want_sdr = true,
+ .core_init = ppc440spe_pciex_core_init, /* same core init as rev A */
+ .port_init_hw = ppc440speB_pciex_init_port_hw, /* common setup, then sets has_ibpre */
+ .setup_utl = ppc440speB_pciex_init_utl, /* single PEUTL_PBCTL write */
+ .check_link = ppc4xx_pciex_check_link_sdr,
+};
+
+/* 460EX: no core-wide setup is performed; two ports are reported. */
+static int __init ppc460ex_pciex_core_init(struct device_node *np)
+{
+	const int nr_ports = 2;
+
+	return nr_ports;
+}
+
+/*
+ * Poll bit 0 of the given 460EX RSTSTA register every 10us, giving up
+ * after 10000 polls (about 100ms).  Returns 0 once the bit is set and
+ * -EBUSY on timeout.
+ */
+static int __init ppc460ex_pciex_wait_phy_reset(unsigned int rststa)
+{
+	int tries;
+
+	for (tries = 10000; tries > 0; tries--) {
+		if (mfdcri(SDR0, rststa) & 0x1)
+			return 0;
+		udelay(10);
+	}
+
+	return -EBUSY;
+}
+
+/*
+ * Per-port hardware setup for the 460EX.
+ *
+ * Port 0 is configured x1 and port 1 x4.  After the link/UTL/PHY settings
+ * are written, RSTGU and RSTPYN are set in RCSSET, the PHY reset status is
+ * polled, RSTGU and RSTDL are cleared again and the port is handed to
+ * ppc4xx_pciex_port_reset_sdr().
+ *
+ * The PHY poll used to spin forever ("FIXME add timeout"); it is now
+ * bounded, and -EBUSY is returned if the PHY never reports reset.
+ * Otherwise the result of ppc4xx_pciex_port_reset_sdr() is returned.
+ */
+static int __init ppc460ex_pciex_init_port_hw(struct ppc4xx_pciex_port *port)
+{
+	u32 val;
+	u32 utlset1;
+	int rc = 0;
+
+	if (port->endpoint)
+		val = PTYPE_LEGACY_ENDPOINT << 20;
+	else
+		val = PTYPE_ROOT_PORT << 20;
+
+	if (port->index == 0) {
+		val |= LNKW_X1 << 12;
+		utlset1 = 0x20000000;
+	} else {
+		val |= LNKW_X4 << 12;
+		utlset1 = 0x20101101;
+	}
+
+	mtdcri(SDR0, port->sdr_base + PESDRn_DLPSET, val);
+	mtdcri(SDR0, port->sdr_base + PESDRn_UTLSET1, utlset1);
+	mtdcri(SDR0, port->sdr_base + PESDRn_UTLSET2, 0x01210000);
+
+	switch (port->index) {
+	case 0:
+		mtdcri(SDR0, PESDR0_460EX_L0CDRCTL, 0x00003230);
+		mtdcri(SDR0, PESDR0_460EX_L0DRV, 0x00000130);
+		mtdcri(SDR0, PESDR0_460EX_L0CLK, 0x00000006);
+
+		mtdcri(SDR0, PESDR0_460EX_PHY_CTL_RST,0x10000000);
+		break;
+
+	case 1:
+		mtdcri(SDR0, PESDR1_460EX_L0CDRCTL, 0x00003230);
+		mtdcri(SDR0, PESDR1_460EX_L1CDRCTL, 0x00003230);
+		mtdcri(SDR0, PESDR1_460EX_L2CDRCTL, 0x00003230);
+		mtdcri(SDR0, PESDR1_460EX_L3CDRCTL, 0x00003230);
+		mtdcri(SDR0, PESDR1_460EX_L0DRV, 0x00000130);
+		mtdcri(SDR0, PESDR1_460EX_L1DRV, 0x00000130);
+		mtdcri(SDR0, PESDR1_460EX_L2DRV, 0x00000130);
+		mtdcri(SDR0, PESDR1_460EX_L3DRV, 0x00000130);
+		mtdcri(SDR0, PESDR1_460EX_L0CLK, 0x00000006);
+		mtdcri(SDR0, PESDR1_460EX_L1CLK, 0x00000006);
+		mtdcri(SDR0, PESDR1_460EX_L2CLK, 0x00000006);
+		mtdcri(SDR0, PESDR1_460EX_L3CLK, 0x00000006);
+
+		mtdcri(SDR0, PESDR1_460EX_PHY_CTL_RST,0x10000000);
+		break;
+	}
+
+	mtdcri(SDR0, port->sdr_base + PESDRn_RCSSET,
+	       mfdcri(SDR0, port->sdr_base + PESDRn_RCSSET) |
+	       (PESDRx_RCSSET_RSTGU | PESDRx_RCSSET_RSTPYN));
+
+	/* Poll for PHY reset; other port indexes are not polled */
+	switch (port->index) {
+	case 0:
+		rc = ppc460ex_pciex_wait_phy_reset(PESDR0_460EX_RSTSTA);
+		break;
+	case 1:
+		rc = ppc460ex_pciex_wait_phy_reset(PESDR1_460EX_RSTSTA);
+		break;
+	}
+	if (rc) {
+		printk(KERN_WARNING "%s: PCIE: Can't reset PHY\n", __func__);
+		return rc;
+	}
+
+	mtdcri(SDR0, port->sdr_base + PESDRn_RCSSET,
+	       (mfdcri(SDR0, port->sdr_base + PESDRn_RCSSET) &
+		~(PESDRx_RCSSET_RSTGU | PESDRx_RCSSET_RSTDL)) |
+	       PESDRx_RCSSET_RSTPYN);
+
+	port->has_ibpre = 1;
+
+	return ppc4xx_pciex_port_reset_sdr(port);
+}
+
+static int ppc460ex_pciex_init_utl(struct ppc4xx_pciex_port *port)
+{ /* UTL setup referenced by both the 460EX and APM821xx hwops; always returns 0 */
+ dcr_write(port->dcrs, DCRO_PEGPL_SPECIAL, 0x0);
+
+ /*
+ * Set buffer allocations and then assert VRB and TXE.
+ * (PEUTL_PBCTL is written first, PEUTL_PCTL last.)
+ */
+ out_be32(port->utl_base + PEUTL_PBCTL, 0x0800000c);
+ out_be32(port->utl_base + PEUTL_OUTTR, 0x08000000);
+ out_be32(port->utl_base + PEUTL_INTR, 0x02000000);
+ out_be32(port->utl_base + PEUTL_OPDBSZ, 0x04000000);
+ out_be32(port->utl_base + PEUTL_PBBSZ, 0x00000000);
+ out_be32(port->utl_base + PEUTL_IPHBSZ, 0x02000000);
+ out_be32(port->utl_base + PEUTL_IPDBSZ, 0x04000000);
+ out_be32(port->utl_base + PEUTL_RCIRQEN,0x00f00000);
+ out_be32(port->utl_base + PEUTL_PCTL, 0x80800066);
+
+ return 0;
+}
+
+static struct ppc4xx_pciex_hwops ppc460ex_pcie_hwops __initdata = /* used for "ibm,plb-pciex-460ex" */
+{
+ .want_sdr = true,
+ .core_init = ppc460ex_pciex_core_init, /* reports 2 ports */
+ .port_init_hw = ppc460ex_pciex_init_port_hw, /* per-port link/PHY setup */
+ .setup_utl = ppc460ex_pciex_init_utl, /* called once the UTL registers are mapped */
+ .check_link = ppc4xx_pciex_check_link_sdr,
+};
+
+/* APM821xx: no core-wide setup is performed; a single port is reported. */
+static int __init apm821xx_pciex_core_init(struct device_node *np)
+{
+	const int nr_ports = 1;
+
+	/* Return the number of pcie port */
+	return nr_ports;
+}
+
+/*
+ * Per-port hardware setup for the APM821xx (single x1 port).
+ *
+ * Returns 0 on success, or -EBUSY when the PHY does not report reset
+ * within the wait performed by ppc4xx_pciex_wait_on_sdr().
+ */
+static int __init apm821xx_pciex_init_port_hw(struct ppc4xx_pciex_port *port)
+{
+	u32 dlpset;
+	u32 rcsset;
+	u32 rststa_off;
+
+	/*
+	 * Do a software reset on PCIe ports.
+	 * This works around PCI drivers not re-assigning bus numbers for
+	 * PCIE devices after Uboot scanned and configured all the buses
+	 * (eg. PCIE NIC IntelPro/1000 PT quad port, SAS LSI 1064E)
+	 */
+	mtdcri(SDR0, PESDR0_460EX_PHY_CTL_RST, 0x0);
+	mdelay(10);
+
+	dlpset = (port->endpoint ? PTYPE_LEGACY_ENDPOINT : PTYPE_ROOT_PORT)
+		 << 20;
+	dlpset |= LNKW_X1 << 12;
+
+	mtdcri(SDR0, port->sdr_base + PESDRn_DLPSET, dlpset);
+	mtdcri(SDR0, port->sdr_base + PESDRn_UTLSET1, 0x00000000);
+	mtdcri(SDR0, port->sdr_base + PESDRn_UTLSET2, 0x01010000);
+
+	mtdcri(SDR0, PESDR0_460EX_L0CDRCTL, 0x00003230);
+	mtdcri(SDR0, PESDR0_460EX_L0DRV, 0x00000130);
+	mtdcri(SDR0, PESDR0_460EX_L0CLK, 0x00000006);
+
+	mtdcri(SDR0, PESDR0_460EX_PHY_CTL_RST, 0x10000000);
+	mdelay(50);
+	mtdcri(SDR0, PESDR0_460EX_PHY_CTL_RST, 0x30000000);
+
+	/* Set RSTGU and RSTPYN on top of the current RCSSET value */
+	rcsset = mfdcri(SDR0, port->sdr_base + PESDRn_RCSSET);
+	mtdcri(SDR0, port->sdr_base + PESDRn_RCSSET,
+	       rcsset | (PESDRx_RCSSET_RSTGU | PESDRx_RCSSET_RSTPYN));
+
+	/*
+	 * Poll for PHY reset.  The wait helper is handed an offset, so the
+	 * absolute RSTSTA register number is made relative to sdr_base.
+	 */
+	rststa_off = PESDR0_460EX_RSTSTA - port->sdr_base;
+	if (ppc4xx_pciex_wait_on_sdr(port, rststa_off, 0x1, 1, 100)) {
+		printk(KERN_WARNING "%s: PCIE: Can't reset PHY\n", __func__);
+		return -EBUSY;
+	}
+
+	/* Clear RSTGU and RSTDL, keep RSTPYN set */
+	rcsset = mfdcri(SDR0, port->sdr_base + PESDRn_RCSSET);
+	rcsset &= ~(PESDRx_RCSSET_RSTGU | PESDRx_RCSSET_RSTDL);
+	mtdcri(SDR0, port->sdr_base + PESDRn_RCSSET,
+	       rcsset | PESDRx_RCSSET_RSTPYN);
+
+	port->has_ibpre = 1;
+
+	return 0;
+}
+
+static struct ppc4xx_pciex_hwops apm821xx_pcie_hwops __initdata = { /* used for "ibm,plb-pciex-apm821xx" */
+ .want_sdr = true,
+ .core_init = apm821xx_pciex_core_init, /* reports 1 port */
+ .port_init_hw = apm821xx_pciex_init_port_hw, /* per-port link/PHY setup */
+ .setup_utl = ppc460ex_pciex_init_utl, /* the 460EX UTL setup is reused */
+ .check_link = ppc4xx_pciex_check_link_sdr,
+};
+
+static int __init ppc460sx_pciex_core_init(struct device_node *np)
+{
+ /*
+  * Core-wide setup for the 460SX: write the HSS lane parameters of the
+  * three PCIe blocks (8 lanes on block 0, 4 on blocks 1 and 2), clear
+  * bit 0x100 of PESDR0_PLLLCT2, write RSTDL | RSTGU to the RCSSET
+  * register of every block, then report the port count: 3 when bit 0
+  * of PESDR1_460SX_HSSCTLSET reads back set, otherwise 2.  np is not
+  * used.
+  *
+  * HSS drive amplitude
+  */
+ mtdcri(SDR0, PESDR0_460SX_HSSL0DAMP, 0xB9843211);
+ mtdcri(SDR0, PESDR0_460SX_HSSL1DAMP, 0xB9843211);
+ mtdcri(SDR0, PESDR0_460SX_HSSL2DAMP, 0xB9843211);
+ mtdcri(SDR0, PESDR0_460SX_HSSL3DAMP, 0xB9843211);
+ mtdcri(SDR0, PESDR0_460SX_HSSL4DAMP, 0xB9843211);
+ mtdcri(SDR0, PESDR0_460SX_HSSL5DAMP, 0xB9843211);
+ mtdcri(SDR0, PESDR0_460SX_HSSL6DAMP, 0xB9843211);
+ mtdcri(SDR0, PESDR0_460SX_HSSL7DAMP, 0xB9843211);
+
+ mtdcri(SDR0, PESDR1_460SX_HSSL0DAMP, 0xB9843211);
+ mtdcri(SDR0, PESDR1_460SX_HSSL1DAMP, 0xB9843211);
+ mtdcri(SDR0, PESDR1_460SX_HSSL2DAMP, 0xB9843211);
+ mtdcri(SDR0, PESDR1_460SX_HSSL3DAMP, 0xB9843211);
+
+ mtdcri(SDR0, PESDR2_460SX_HSSL0DAMP, 0xB9843211);
+ mtdcri(SDR0, PESDR2_460SX_HSSL1DAMP, 0xB9843211);
+ mtdcri(SDR0, PESDR2_460SX_HSSL2DAMP, 0xB9843211);
+ mtdcri(SDR0, PESDR2_460SX_HSSL3DAMP, 0xB9843211);
+
+ /* HSS TX pre-emphasis */
+ mtdcri(SDR0, PESDR0_460SX_HSSL0COEFA, 0xDCB98987);
+ mtdcri(SDR0, PESDR0_460SX_HSSL1COEFA, 0xDCB98987);
+ mtdcri(SDR0, PESDR0_460SX_HSSL2COEFA, 0xDCB98987);
+ mtdcri(SDR0, PESDR0_460SX_HSSL3COEFA, 0xDCB98987);
+ mtdcri(SDR0, PESDR0_460SX_HSSL4COEFA, 0xDCB98987);
+ mtdcri(SDR0, PESDR0_460SX_HSSL5COEFA, 0xDCB98987);
+ mtdcri(SDR0, PESDR0_460SX_HSSL6COEFA, 0xDCB98987);
+ mtdcri(SDR0, PESDR0_460SX_HSSL7COEFA, 0xDCB98987);
+
+ mtdcri(SDR0, PESDR1_460SX_HSSL0COEFA, 0xDCB98987);
+ mtdcri(SDR0, PESDR1_460SX_HSSL1COEFA, 0xDCB98987);
+ mtdcri(SDR0, PESDR1_460SX_HSSL2COEFA, 0xDCB98987);
+ mtdcri(SDR0, PESDR1_460SX_HSSL3COEFA, 0xDCB98987);
+
+ mtdcri(SDR0, PESDR2_460SX_HSSL0COEFA, 0xDCB98987);
+ mtdcri(SDR0, PESDR2_460SX_HSSL1COEFA, 0xDCB98987);
+ mtdcri(SDR0, PESDR2_460SX_HSSL2COEFA, 0xDCB98987);
+ mtdcri(SDR0, PESDR2_460SX_HSSL3COEFA, 0xDCB98987);
+
+ /* HSS TX calibration control */
+ mtdcri(SDR0, PESDR0_460SX_HSSL1CALDRV, 0x22222222);
+ mtdcri(SDR0, PESDR1_460SX_HSSL1CALDRV, 0x22220000);
+ mtdcri(SDR0, PESDR2_460SX_HSSL1CALDRV, 0x22220000);
+
+ /* HSS TX slew control */
+ mtdcri(SDR0, PESDR0_460SX_HSSSLEW, 0xFFFFFFFF);
+ mtdcri(SDR0, PESDR1_460SX_HSSSLEW, 0xFFFF0000);
+ mtdcri(SDR0, PESDR2_460SX_HSSSLEW, 0xFFFF0000);
+
+ /* Set HSS PRBS enabled */
+ mtdcri(SDR0, PESDR0_460SX_HSSCTLSET, 0x00001130);
+ mtdcri(SDR0, PESDR2_460SX_HSSCTLSET, 0x00001130); /* NOTE(review): PESDR1_460SX_HSSCTLSET is not written here - confirm this is intended */
+
+ udelay(100);
+
+ /* De-assert PLLRESET */
+ dcri_clrset(SDR0, PESDR0_PLLLCT2, 0x00000100, 0);
+
+ /* Reset DL, UTL, GPL before configuration */
+ mtdcri(SDR0, PESDR0_460SX_RCSSET,
+ PESDRx_RCSSET_RSTDL | PESDRx_RCSSET_RSTGU);
+ mtdcri(SDR0, PESDR1_460SX_RCSSET,
+ PESDRx_RCSSET_RSTDL | PESDRx_RCSSET_RSTGU);
+ mtdcri(SDR0, PESDR2_460SX_RCSSET,
+ PESDRx_RCSSET_RSTDL | PESDRx_RCSSET_RSTGU);
+
+ udelay(100);
+
+ /*
+ * If bifurcation is not enabled, u-boot would have disabled the
+ * third PCIe port
+ */
+ if (((mfdcri(SDR0, PESDR1_460SX_HSSCTLSET) & 0x00000001) ==
+ 0x00000001)) {
+ printk(KERN_INFO "PCI: PCIE bifurcation setup successfully.\n");
+ printk(KERN_INFO "PCI: Total 3 PCIE ports are present\n");
+ return 3;
+ }
+
+ printk(KERN_INFO "PCI: Total 2 PCIE ports are present\n");
+ return 2;
+}
+
+/*
+ * Per-port hardware setup for the 460SX.
+ *
+ * Bit 0x01000000 of UTLSET2 is cleared for an endpoint and set otherwise;
+ * RSTGU and RSTDL are then cleared and RSTPYN set in RCSSET.  Returns the
+ * result of ppc4xx_pciex_port_reset_sdr().
+ */
+static int __init ppc460sx_pciex_init_port_hw(struct ppc4xx_pciex_port *port)
+{
+	u32 clr_bits, set_bits;
+
+	if (port->endpoint) {
+		clr_bits = 0x01000000;
+		set_bits = 0;
+	} else {
+		clr_bits = 0;
+		set_bits = 0x01000000;
+	}
+	dcri_clrset(SDR0, port->sdr_base + PESDRn_UTLSET2, clr_bits, set_bits);
+
+	dcri_clrset(SDR0, port->sdr_base + PESDRn_RCSSET,
+		    (PESDRx_RCSSET_RSTGU | PESDRx_RCSSET_RSTDL),
+		    PESDRx_RCSSET_RSTPYN);
+
+	port->has_ibpre = 1;
+
+	return ppc4xx_pciex_port_reset_sdr(port);
+}
+
+static int ppc460sx_pciex_init_utl(struct ppc4xx_pciex_port *port)
+{
+ /*
+  * UTL setup installed in the 460SX hwops: two register writes, always
+  * returns 0.
+  *
+  * Max 128 Bytes
+  */
+ out_be32 (port->utl_base + PEUTL_PBBSZ, 0x00000000);
+ /* Assert VRB and TXE - per datasheet turn off addr validation */
+ out_be32(port->utl_base + PEUTL_PCTL, 0x80800000);
+ return 0;
+}
+
+/*
+ * Link detection for the 460SX.
+ *
+ * Maps 4K of the port's config space at offset 0x10000000 and polls
+ * PECFG_460SX_DLLSTA for the LINKUP bit, up to 50 times with 10ms between
+ * polls.  port->link is set to 1 if the bit was seen before the attempts
+ * ran out and left at 0 otherwise (including when the mapping fails).
+ */
+static void __init ppc460sx_pciex_check_link(struct ppc4xx_pciex_port *port)
+{
+	void __iomem *mbase;
+	int attempt = 50;
+
+	port->link = 0;
+
+	mbase = ioremap(port->cfg_space.start + 0x10000000, 0x1000);
+	if (mbase == NULL) {
+		/* Terminate the message so it is not merged with the next */
+		printk(KERN_ERR "%pOF: Can't map internal config space !\n",
+		       port->node);
+		return;
+	}
+
+	while (attempt && (0 == (in_le32(mbase + PECFG_460SX_DLLSTA)
+			& PECFG_460SX_DLLSTA_LINKUP))) {
+		attempt--;
+		mdelay(10);
+	}
+	/* A non-zero count means the loop ended because LINKUP was seen */
+	if (attempt)
+		port->link = 1;
+	iounmap(mbase);
+}
+
+static struct ppc4xx_pciex_hwops ppc460sx_pcie_hwops __initdata = { /* used for "ibm,plb-pciex-460sx" */
+ .want_sdr = true,
+ .core_init = ppc460sx_pciex_core_init, /* reports 3 or 2 ports */
+ .port_init_hw = ppc460sx_pciex_init_port_hw, /* per-port UTLSET2/RCSSET setup */
+ .setup_utl = ppc460sx_pciex_init_utl, /* called once the UTL registers are mapped */
+ .check_link = ppc460sx_pciex_check_link, /* polls PECFG_460SX_DLLSTA instead of the SDR-based check */
+};
+
+#endif /* CONFIG_44x */
+
+#ifdef CONFIG_40x
+
+/* 405EX: no core-wide setup is performed; two ports are reported. */
+static int __init ppc405ex_pciex_core_init(struct device_node *np)
+{
+	const int nr_ports = 2;
+
+	return nr_ports;
+}
+
+/*
+ * Reset the PHY of one 405EX PCIe port by stepping its RCSSET register
+ * through three values, waiting for bit 0x00001000 of PHYSTA before the
+ * last one.  The wait is unbounded.
+ */
+static void __init ppc405ex_pcie_phy_reset(struct ppc4xx_pciex_port *port)
+{
+	const unsigned int rcsset = port->sdr_base + PESDRn_RCSSET;
+	const unsigned int physta = port->sdr_base + PESDRn_405EX_PHYSTA;
+	u32 status;
+
+	/* Assert the PE0_PHY reset */
+	mtdcri(SDR0, rcsset, 0x01010000);
+	msleep(1);
+
+	/* deassert the PE0_hotreset */
+	mtdcri(SDR0, rcsset, port->endpoint ? 0x01111000 : 0x01101000);
+
+	/* poll for phy !reset */
+	/* XXX FIXME add timeout */
+	do {
+		status = mfdcri(SDR0, physta);
+	} while ((status & 0x00001000) == 0);
+
+	/* deassert the PE0_gpl_utl_reset */
+	mtdcri(SDR0, rcsset, 0x00101000);
+}
+
+/*
+ * Per-port hardware setup for the 405EX (x1 link).
+ *
+ * Returns the result of ppc4xx_pciex_port_reset_sdr().
+ */
+static int __init ppc405ex_pciex_init_port_hw(struct ppc4xx_pciex_port *port)
+{
+	u32 ptype, loop;
+
+	ptype = port->endpoint ? PTYPE_LEGACY_ENDPOINT : PTYPE_ROOT_PORT;
+
+	mtdcri(SDR0, port->sdr_base + PESDRn_DLPSET,
+	       1 << 24 | ptype << 20 | LNKW_X1 << 12);
+
+	mtdcri(SDR0, port->sdr_base + PESDRn_UTLSET1, 0x00000000);
+	mtdcri(SDR0, port->sdr_base + PESDRn_UTLSET2, 0x01010000);
+	mtdcri(SDR0, port->sdr_base + PESDRn_405EX_PHYSET1, 0x720F0000);
+	mtdcri(SDR0, port->sdr_base + PESDRn_405EX_PHYSET2, 0x70600003);
+
+	/*
+	 * Only reset the PHY when no link is currently established.
+	 * This is for the Atheros PCIe board which has problems to establish
+	 * the link (again) after this PHY reset. All other currently tested
+	 * PCIe boards don't show this problem.
+	 * This has to be re-tested and fixed in a later release!
+	 */
+	loop = mfdcri(SDR0, port->sdr_base + PESDRn_LOOP);
+	if ((loop & 0x00001000) == 0)
+		ppc405ex_pcie_phy_reset(port);
+
+	/* guarded on */
+	dcr_write(port->dcrs, DCRO_PEGPL_CFG, 0x10000000);
+
+	port->has_ibpre = 1;
+
+	return ppc4xx_pciex_port_reset_sdr(port);
+}
+
+static int ppc405ex_pciex_init_utl(struct ppc4xx_pciex_port *port)
+{ /* UTL setup installed in the 405EX hwops; always returns 0 */
+ dcr_write(port->dcrs, DCRO_PEGPL_SPECIAL, 0x0);
+
+ /*
+ * Set buffer allocations and then assert VRB and TXE.
+ * (PEUTL_PBCTL is written afterwards, as the last register.)
+ */
+ out_be32(port->utl_base + PEUTL_OUTTR, 0x02000000);
+ out_be32(port->utl_base + PEUTL_INTR, 0x02000000);
+ out_be32(port->utl_base + PEUTL_OPDBSZ, 0x04000000);
+ out_be32(port->utl_base + PEUTL_PBBSZ, 0x21000000);
+ out_be32(port->utl_base + PEUTL_IPHBSZ, 0x02000000);
+ out_be32(port->utl_base + PEUTL_IPDBSZ, 0x04000000);
+ out_be32(port->utl_base + PEUTL_RCIRQEN, 0x00f00000);
+ out_be32(port->utl_base + PEUTL_PCTL, 0x80800066);
+
+ out_be32(port->utl_base + PEUTL_PBCTL, 0x08000000);
+
+ return 0;
+}
+
+static struct ppc4xx_pciex_hwops ppc405ex_pcie_hwops __initdata = /* used for "ibm,plb-pciex-405ex" */
+{
+ .want_sdr = true,
+ .core_init = ppc405ex_pciex_core_init, /* reports 2 ports */
+ .port_init_hw = ppc405ex_pciex_init_port_hw, /* per-port link/PHY setup */
+ .setup_utl = ppc405ex_pciex_init_utl, /* called once the UTL registers are mapped */
+ .check_link = ppc4xx_pciex_check_link_sdr,
+};
+
+#endif /* CONFIG_40x */
+
+#ifdef CONFIG_476FPE
+/* 476FPE: no core-wide setup is performed; four ports are reported. */
+static int __init ppc_476fpe_pciex_core_init(struct device_node *np)
+{
+	const int nr_ports = 4;
+
+	return nr_ports;
+}
+
+/*
+ * Link detection for the 476FPE.
+ *
+ * Maps 4K of the port's config space at offset 0x10000000 and polls
+ * PECFG_TLDLP up to 20 times, sleeping 10ms between polls, until both
+ * LNKUP and PRESENT are set.  port->link is set to 1 when the last value
+ * read has PRESENT set; it is not written otherwise.
+ */
+static void __init ppc_476fpe_pciex_check_link(struct ppc4xx_pciex_port *port)
+{
+	const u32 want = (PECFG_TLDLP_LNKUP|PECFG_TLDLP_PRESENT);
+	void __iomem *cfg;
+	u32 tldlp = 0;
+	u32 polls;
+
+	cfg = ioremap(port->cfg_space.start + 0x10000000, 0x1000);
+
+	printk(KERN_INFO "PCIE%d: Checking link...\n", port->index);
+
+	if (!cfg) {
+		printk(KERN_WARNING "PCIE%d: failed to get cfg space\n",
+		       port->index);
+		return;
+	}
+
+	for (polls = 20; polls != 0; polls--) {
+		tldlp = in_le32(cfg + PECFG_TLDLP);
+		if ((tldlp & want) == want)
+			break;
+		msleep(10);
+	}
+
+	/*
+	 * NOTE(review): the loop waits for LNKUP and PRESENT together but
+	 * only PRESENT is tested here - confirm this is intended.
+	 */
+	if ((tldlp & PECFG_TLDLP_PRESENT) != 0) {
+		printk(KERN_INFO "PCIE%d: link is up !\n", port->index);
+		port->link = 1;
+	} else {
+		printk(KERN_WARNING "PCIE%d: Link up failed\n", port->index);
+	}
+
+	iounmap(cfg);
+}
+
+static struct ppc4xx_pciex_hwops ppc_476fpe_pcie_hwops __initdata = /* used for "ibm,plb-pciex-476fpe" and "ibm,plb-pciex-476gtr" */
+{
+ .core_init = ppc_476fpe_pciex_core_init, /* reports 4 ports */
+ .check_link = ppc_476fpe_pciex_check_link, /* port_init_hw and setup_utl are left unset, so port init skips them */
+};
+#endif /* CONFIG_476FPE */
+
+/*
+ * Check that the core has been initialised and if not, do it.
+ *
+ * The first call selects the hwops matching @np's compatible string, runs
+ * their core_init hook and allocates one port structure per port it
+ * reports.  The outcome of that first attempt is remembered and returned
+ * by every later call, so a failed core init is not reported as success
+ * the second time round.
+ *
+ * Returns 0 on success, -ENODEV when the host type is unknown or core_init
+ * reports no ports, -ENOMEM when the ports array cannot be allocated.
+ */
+static int __init ppc4xx_pciex_check_core_init(struct device_node *np)
+{
+	static bool core_init_done;
+	static int core_init_rc;
+	int count;
+
+	if (core_init_done)
+		return core_init_rc;
+	core_init_done = true;
+
+	/* Assume failure until the ports array is actually in place */
+	core_init_rc = -ENODEV;
+
+#ifdef CONFIG_44x
+	if (of_device_is_compatible(np, "ibm,plb-pciex-440spe")) {
+		if (ppc440spe_revA())
+			ppc4xx_pciex_hwops = &ppc440speA_pcie_hwops;
+		else
+			ppc4xx_pciex_hwops = &ppc440speB_pcie_hwops;
+	}
+	if (of_device_is_compatible(np, "ibm,plb-pciex-460ex"))
+		ppc4xx_pciex_hwops = &ppc460ex_pcie_hwops;
+	if (of_device_is_compatible(np, "ibm,plb-pciex-460sx"))
+		ppc4xx_pciex_hwops = &ppc460sx_pcie_hwops;
+	if (of_device_is_compatible(np, "ibm,plb-pciex-apm821xx"))
+		ppc4xx_pciex_hwops = &apm821xx_pcie_hwops;
+#endif /* CONFIG_44x    */
+#ifdef CONFIG_40x
+	if (of_device_is_compatible(np, "ibm,plb-pciex-405ex"))
+		ppc4xx_pciex_hwops = &ppc405ex_pcie_hwops;
+#endif
+#ifdef CONFIG_476FPE
+	if (of_device_is_compatible(np, "ibm,plb-pciex-476fpe")
+		|| of_device_is_compatible(np, "ibm,plb-pciex-476gtr"))
+		ppc4xx_pciex_hwops = &ppc_476fpe_pcie_hwops;
+#endif
+	if (ppc4xx_pciex_hwops == NULL) {
+		printk(KERN_WARNING "PCIE: unknown host type %pOF\n", np);
+		return core_init_rc;
+	}
+
+	/* A positive return value is the number of ports of this core */
+	count = ppc4xx_pciex_hwops->core_init(np);
+	if (count <= 0)
+		return core_init_rc;
+
+	ppc4xx_pciex_ports =
+		kcalloc(count, sizeof(struct ppc4xx_pciex_port),
+			GFP_KERNEL);
+	if (!ppc4xx_pciex_ports) {
+		printk(KERN_WARNING "PCIE: failed to allocate ports array\n");
+		core_init_rc = -ENOMEM;
+		return core_init_rc;
+	}
+
+	ppc4xx_pciex_port_count = count;
+	core_init_rc = 0;
+
+	return core_init_rc;
+}
+
+/*
+ * Program the GPL address decoders of one port: the config-space and UTL
+ * register windows are pointed at the addresses held in port->cfg_space
+ * and port->utl_regs, and the remaining outbound windows are disabled.
+ */
+static void __init ppc4xx_pciex_port_init_mapping(struct ppc4xx_pciex_port *port)
+{
+	const resource_size_t cfg_pa = port->cfg_space.start;
+	const resource_size_t utl_pa = port->utl_regs.start;
+
+	/* We map PCI Express configuration based on the reg property */
+	dcr_write(port->dcrs, DCRO_PEGPL_CFGBAH, RES_TO_U32_HIGH(cfg_pa));
+	dcr_write(port->dcrs, DCRO_PEGPL_CFGBAL, RES_TO_U32_LOW(cfg_pa));
+
+	/* XXX FIXME: Use size from reg property. For now, map 512M */
+	dcr_write(port->dcrs, DCRO_PEGPL_CFGMSK, 0xe0000001);
+
+	/* We map UTL registers based on the reg property */
+	dcr_write(port->dcrs, DCRO_PEGPL_REGBAH, RES_TO_U32_HIGH(utl_pa));
+	dcr_write(port->dcrs, DCRO_PEGPL_REGBAL, RES_TO_U32_LOW(utl_pa));
+
+	/* XXX FIXME: Use size from reg property */
+	dcr_write(port->dcrs, DCRO_PEGPL_REGMSK, 0x00007001);
+
+	/* Disable all other outbound windows */
+	dcr_write(port->dcrs, DCRO_PEGPL_OMR1MSKL, 0);
+	dcr_write(port->dcrs, DCRO_PEGPL_OMR2MSKL, 0);
+	dcr_write(port->dcrs, DCRO_PEGPL_OMR3MSKL, 0);
+	dcr_write(port->dcrs, DCRO_PEGPL_MSGMSK, 0);
+}
+
+/*
+ * Bring up one PCIe port: hardware init hook, address mapping, link check,
+ * UTL mapping and setup, then the final ready handshake through the SDR
+ * registers when the port has an sdr_base.
+ *
+ * Returns the non-zero result of the port_init_hw hook if it fails, and 0
+ * otherwise.  A failure to map the UTL registers is fatal (BUG_ON).
+ */
+static int __init ppc4xx_pciex_port_init(struct ppc4xx_pciex_port *port)
+{
+	/* Init HW */
+	if (ppc4xx_pciex_hwops->port_init_hw) {
+		int rc = ppc4xx_pciex_hwops->port_init_hw(port);
+
+		if (rc != 0)
+			return rc;
+	}
+
+	/*
+	 * Initialize mapping: disable all regions and configure
+	 * CFG and REG regions based on resources in the device tree
+	 */
+	ppc4xx_pciex_port_init_mapping(port);
+
+	if (ppc4xx_pciex_hwops->check_link)
+		ppc4xx_pciex_hwops->check_link(port);
+
+	/*
+	 * Map UTL
+	 */
+	port->utl_base = ioremap(port->utl_regs.start, 0x100);
+	BUG_ON(port->utl_base == NULL);
+
+	/*
+	 * Setup UTL registers --BenH.
+	 */
+	if (ppc4xx_pciex_hwops->setup_utl)
+		ppc4xx_pciex_hwops->setup_utl(port);
+
+	/*
+	 * Check for VC0 active or PLL Locked and assert RDY.
+	 */
+	if (port->sdr_base) {
+		/* The 460SX waits on bit 12 of RCSSTS, the others on bit 16 */
+		int is_460sx = of_device_is_compatible(port->node,
+						       "ibm,plb-pciex-460sx");
+		unsigned int sts_bit = is_460sx ? 1 << 12 : 1 << 16;
+
+		if (port->link &&
+		    ppc4xx_pciex_wait_on_sdr(port, PESDRn_RCSSTS,
+					     sts_bit, sts_bit, 5000)) {
+			if (is_460sx)
+				printk(KERN_INFO "PCIE%d: PLL not locked\n",
+				       port->index);
+			else
+				printk(KERN_INFO "PCIE%d: VC0 not active\n",
+				       port->index);
+			port->link = 0;
+		}
+
+		dcri_clrset(SDR0, port->sdr_base + PESDRn_RCSSET, 0, 1 << 20);
+	}
+
+	msleep(100);
+
+	return 0;
+}
+
+/*
+ * Decide whether a config access to (@bus, @devfn) through @port may be
+ * attempted.  Returns 0 when it may and PCIBIOS_DEVICE_NOT_FOUND when not.
+ * The "out of range" warning is printed only once.
+ */
+static int ppc4xx_pciex_validate_bdf(struct ppc4xx_pciex_port *port,
+				     struct pci_bus *bus,
+				     unsigned int devfn)
+{
+	static int warned;
+	struct pci_controller *hose = port->hose;
+	int busno = bus->number;
+	int on_root_bus = (busno == hose->first_busno);
+
+	/* Endpoint can not generate upstream(remote) config cycles */
+	if (port->endpoint && !on_root_bus)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	/* Check we are within the mapped range */
+	if (busno > hose->last_busno) {
+		if (!warned) {
+			printk(KERN_WARNING "Warning! Probing bus %u"
+			       " out of range !\n", bus->number);
+			warned++;
+		}
+		return PCIBIOS_DEVICE_NOT_FOUND;
+	}
+
+	/* The root complex has only one device / function */
+	if (on_root_bus && devfn != 0)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	/* The other side of the RC has only one device as well */
+	if (busno == (hose->first_busno + 1) && PCI_SLOT(devfn) != 0)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	/* Anything behind the root bus needs a link */
+	if (!on_root_bus && !port->link)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	return 0;
+}
+
+/*
+ * Return the mapped address of the config space of (@bus, @devfn).
+ *
+ * The hose's first bus is reached through hose->cfg_addr.  Every other bus
+ * lives in hose->cfg_data, 1M per bus and 4K per devfn, with the bus right
+ * after the first one at offset 0.
+ */
+static void __iomem *ppc4xx_pciex_get_config_base(struct ppc4xx_pciex_port *port,
+						  struct pci_bus *bus,
+						  unsigned int devfn)
+{
+	struct pci_controller *hose = port->hose;
+	unsigned int cfg_off;
+	int relbus;
+
+	/* Remove the casts when we finally remove the stupid volatile
+	 * in struct pci_controller
+	 */
+	if (bus->number == hose->first_busno)
+		return (void __iomem *)hose->cfg_addr;
+
+	relbus = bus->number - (hose->first_busno + 1);
+	cfg_off = (relbus << 20) | (devfn << 12);
+
+	return (void __iomem *)hose->cfg_data + cfg_off;
+}
+
+/*
+ * pci_ops read accessor: read @len bytes (1, 2, anything else is treated
+ * as 4) at @offset of the config space of (@bus, @devfn) into @val.
+ *
+ * Returns PCIBIOS_SUCCESSFUL, or PCIBIOS_DEVICE_NOT_FOUND when the access
+ * is rejected by ppc4xx_pciex_validate_bdf() or when a CRS status is seen
+ * on anything but a 4-byte read of offset 0.  For that one read, a CRS
+ * status is reported as the value 0xffff0001.
+ *
+ * The saved DCRO_PEGPL_CFG value is restored on every path that modified
+ * it; the CRS error path used to return without restoring it, which left
+ * GPL_DMER_MASK_DISA set.
+ */
+static int ppc4xx_pciex_read_config(struct pci_bus *bus, unsigned int devfn,
+				    int offset, int len, u32 *val)
+{
+	struct pci_controller *hose = pci_bus_to_host(bus);
+	struct ppc4xx_pciex_port *port =
+		&ppc4xx_pciex_ports[hose->indirect_type];
+	void __iomem *addr;
+	u32 gpl_cfg;
+	int ret = PCIBIOS_SUCCESSFUL;
+
+	BUG_ON(hose != port->hose);
+
+	if (ppc4xx_pciex_validate_bdf(port, bus, devfn) != 0)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	addr = ppc4xx_pciex_get_config_base(port, bus, devfn);
+
+	/*
+	 * Reading from configuration space of non-existing device can
+	 * generate transaction errors. For the read duration we suppress
+	 * assertion of machine check exceptions to avoid those.
+	 */
+	gpl_cfg = dcr_read(port->dcrs, DCRO_PEGPL_CFG);
+	dcr_write(port->dcrs, DCRO_PEGPL_CFG, gpl_cfg | GPL_DMER_MASK_DISA);
+
+	/* Make sure no CRS is recorded */
+	out_be32(port->utl_base + PEUTL_RCSTA, 0x00040000);
+
+	switch (len) {
+	case 1:
+		*val = in_8((u8 *)(addr + offset));
+		break;
+	case 2:
+		*val = in_le16((u16 *)(addr + offset));
+		break;
+	default:
+		*val = in_le32((u32 *)(addr + offset));
+		break;
+	}
+
+	pr_debug("pcie-config-read: bus=%3d [%3d..%3d] devfn=0x%04x"
+		 " offset=0x%04x len=%d, addr=0x%p val=0x%08x\n",
+		 bus->number, hose->first_busno, hose->last_busno,
+		 devfn, offset, len, addr + offset, *val);
+
+	/* Check for CRS (440SPe rev B does that for us but heh ..) */
+	if (in_be32(port->utl_base + PEUTL_RCSTA) & 0x00040000) {
+		pr_debug("Got CRS !\n");
+		if (len != 4 || offset != 0)
+			ret = PCIBIOS_DEVICE_NOT_FOUND;
+		else
+			*val = 0xffff0001;
+	}
+
+	/* Put the machine check masking back the way we found it */
+	dcr_write(port->dcrs, DCRO_PEGPL_CFG, gpl_cfg);
+
+	return ret;
+}
+
+/*
+ * pci_ops write accessor: write the low @len bytes of @val (1, 2, anything
+ * else is treated as 4) at @offset of the config space of (@bus, @devfn).
+ *
+ * Returns PCIBIOS_SUCCESSFUL, or PCIBIOS_DEVICE_NOT_FOUND when the access
+ * is rejected by ppc4xx_pciex_validate_bdf().
+ */
+static int ppc4xx_pciex_write_config(struct pci_bus *bus, unsigned int devfn,
+				     int offset, int len, u32 val)
+{
+	struct pci_controller *hose = pci_bus_to_host(bus);
+	struct ppc4xx_pciex_port *port;
+	void __iomem *reg;
+	u32 saved_cfg;
+
+	port = &ppc4xx_pciex_ports[hose->indirect_type];
+
+	if (ppc4xx_pciex_validate_bdf(port, bus, devfn) != 0)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	reg = ppc4xx_pciex_get_config_base(port, bus, devfn) + offset;
+
+	/*
+	 * GPL_DMER_MASK_DISA is set in DCRO_PEGPL_CFG for the duration of
+	 * the access, as the read path does to suppress machine checks,
+	 * and the previous value is restored afterwards.
+	 */
+	saved_cfg = dcr_read(port->dcrs, DCRO_PEGPL_CFG);
+	dcr_write(port->dcrs, DCRO_PEGPL_CFG, saved_cfg | GPL_DMER_MASK_DISA);
+
+	pr_debug("pcie-config-write: bus=%3d [%3d..%3d] devfn=0x%04x"
+		 " offset=0x%04x len=%d, addr=0x%p val=0x%08x\n",
+		 bus->number, hose->first_busno, hose->last_busno,
+		 devfn, offset, len, reg, val);
+
+	if (len == 1)
+		out_8((u8 *)reg, val);
+	else if (len == 2)
+		out_le16((u16 *)reg, val);
+	else
+		out_le32((u32 *)reg, val);
+
+	dcr_write(port->dcrs, DCRO_PEGPL_CFG, saved_cfg);
+
+	return PCIBIOS_SUCCESSFUL;
+}
+
+static struct pci_ops ppc4xx_pciex_pci_ops = /* config-space accessors defined above */
+{
+ .read = ppc4xx_pciex_read_config, /* also handles CRS status */
+ .write = ppc4xx_pciex_write_config,
+};
+
+/*
+ * Program outbound window @index (0 or 1 for memory, 2 for I/O) so that
+ * @size bytes at PLB address @plb_addr are forwarded to PCI address
+ * @pci_addr.
+ *
+ * @size must be a power of two, at least 1M for windows 0/1 and at least
+ * 256 bytes for window 2, and @plb_addr must be aligned to it; otherwise
+ * a warning is logged and -1 returned.  Returns 0 on success.  @flags is
+ * not used.
+ */
+static int __init ppc4xx_setup_one_pciex_POM(struct ppc4xx_pciex_port *port,
+					     struct pci_controller *hose,
+					     void __iomem *mbase,
+					     u64 plb_addr,
+					     u64 pci_addr,
+					     u64 size,
+					     unsigned int flags,
+					     int index)
+{
+	u32 plb_hi, plb_lo, pci_hi, pci_lo, mask;
+	u32 uot;
+	u64 min_size = 0;
+
+	if (index < 2)
+		min_size = 0x100000;
+	else if (index == 2)
+		min_size = 0x100;
+
+	if (!is_power_of_2(size) || size < min_size ||
+	    (plb_addr & (size - 1)) != 0) {
+		printk(KERN_WARNING "%pOF: Resource out of range\n", hose->dn);
+		return -1;
+	}
+
+	/* Calculate register values */
+	plb_hi = RES_TO_U32_HIGH(plb_addr);
+	plb_lo = RES_TO_U32_LOW(plb_addr);
+	pci_hi = RES_TO_U32_HIGH(pci_addr);
+	pci_lo = RES_TO_U32_LOW(pci_addr);
+	mask = (0xffffffffu << ilog2(size)) | 0x1;
+
+	/* Program register values */
+	switch (index) {
+	case 0:
+		out_le32(mbase + PECFG_POM0LAH, pci_hi);
+		out_le32(mbase + PECFG_POM0LAL, pci_lo);
+		dcr_write(port->dcrs, DCRO_PEGPL_OMR1BAH, plb_hi);
+		dcr_write(port->dcrs, DCRO_PEGPL_OMR1BAL, plb_lo);
+		dcr_write(port->dcrs, DCRO_PEGPL_OMR1MSKH, 0x7fffffff);
+		/* Enabled and single region; the UOT flag is per SoC */
+		if (of_device_is_compatible(port->node, "ibm,plb-pciex-460sx"))
+			uot = DCRO_PEGPL_460SX_OMR1MSKL_UOT;
+		else if (of_device_is_compatible(
+				port->node, "ibm,plb-pciex-476fpe") ||
+			 of_device_is_compatible(
+				port->node, "ibm,plb-pciex-476gtr"))
+			uot = DCRO_PEGPL_476FPE_OMR1MSKL_UOT;
+		else
+			uot = DCRO_PEGPL_OMR1MSKL_UOT;
+		dcr_write(port->dcrs, DCRO_PEGPL_OMR1MSKL,
+			  mask | uot | DCRO_PEGPL_OMRxMSKL_VAL);
+		break;
+	case 1:
+		out_le32(mbase + PECFG_POM1LAH, pci_hi);
+		out_le32(mbase + PECFG_POM1LAL, pci_lo);
+		dcr_write(port->dcrs, DCRO_PEGPL_OMR2BAH, plb_hi);
+		dcr_write(port->dcrs, DCRO_PEGPL_OMR2BAL, plb_lo);
+		dcr_write(port->dcrs, DCRO_PEGPL_OMR2MSKH, 0x7fffffff);
+		dcr_write(port->dcrs, DCRO_PEGPL_OMR2MSKL,
+			  mask | DCRO_PEGPL_OMRxMSKL_VAL);
+		break;
+	case 2:
+		out_le32(mbase + PECFG_POM2LAH, pci_hi);
+		out_le32(mbase + PECFG_POM2LAL, pci_lo);
+		dcr_write(port->dcrs, DCRO_PEGPL_OMR3BAH, plb_hi);
+		dcr_write(port->dcrs, DCRO_PEGPL_OMR3BAL, plb_lo);
+		dcr_write(port->dcrs, DCRO_PEGPL_OMR3MSKH, 0x7fffffff);
+		/* Note that 3 here means enabled | IO space !!! */
+		dcr_write(port->dcrs, DCRO_PEGPL_OMR3MSKL,
+			  mask | DCRO_PEGPL_OMR3MSKL_IO
+			  | DCRO_PEGPL_OMRxMSKL_VAL);
+		break;
+	}
+
+	return 0;
+}
+
+/*
+ * Set up the outbound windows of a port from the hose resources: up to two
+ * memory windows (indexes 0 and 1), the legacy ISA memory hole when a slot
+ * is left and no window already starts at PCI address 0, and the I/O
+ * window at index 2.
+ */
+static void __init ppc4xx_configure_pciex_POMs(struct ppc4xx_pciex_port *port,
+					       struct pci_controller *hose,
+					       void __iomem *mbase)
+{
+	int i;
+	int nr_win = 0;
+	int isa_hole_mapped = 0;
+
+	/* Setup outbound memory windows */
+	for (i = 0; i < 3; i++) {
+		struct resource *res = &hose->mem_resources[i];
+		resource_size_t offset = hose->mem_offset[i];
+
+		/* we only care about memory windows */
+		if ((res->flags & IORESOURCE_MEM) == 0)
+			continue;
+		if (nr_win > 1) {
+			printk(KERN_WARNING "%pOF: Too many ranges\n",
+			       port->node);
+			break;
+		}
+
+		/* Configure the resource; a rejected one uses no slot */
+		if (ppc4xx_setup_one_pciex_POM(port, hose, mbase,
+					       res->start,
+					       res->start - offset,
+					       resource_size(res),
+					       res->flags,
+					       nr_win) != 0)
+			continue;
+		nr_win++;
+
+		/* If the resource PCI address is 0 then we have our
+		 * ISA memory hole
+		 */
+		if (res->start == offset)
+			isa_hole_mapped = 1;
+	}
+
+	/* Handle ISA memory hole if not already covered */
+	if (nr_win <= 1 && !isa_hole_mapped && hose->isa_mem_size &&
+	    ppc4xx_setup_one_pciex_POM(port, hose, mbase,
+				       hose->isa_mem_phys, 0,
+				       hose->isa_mem_size, 0, nr_win) == 0)
+		printk(KERN_INFO "%pOF: Legacy ISA memory support enabled\n",
+		       hose->dn);
+
+	/* Configure IO, always 64K starting at 0. We hard wire it to 64K !
+	 * Note also that it -has- to be region index 2 on this HW
+	 */
+	if ((hose->io_resource.flags & IORESOURCE_IO) != 0)
+		ppc4xx_setup_one_pciex_POM(port, hose, mbase,
+					   hose->io_base_phys, 0,
+					   0x10000, IORESOURCE_IO, 2);
+}
+
+static void __init ppc4xx_configure_pciex_PIMs(struct ppc4xx_pciex_port *port,
+ struct pci_controller *hose,
+ void __iomem *mbase, /* register block reached through the PECFG_* and PCI_* offsets */
+ struct resource *res) /* only its size, flags and start are used, on the non-endpoint path */
+{ /* Sets up the inbound BAR0/PIM registers, then enables inbound mapping and the command bits */
+ resource_size_t size = resource_size(res);
+ u64 sa;
+
+ if (port->endpoint) {
+ resource_size_t ep_addr = 0;
+ resource_size_t ep_size = 32 << 20;
+
+ /* Currently we map a fixed window of ep_size bytes to PLB
+ * address 0 (SDRAM). This should probably be configurable
+ * via a dts property.
+ *
+ * NOTE(review): this comment used to say 64MByte, but ep_size
+ * is 32 << 20 (32 MiB) - confirm which one is intended.
+ */
+
+ /* Calculate window size */
+ sa = (0xffffffffffffffffull << ilog2(ep_size));
+
+ /* Setup BAR0 */
+ out_le32(mbase + PECFG_BAR0HMPA, RES_TO_U32_HIGH(sa));
+ out_le32(mbase + PECFG_BAR0LMPA, RES_TO_U32_LOW(sa) |
+ PCI_BASE_ADDRESS_MEM_TYPE_64);
+
+ /* Disable BAR1 & BAR2 */
+ out_le32(mbase + PECFG_BAR1MPA, 0);
+ out_le32(mbase + PECFG_BAR2HMPA, 0);
+ out_le32(mbase + PECFG_BAR2LMPA, 0);
+
+ out_le32(mbase + PECFG_PIM01SAH, RES_TO_U32_HIGH(sa));
+ out_le32(mbase + PECFG_PIM01SAL, RES_TO_U32_LOW(sa));
+
+ out_le32(mbase + PCI_BASE_ADDRESS_0, RES_TO_U32_LOW(ep_addr));
+ out_le32(mbase + PCI_BASE_ADDRESS_1, RES_TO_U32_HIGH(ep_addr));
+ } else {
+ /* Calculate window size */
+ sa = (0xffffffffffffffffull << ilog2(size));
+ if (res->flags & IORESOURCE_PREFETCH)
+ sa |= PCI_BASE_ADDRESS_MEM_PREFETCH;
+
+ if (of_device_is_compatible(port->node, "ibm,plb-pciex-460sx") ||
+ of_device_is_compatible(
+ port->node, "ibm,plb-pciex-476fpe") ||
+ of_device_is_compatible(
+ port->node, "ibm,plb-pciex-476gtr"))
+ sa |= PCI_BASE_ADDRESS_MEM_TYPE_64;
+
+ out_le32(mbase + PECFG_BAR0HMPA, RES_TO_U32_HIGH(sa));
+ out_le32(mbase + PECFG_BAR0LMPA, RES_TO_U32_LOW(sa));
+
+ /* The setup of the split looks weird to me ... let's see
+ * if it works
+ */
+ out_le32(mbase + PECFG_PIM0LAL, 0x00000000);
+ out_le32(mbase + PECFG_PIM0LAH, 0x00000000);
+ out_le32(mbase + PECFG_PIM1LAL, 0x00000000);
+ out_le32(mbase + PECFG_PIM1LAH, 0x00000000);
+ out_le32(mbase + PECFG_PIM01SAH, 0xffff0000);
+ out_le32(mbase + PECFG_PIM01SAL, 0x00000000);
+
+ out_le32(mbase + PCI_BASE_ADDRESS_0, RES_TO_U32_LOW(res->start));
+ out_le32(mbase + PCI_BASE_ADDRESS_1, RES_TO_U32_HIGH(res->start));
+ }
+
+ /* Enable inbound mapping */
+ out_le32(mbase + PECFG_PIMEN, 0x1);
+
+ /* Enable I/O, Mem, and Busmaster cycles */
+ out_le16(mbase + PCI_COMMAND,
+ in_le16(mbase + PCI_COMMAND) |
+ PCI_COMMAND_IO | PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER);
+}
+
+/*
+ * Allocate and set up the pci_controller ("hose") for one PCIe port.
+ *
+ * Maps the port's config space (external space only for root complexes),
+ * programs the bus numbers on a root port, parses the outbound ranges and
+ * the inbound dma-ranges, configures the POMs/PIMs and finally writes the
+ * vendor/device IDs and the class code.
+ *
+ * On any failure the hose and both mappings are released again and
+ * port->hose is left NULL.
+ */
+static void __init ppc4xx_pciex_port_setup_hose(struct ppc4xx_pciex_port *port)
+{
+	struct resource dma_window;
+	struct pci_controller *hose = NULL;
+	const int *bus_range;
+	int primary, busses;
+	void __iomem *mbase = NULL, *cfg_data = NULL;
+	const u32 *pval;
+	u32 val;
+
+	/* Check if primary bridge */
+	primary = of_property_read_bool(port->node, "primary");
+
+	/* Get bus range if any */
+	bus_range = of_get_property(port->node, "bus-range", NULL);
+
+	/* Allocate the host controller data structure */
+	hose = pcibios_alloc_controller(port->node);
+	if (!hose)
+		goto fail;
+
+	/* We stick the port number in "indirect_type" so the config space
+	 * ops can retrieve the port data structure easily
+	 */
+	hose->indirect_type = port->index;
+
+	/* Get bus range */
+	hose->first_busno = bus_range ? bus_range[0] : 0x0;
+	hose->last_busno = bus_range ? bus_range[1] : 0xff;
+
+	/* Because of how big mapping the config space is (1M per bus), we
+	 * limit how many busses we support. In the long run, we could replace
+	 * that with something akin to kmap_atomic instead. We set aside 1 bus
+	 * for the host itself too.
+	 */
+	busses = hose->last_busno - hose->first_busno; /* This is off by 1 */
+	if (busses > MAX_PCIE_BUS_MAPPED) {
+		busses = MAX_PCIE_BUS_MAPPED;
+		hose->last_busno = hose->first_busno + busses;
+	}
+
+	if (!port->endpoint) {
+		/* Only map the external config space in cfg_data for
+		 * PCIe root-complexes. External space is 1M per bus
+		 */
+		cfg_data = ioremap(port->cfg_space.start +
+				   (hose->first_busno + 1) * 0x100000,
+				   busses * 0x100000);
+		if (cfg_data == NULL) {
+			printk(KERN_ERR "%pOF: Can't map external config space !\n",
+			       port->node);
+			goto fail;
+		}
+		hose->cfg_data = cfg_data;
+	}
+
+	/* Always map the host config space in cfg_addr.
+	 * Internal space is 4K
+	 */
+	mbase = ioremap(port->cfg_space.start + 0x10000000, 0x1000);
+	if (mbase == NULL) {
+		printk(KERN_ERR "%pOF: Can't map internal config space !\n",
+		       port->node);
+		goto fail;
+	}
+	hose->cfg_addr = mbase;
+
+	pr_debug("PCIE %pOF, bus %d..%d\n", port->node,
+		 hose->first_busno, hose->last_busno);
+	pr_debug(" config space mapped at: root @0x%p, other @0x%p\n",
+		 hose->cfg_addr, hose->cfg_data);
+
+	/* Setup config space */
+	hose->ops = &ppc4xx_pciex_pci_ops;
+	port->hose = hose;
+	mbase = (void __iomem *)hose->cfg_addr;
+
+	if (!port->endpoint) {
+		/*
+		 * Set bus numbers on our root port
+		 */
+		out_8(mbase + PCI_PRIMARY_BUS, hose->first_busno);
+		out_8(mbase + PCI_SECONDARY_BUS, hose->first_busno + 1);
+		out_8(mbase + PCI_SUBORDINATE_BUS, hose->last_busno);
+	}
+
+	/*
+	 * OMRs are already reset, also disable PIMs
+	 */
+	out_le32(mbase + PECFG_PIMEN, 0);
+
+	/* Parse outbound mapping resources */
+	pci_process_bridge_OF_ranges(hose, port->node, primary);
+
+	/* Parse inbound mapping resources */
+	if (ppc4xx_parse_dma_ranges(hose, mbase, &dma_window) != 0)
+		goto fail;
+
+	/* Configure outbound ranges POMs */
+	ppc4xx_configure_pciex_POMs(port, hose, mbase);
+
+	/* Configure inbound ranges PIMs */
+	ppc4xx_configure_pciex_PIMs(port, hose, mbase, &dma_window);
+
+	/* The root complex doesn't show up if we don't set some vendor
+	 * and device IDs into it. The defaults below are the same bogus
+	 * one that the initial code in arch/ppc had. This can be
+	 * overwritten by setting the "vendor-id/device-id" properties
+	 * in the pciex node.
+	 */
+
+	/* Get the (optional) vendor-/device-id from the device-tree */
+	pval = of_get_property(port->node, "vendor-id", NULL);
+	if (pval) {
+		val = *pval;
+	} else {
+		if (!port->endpoint)
+			val = 0xaaa0 + port->index;
+		else
+			val = 0xeee0 + port->index;
+	}
+	out_le16(mbase + 0x200, val);
+
+	pval = of_get_property(port->node, "device-id", NULL);
+	if (pval) {
+		val = *pval;
+	} else {
+		if (!port->endpoint)
+			val = 0xbed0 + port->index;
+		else
+			val = 0xfed0 + port->index;
+	}
+	out_le16(mbase + 0x202, val);
+
+	/* Enable Bus master, memory, and io space */
+	if (of_device_is_compatible(port->node, "ibm,plb-pciex-460sx"))
+		out_le16(mbase + 0x204, 0x7);
+
+	if (!port->endpoint) {
+		/* Set Class Code to PCI-PCI bridge and Revision Id to 1 */
+		out_le32(mbase + 0x208, 0x06040001);
+
+		printk(KERN_INFO "PCIE%d: successfully set as root-complex\n",
+		       port->index);
+	} else {
+		/* Set Class Code to Processor/PPC */
+		out_le32(mbase + 0x208, 0x0b200001);
+
+		printk(KERN_INFO "PCIE%d: successfully set as endpoint\n",
+		       port->index);
+	}
+
+	return;
+ fail:
+	/* Don't leave a dangling pointer to the hose that is freed below */
+	port->hose = NULL;
+	if (hose)
+		pcibios_free_controller(hose);
+	if (cfg_data)
+		iounmap(cfg_data);
+	if (mbase)
+		iounmap(mbase);
+}
+
+/*
+ * Probe one PCIe bridge node from the device tree.
+ *
+ * Runs the one-time core initialization, looks up the port slot from the
+ * "port" property, reads the port's resources (sdr-base, config space,
+ * UTL registers, DCRs), initializes the port hardware and then sets up
+ * the hose.  Every failure is logged and leaves the port unconfigured;
+ * the device node reference taken for port->node is dropped again in
+ * that case.
+ */
+static void __init ppc4xx_probe_pciex_bridge(struct device_node *np)
+{
+	struct ppc4xx_pciex_port *port;
+	const u32 *pval;
+	int portno;
+	unsigned int dcrs;
+
+	/* First, proceed to core initialization as we assume there's
+	 * only one PCIe core in the system
+	 */
+	if (ppc4xx_pciex_check_core_init(np))
+		return;
+
+	/* Get the port number from the device-tree */
+	pval = of_get_property(np, "port", NULL);
+	if (pval == NULL) {
+		printk(KERN_ERR "PCIE: Can't find port number for %pOF\n", np);
+		return;
+	}
+	portno = *pval;
+	if (portno >= ppc4xx_pciex_port_count) {
+		printk(KERN_ERR "PCIE: port number out of range for %pOF\n",
+		       np);
+		return;
+	}
+	port = &ppc4xx_pciex_ports[portno];
+	port->index = portno;
+
+	/*
+	 * Check if device is enabled
+	 */
+	if (!of_device_is_available(np)) {
+		printk(KERN_INFO "PCIE%d: Port disabled via device-tree\n", port->index);
+		return;
+	}
+
+	/* From here on every failure must release this reference */
+	port->node = of_node_get(np);
+	if (ppc4xx_pciex_hwops->want_sdr) {
+		pval = of_get_property(np, "sdr-base", NULL);
+		if (pval == NULL) {
+			printk(KERN_ERR "PCIE: missing sdr-base for %pOF\n",
+			       np);
+			goto err_put_node;
+		}
+		port->sdr_base = *pval;
+	}
+
+	/* Check if device_type property is set to "pci" or "pci-endpoint".
+	 * Resulting from this setup this PCIe port will be configured
+	 * as root-complex or as endpoint.
+	 */
+	if (of_node_is_type(port->node, "pci-endpoint")) {
+		port->endpoint = 1;
+	} else if (of_node_is_type(port->node, "pci")) {
+		port->endpoint = 0;
+	} else {
+		printk(KERN_ERR "PCIE: missing or incorrect device_type for %pOF\n",
+		       np);
+		goto err_put_node;
+	}
+
+	/* Fetch config space registers address */
+	if (of_address_to_resource(np, 0, &port->cfg_space)) {
+		printk(KERN_ERR "%pOF: Can't get PCI-E config space !\n", np);
+		goto err_put_node;
+	}
+	/* Fetch host bridge internal registers address */
+	if (of_address_to_resource(np, 1, &port->utl_regs)) {
+		printk(KERN_ERR "%pOF: Can't get UTL register base !\n", np);
+		goto err_put_node;
+	}
+
+	/* Map DCRs */
+	dcrs = dcr_resource_start(np, 0);
+	if (dcrs == 0) {
+		printk(KERN_ERR "%pOF: Can't get DCR register base !\n", np);
+		goto err_put_node;
+	}
+	port->dcrs = dcr_map(np, dcrs, dcr_resource_len(np, 0));
+
+	/* Initialize the port specific registers */
+	if (ppc4xx_pciex_port_init(port)) {
+		printk(KERN_WARNING "PCIE%d: Port init failed\n", port->index);
+		goto err_put_node;
+	}
+
+	/* Setup the linux hose data structure */
+	ppc4xx_pciex_port_setup_hose(port);
+	return;
+
+err_put_node:
+	/* Drop the reference taken by of_node_get() above */
+	of_node_put(port->node);
+	port->node = NULL;
+}
+
+#endif /* CONFIG_PPC4xx_PCI_EXPRESS */
+
+/*
+ * Initcall: walk the device tree and probe every 4xx PCI host bridge.
+ * PCIe bridges are probed first (only when CONFIG_PPC4xx_PCI_EXPRESS is
+ * set), then PCI-X bridges, then plain PCI bridges.  Always returns 0.
+ */
+static int __init ppc4xx_pci_find_bridges(void)
+{
+	struct device_node *bridge;
+
+	pci_add_flags(PCI_ENABLE_PROC_DOMAINS | PCI_COMPAT_DOMAIN_0);
+
+#ifdef CONFIG_PPC4xx_PCI_EXPRESS
+	for_each_compatible_node(bridge, NULL, "ibm,plb-pciex") {
+		ppc4xx_probe_pciex_bridge(bridge);
+	}
+#endif
+	for_each_compatible_node(bridge, NULL, "ibm,plb-pcix") {
+		ppc4xx_probe_pcix_bridge(bridge);
+	}
+	for_each_compatible_node(bridge, NULL, "ibm,plb-pci") {
+		ppc4xx_probe_pci_bridge(bridge);
+	}
+
+	return 0;
+}
+arch_initcall(ppc4xx_pci_find_bridges);
+
diff --git a/arch/powerpc/platforms/4xx/pci.h b/arch/powerpc/platforms/4xx/pci.h
new file mode 100644
index 000000000..bb4821938
--- /dev/null
+++ b/arch/powerpc/platforms/4xx/pci.h
@@ -0,0 +1,505 @@
+/*
+ * PCI / PCI-X / PCI-Express support for 4xx parts
+ *
+ * Copyright 2007 Ben. Herrenschmidt <benh@kernel.crashing.org>, IBM Corp.
+ *
+ * Bits and pieces extracted from arch/ppc support by
+ *
+ * Matt Porter <mporter@kernel.crashing.org>
+ *
+ * Copyright 2002-2005 MontaVista Software Inc.
+ */
+#ifndef __PPC4XX_PCI_H__
+#define __PPC4XX_PCI_H__
+
+/*
+ * 4xx PCI-X bridge register definitions
+ */
+#define PCIX0_VENDID 0x000
+#define PCIX0_DEVID 0x002
+#define PCIX0_COMMAND 0x004
+#define PCIX0_STATUS 0x006
+#define PCIX0_REVID 0x008
+#define PCIX0_CLS 0x009
+#define PCIX0_CACHELS 0x00c
+#define PCIX0_LATTIM 0x00d
+#define PCIX0_HDTYPE 0x00e
+#define PCIX0_BIST 0x00f
+#define PCIX0_BAR0L 0x010
+#define PCIX0_BAR0H 0x014
+#define PCIX0_BAR1 0x018
+#define PCIX0_BAR2L 0x01c
+#define PCIX0_BAR2H 0x020
+#define PCIX0_BAR3 0x024
+#define PCIX0_CISPTR 0x028
+#define PCIX0_SBSYSVID 0x02c
+#define PCIX0_SBSYSID 0x02e
+#define PCIX0_EROMBA 0x030
+#define PCIX0_CAP 0x034
+#define PCIX0_RES0 0x035
+#define PCIX0_RES1 0x036
+#define PCIX0_RES2 0x038
+#define PCIX0_INTLN 0x03c
+#define PCIX0_INTPN 0x03d
+#define PCIX0_MINGNT 0x03e
+#define PCIX0_MAXLTNCY 0x03f
+#define PCIX0_BRDGOPT1 0x040
+#define PCIX0_BRDGOPT2 0x044
+#define PCIX0_ERREN 0x050
+#define PCIX0_ERRSTS 0x054
+#define PCIX0_PLBBESR 0x058
+#define PCIX0_PLBBEARL 0x05c
+#define PCIX0_PLBBEARH 0x060
+#define PCIX0_POM0LAL 0x068
+#define PCIX0_POM0LAH 0x06c
+#define PCIX0_POM0SA 0x070
+#define PCIX0_POM0PCIAL 0x074
+#define PCIX0_POM0PCIAH 0x078
+#define PCIX0_POM1LAL 0x07c
+#define PCIX0_POM1LAH 0x080
+#define PCIX0_POM1SA 0x084
+#define PCIX0_POM1PCIAL 0x088
+#define PCIX0_POM1PCIAH 0x08c
+#define PCIX0_POM2SA 0x090
+#define PCIX0_PIM0SAL 0x098
+#define PCIX0_PIM0SA PCIX0_PIM0SAL
+#define PCIX0_PIM0LAL 0x09c
+#define PCIX0_PIM0LAH 0x0a0
+#define PCIX0_PIM1SA 0x0a4
+#define PCIX0_PIM1LAL 0x0a8
+#define PCIX0_PIM1LAH 0x0ac
+#define PCIX0_PIM2SAL 0x0b0
+#define PCIX0_PIM2SA PCIX0_PIM2SAL
+#define PCIX0_PIM2LAL 0x0b4
+#define PCIX0_PIM2LAH 0x0b8
+#define PCIX0_OMCAPID 0x0c0
+#define PCIX0_OMNIPTR 0x0c1
+#define PCIX0_OMMC 0x0c2
+#define PCIX0_OMMA 0x0c4
+#define PCIX0_OMMUA 0x0c8
+#define PCIX0_OMMDATA 0x0cc
+#define PCIX0_OMMEOI 0x0ce
+#define PCIX0_PMCAPID 0x0d0
+#define PCIX0_PMNIPTR 0x0d1
+#define PCIX0_PMC 0x0d2
+#define PCIX0_PMCSR 0x0d4
+#define PCIX0_PMCSRBSE 0x0d6
+#define PCIX0_PMDATA 0x0d7
+#define PCIX0_PMSCRR 0x0d8
+#define PCIX0_CAPID 0x0dc
+#define PCIX0_NIPTR 0x0dd
+#define PCIX0_CMD 0x0de
+#define PCIX0_STS 0x0e0
+#define PCIX0_IDR 0x0e4
+#define PCIX0_CID 0x0e8
+#define PCIX0_RID 0x0ec
+#define PCIX0_PIM0SAH 0x0f8
+#define PCIX0_PIM2SAH 0x0fc
+#define PCIX0_MSGIL 0x100
+#define PCIX0_MSGIH 0x104
+#define PCIX0_MSGOL 0x108
+#define PCIX0_MSGOH 0x10c
+#define PCIX0_IM 0x1f8
+
+/*
+ * 4xx PCI bridge register definitions
+ */
+#define PCIL0_PMM0LA 0x00
+#define PCIL0_PMM0MA 0x04
+#define PCIL0_PMM0PCILA 0x08
+#define PCIL0_PMM0PCIHA 0x0c
+#define PCIL0_PMM1LA 0x10
+#define PCIL0_PMM1MA 0x14
+#define PCIL0_PMM1PCILA 0x18
+#define PCIL0_PMM1PCIHA 0x1c
+#define PCIL0_PMM2LA 0x20
+#define PCIL0_PMM2MA 0x24
+#define PCIL0_PMM2PCILA 0x28
+#define PCIL0_PMM2PCIHA 0x2c
+#define PCIL0_PTM1MS 0x30
+#define PCIL0_PTM1LA 0x34
+#define PCIL0_PTM2MS 0x38
+#define PCIL0_PTM2LA 0x3c
+
+/*
+ * 4xx PCIe bridge register definitions
+ */
+
+/* DCR offsets */
+#define DCRO_PEGPL_CFGBAH 0x00
+#define DCRO_PEGPL_CFGBAL 0x01
+#define DCRO_PEGPL_CFGMSK 0x02
+#define DCRO_PEGPL_MSGBAH 0x03
+#define DCRO_PEGPL_MSGBAL 0x04
+#define DCRO_PEGPL_MSGMSK 0x05
+#define DCRO_PEGPL_OMR1BAH 0x06
+#define DCRO_PEGPL_OMR1BAL 0x07
+#define DCRO_PEGPL_OMR1MSKH 0x08
+#define DCRO_PEGPL_OMR1MSKL 0x09
+#define DCRO_PEGPL_OMR2BAH 0x0a
+#define DCRO_PEGPL_OMR2BAL 0x0b
+#define DCRO_PEGPL_OMR2MSKH 0x0c
+#define DCRO_PEGPL_OMR2MSKL 0x0d
+#define DCRO_PEGPL_OMR3BAH 0x0e
+#define DCRO_PEGPL_OMR3BAL 0x0f
+#define DCRO_PEGPL_OMR3MSKH 0x10
+#define DCRO_PEGPL_OMR3MSKL 0x11
+#define DCRO_PEGPL_REGBAH 0x12
+#define DCRO_PEGPL_REGBAL 0x13
+#define DCRO_PEGPL_REGMSK 0x14
+#define DCRO_PEGPL_SPECIAL 0x15
+#define DCRO_PEGPL_CFG 0x16
+#define DCRO_PEGPL_ESR 0x17
+#define DCRO_PEGPL_EARH 0x18
+#define DCRO_PEGPL_EARL 0x19
+#define DCRO_PEGPL_EATR 0x1a
+
+/* DMER mask */
+#define GPL_DMER_MASK_DISA 0x02000000
+
+/*
+ * System DCRs (SDRs)
+ */
+#define PESDR0_PLLLCT1 0x03a0
+#define PESDR0_PLLLCT2 0x03a1
+#define PESDR0_PLLLCT3 0x03a2
+
+/*
+ * 440SPe additional DCRs
+ */
+#define PESDR0_440SPE_UTLSET1 0x0300
+#define PESDR0_440SPE_UTLSET2 0x0301
+#define PESDR0_440SPE_DLPSET 0x0302
+#define PESDR0_440SPE_LOOP 0x0303
+#define PESDR0_440SPE_RCSSET 0x0304
+#define PESDR0_440SPE_RCSSTS 0x0305
+#define PESDR0_440SPE_HSSL0SET1 0x0306
+#define PESDR0_440SPE_HSSL0SET2 0x0307
+#define PESDR0_440SPE_HSSL0STS 0x0308
+#define PESDR0_440SPE_HSSL1SET1 0x0309
+#define PESDR0_440SPE_HSSL1SET2 0x030a
+#define PESDR0_440SPE_HSSL1STS 0x030b
+#define PESDR0_440SPE_HSSL2SET1 0x030c
+#define PESDR0_440SPE_HSSL2SET2 0x030d
+#define PESDR0_440SPE_HSSL2STS 0x030e
+#define PESDR0_440SPE_HSSL3SET1 0x030f
+#define PESDR0_440SPE_HSSL3SET2 0x0310
+#define PESDR0_440SPE_HSSL3STS 0x0311
+#define PESDR0_440SPE_HSSL4SET1 0x0312
+#define PESDR0_440SPE_HSSL4SET2 0x0313
+#define PESDR0_440SPE_HSSL4STS 0x0314
+#define PESDR0_440SPE_HSSL5SET1 0x0315
+#define PESDR0_440SPE_HSSL5SET2 0x0316
+#define PESDR0_440SPE_HSSL5STS 0x0317
+#define PESDR0_440SPE_HSSL6SET1 0x0318
+#define PESDR0_440SPE_HSSL6SET2 0x0319
+#define PESDR0_440SPE_HSSL6STS 0x031a
+#define PESDR0_440SPE_HSSL7SET1 0x031b
+#define PESDR0_440SPE_HSSL7SET2 0x031c
+#define PESDR0_440SPE_HSSL7STS 0x031d
+#define PESDR0_440SPE_HSSCTLSET 0x031e
+#define PESDR0_440SPE_LANE_ABCD 0x031f
+#define PESDR0_440SPE_LANE_EFGH 0x0320
+
+#define PESDR1_440SPE_UTLSET1 0x0340
+#define PESDR1_440SPE_UTLSET2 0x0341
+#define PESDR1_440SPE_DLPSET 0x0342
+#define PESDR1_440SPE_LOOP 0x0343
+#define PESDR1_440SPE_RCSSET 0x0344
+#define PESDR1_440SPE_RCSSTS 0x0345
+#define PESDR1_440SPE_HSSL0SET1 0x0346
+#define PESDR1_440SPE_HSSL0SET2 0x0347
+#define PESDR1_440SPE_HSSL0STS 0x0348
+#define PESDR1_440SPE_HSSL1SET1 0x0349
+#define PESDR1_440SPE_HSSL1SET2 0x034a
+#define PESDR1_440SPE_HSSL1STS 0x034b
+#define PESDR1_440SPE_HSSL2SET1 0x034c
+#define PESDR1_440SPE_HSSL2SET2 0x034d
+#define PESDR1_440SPE_HSSL2STS 0x034e
+#define PESDR1_440SPE_HSSL3SET1 0x034f
+#define PESDR1_440SPE_HSSL3SET2 0x0350
+#define PESDR1_440SPE_HSSL3STS 0x0351
+#define PESDR1_440SPE_HSSCTLSET 0x0352
+#define PESDR1_440SPE_LANE_ABCD 0x0353
+
+#define PESDR2_440SPE_UTLSET1 0x0370
+#define PESDR2_440SPE_UTLSET2 0x0371
+#define PESDR2_440SPE_DLPSET 0x0372
+#define PESDR2_440SPE_LOOP 0x0373
+#define PESDR2_440SPE_RCSSET 0x0374
+#define PESDR2_440SPE_RCSSTS 0x0375
+#define PESDR2_440SPE_HSSL0SET1 0x0376
+#define PESDR2_440SPE_HSSL0SET2 0x0377
+#define PESDR2_440SPE_HSSL0STS 0x0378
+#define PESDR2_440SPE_HSSL1SET1 0x0379
+#define PESDR2_440SPE_HSSL1SET2 0x037a
+#define PESDR2_440SPE_HSSL1STS 0x037b
+#define PESDR2_440SPE_HSSL2SET1 0x037c
+#define PESDR2_440SPE_HSSL2SET2 0x037d
+#define PESDR2_440SPE_HSSL2STS 0x037e
+#define PESDR2_440SPE_HSSL3SET1 0x037f
+#define PESDR2_440SPE_HSSL3SET2 0x0380
+#define PESDR2_440SPE_HSSL3STS 0x0381
+#define PESDR2_440SPE_HSSCTLSET 0x0382
+#define PESDR2_440SPE_LANE_ABCD 0x0383
+
+/*
+ * 405EX additional DCRs
+ */
+#define PESDR0_405EX_UTLSET1 0x0400
+#define PESDR0_405EX_UTLSET2 0x0401
+#define PESDR0_405EX_DLPSET 0x0402
+#define PESDR0_405EX_LOOP 0x0403
+#define PESDR0_405EX_RCSSET 0x0404
+#define PESDR0_405EX_RCSSTS 0x0405
+#define PESDR0_405EX_PHYSET1 0x0406
+#define PESDR0_405EX_PHYSET2 0x0407
+#define PESDR0_405EX_BIST 0x0408
+#define PESDR0_405EX_LPB 0x040B
+#define PESDR0_405EX_PHYSTA 0x040C
+
+#define PESDR1_405EX_UTLSET1 0x0440
+#define PESDR1_405EX_UTLSET2 0x0441
+#define PESDR1_405EX_DLPSET 0x0442
+#define PESDR1_405EX_LOOP 0x0443
+#define PESDR1_405EX_RCSSET 0x0444
+#define PESDR1_405EX_RCSSTS 0x0445
+#define PESDR1_405EX_PHYSET1 0x0446
+#define PESDR1_405EX_PHYSET2 0x0447
+#define PESDR1_405EX_BIST 0x0448
+#define PESDR1_405EX_LPB 0x044B
+#define PESDR1_405EX_PHYSTA 0x044C
+
+/*
+ * 460EX additional DCRs
+ */
+#define PESDR0_460EX_L0BIST 0x0308
+#define PESDR0_460EX_L0BISTSTS 0x0309
+#define PESDR0_460EX_L0CDRCTL 0x030A
+#define PESDR0_460EX_L0DRV 0x030B
+#define PESDR0_460EX_L0REC 0x030C
+#define PESDR0_460EX_L0LPB 0x030D
+#define PESDR0_460EX_L0CLK 0x030E
+#define PESDR0_460EX_PHY_CTL_RST 0x030F
+#define PESDR0_460EX_RSTSTA 0x0310
+#define PESDR0_460EX_OBS 0x0311
+#define PESDR0_460EX_L0ERRC 0x0320
+
+#define PESDR1_460EX_L0BIST 0x0348
+#define PESDR1_460EX_L1BIST 0x0349
+#define PESDR1_460EX_L2BIST 0x034A
+#define PESDR1_460EX_L3BIST 0x034B
+#define PESDR1_460EX_L0BISTSTS 0x034C
+#define PESDR1_460EX_L1BISTSTS 0x034D
+#define PESDR1_460EX_L2BISTSTS 0x034E
+#define PESDR1_460EX_L3BISTSTS 0x034F
+#define PESDR1_460EX_L0CDRCTL 0x0350
+#define PESDR1_460EX_L1CDRCTL 0x0351
+#define PESDR1_460EX_L2CDRCTL 0x0352
+#define PESDR1_460EX_L3CDRCTL 0x0353
+#define PESDR1_460EX_L0DRV 0x0354
+#define PESDR1_460EX_L1DRV 0x0355
+#define PESDR1_460EX_L2DRV 0x0356
+#define PESDR1_460EX_L3DRV 0x0357
+#define PESDR1_460EX_L0REC 0x0358
+#define PESDR1_460EX_L1REC 0x0359
+#define PESDR1_460EX_L2REC 0x035A
+#define PESDR1_460EX_L3REC 0x035B
+#define PESDR1_460EX_L0LPB 0x035C
+#define PESDR1_460EX_L1LPB 0x035D
+#define PESDR1_460EX_L2LPB 0x035E
+#define PESDR1_460EX_L3LPB 0x035F
+#define PESDR1_460EX_L0CLK 0x0360
+#define PESDR1_460EX_L1CLK 0x0361
+#define PESDR1_460EX_L2CLK 0x0362
+#define PESDR1_460EX_L3CLK 0x0363
+#define PESDR1_460EX_PHY_CTL_RST 0x0364
+#define PESDR1_460EX_RSTSTA 0x0365
+#define PESDR1_460EX_OBS 0x0366
+#define PESDR1_460EX_L0ERRC 0x0368
+#define PESDR1_460EX_L1ERRC 0x0369
+#define PESDR1_460EX_L2ERRC 0x036A
+#define PESDR1_460EX_L3ERRC 0x036B
+#define PESDR0_460EX_IHS1 0x036C
+#define PESDR0_460EX_IHS2 0x036D
+
+/*
+ * 460SX additional DCRs
+ */
+#define PESDRn_460SX_RCEI 0x02
+
+#define PESDR0_460SX_HSSL0DAMP 0x320
+#define PESDR0_460SX_HSSL1DAMP 0x321
+#define PESDR0_460SX_HSSL2DAMP 0x322
+#define PESDR0_460SX_HSSL3DAMP 0x323
+#define PESDR0_460SX_HSSL4DAMP 0x324
+#define PESDR0_460SX_HSSL5DAMP 0x325
+#define PESDR0_460SX_HSSL6DAMP 0x326
+#define PESDR0_460SX_HSSL7DAMP 0x327
+
+#define PESDR1_460SX_HSSL0DAMP 0x354
+#define PESDR1_460SX_HSSL1DAMP 0x355
+#define PESDR1_460SX_HSSL2DAMP 0x356
+#define PESDR1_460SX_HSSL3DAMP 0x357
+
+#define PESDR2_460SX_HSSL0DAMP 0x384
+#define PESDR2_460SX_HSSL1DAMP 0x385
+#define PESDR2_460SX_HSSL2DAMP 0x386
+#define PESDR2_460SX_HSSL3DAMP 0x387
+
+#define PESDR0_460SX_HSSL0COEFA 0x328
+#define PESDR0_460SX_HSSL1COEFA 0x329
+#define PESDR0_460SX_HSSL2COEFA 0x32A
+#define PESDR0_460SX_HSSL3COEFA 0x32B
+#define PESDR0_460SX_HSSL4COEFA 0x32C
+#define PESDR0_460SX_HSSL5COEFA 0x32D
+#define PESDR0_460SX_HSSL6COEFA 0x32E
+#define PESDR0_460SX_HSSL7COEFA 0x32F
+
+#define PESDR1_460SX_HSSL0COEFA 0x358
+#define PESDR1_460SX_HSSL1COEFA 0x359
+#define PESDR1_460SX_HSSL2COEFA 0x35A
+#define PESDR1_460SX_HSSL3COEFA 0x35B
+
+#define PESDR2_460SX_HSSL0COEFA 0x388
+#define PESDR2_460SX_HSSL1COEFA 0x389
+#define PESDR2_460SX_HSSL2COEFA 0x38A
+#define PESDR2_460SX_HSSL3COEFA 0x38B
+
+#define PESDR0_460SX_HSSL1CALDRV 0x339
+#define PESDR1_460SX_HSSL1CALDRV 0x361
+#define PESDR2_460SX_HSSL1CALDRV 0x391
+
+#define PESDR0_460SX_HSSSLEW 0x338
+#define PESDR1_460SX_HSSSLEW 0x360
+#define PESDR2_460SX_HSSSLEW 0x390
+
+#define PESDR0_460SX_HSSCTLSET 0x31E
+#define PESDR1_460SX_HSSCTLSET 0x352
+#define PESDR2_460SX_HSSCTLSET 0x382
+
+#define PESDR0_460SX_RCSSET 0x304
+#define PESDR1_460SX_RCSSET 0x344
+#define PESDR2_460SX_RCSSET 0x374
+/*
+ * Of the above, some are common offsets from the base
+ */
+#define PESDRn_UTLSET1 0x00
+#define PESDRn_UTLSET2 0x01
+#define PESDRn_DLPSET 0x02
+#define PESDRn_LOOP 0x03
+#define PESDRn_RCSSET 0x04
+#define PESDRn_RCSSTS 0x05
+
+/* 440spe only */
+#define PESDRn_440SPE_HSSL0SET1 0x06
+#define PESDRn_440SPE_HSSL0SET2 0x07
+#define PESDRn_440SPE_HSSL0STS 0x08
+#define PESDRn_440SPE_HSSL1SET1 0x09
+#define PESDRn_440SPE_HSSL1SET2 0x0a
+#define PESDRn_440SPE_HSSL1STS 0x0b
+#define PESDRn_440SPE_HSSL2SET1 0x0c
+#define PESDRn_440SPE_HSSL2SET2 0x0d
+#define PESDRn_440SPE_HSSL2STS 0x0e
+#define PESDRn_440SPE_HSSL3SET1 0x0f
+#define PESDRn_440SPE_HSSL3SET2 0x10
+#define PESDRn_440SPE_HSSL3STS 0x11
+
+/* 440spe port 0 only */
+#define PESDRn_440SPE_HSSL4SET1 0x12
+#define PESDRn_440SPE_HSSL4SET2 0x13
+#define PESDRn_440SPE_HSSL4STS 0x14
+#define PESDRn_440SPE_HSSL5SET1 0x15
+#define PESDRn_440SPE_HSSL5SET2 0x16
+#define PESDRn_440SPE_HSSL5STS 0x17
+#define PESDRn_440SPE_HSSL6SET1 0x18
+#define PESDRn_440SPE_HSSL6SET2 0x19
+#define PESDRn_440SPE_HSSL6STS 0x1a
+#define PESDRn_440SPE_HSSL7SET1 0x1b
+#define PESDRn_440SPE_HSSL7SET2 0x1c
+#define PESDRn_440SPE_HSSL7STS 0x1d
+
+/* 405ex only */
+#define PESDRn_405EX_PHYSET1 0x06
+#define PESDRn_405EX_PHYSET2 0x07
+#define PESDRn_405EX_PHYSTA 0x0c
+
+/*
+ * UTL register offsets
+ */
+#define PEUTL_PBCTL 0x00
+#define PEUTL_PBBSZ 0x20
+#define PEUTL_OPDBSZ 0x68
+#define PEUTL_IPHBSZ 0x70
+#define PEUTL_IPDBSZ 0x78
+#define PEUTL_OUTTR 0x90
+#define PEUTL_INTR 0x98
+#define PEUTL_PCTL 0xa0
+#define PEUTL_RCSTA 0xB0
+#define PEUTL_RCIRQEN 0xb8
+
+/*
+ * Config space register offsets
+ */
+#define PECFG_ECRTCTL 0x074
+
+#define PECFG_BAR0LMPA 0x210
+#define PECFG_BAR0HMPA 0x214
+#define PECFG_BAR1MPA 0x218
+#define PECFG_BAR2LMPA 0x220
+#define PECFG_BAR2HMPA 0x224
+
+#define PECFG_PIMEN 0x33c
+#define PECFG_PIM0LAL 0x340
+#define PECFG_PIM0LAH 0x344
+#define PECFG_PIM1LAL 0x348
+#define PECFG_PIM1LAH 0x34c
+#define PECFG_PIM01SAL 0x350
+#define PECFG_PIM01SAH 0x354
+
+#define PECFG_POM0LAL 0x380
+#define PECFG_POM0LAH 0x384
+#define PECFG_POM1LAL 0x388
+#define PECFG_POM1LAH 0x38c
+#define PECFG_POM2LAL 0x390
+#define PECFG_POM2LAH 0x394
+
+/* 460sx only */
+#define PECFG_460SX_DLLSTA 0x3f8
+
+/* 460sx Bit Mappings */
+#define PECFG_460SX_DLLSTA_LINKUP 0x00000010
+#define DCRO_PEGPL_460SX_OMR1MSKL_UOT 0x00000004
+
+/* PEGPL Bit Mappings */
+#define DCRO_PEGPL_OMRxMSKL_VAL 0x00000001
+#define DCRO_PEGPL_OMR1MSKL_UOT 0x00000002
+#define DCRO_PEGPL_OMR3MSKL_IO 0x00000002
+
+/* 476FPE */
+#define PCCFG_LCPA 0x270
+#define PECFG_TLDLP 0x3F8
+#define PECFG_TLDLP_LNKUP 0x00000008
+#define PECFG_TLDLP_PRESENT 0x00000010
+#define DCRO_PEGPL_476FPE_OMR1MSKL_UOT 0x00000004
+
+/* SDR Bit Mappings */
+#define PESDRx_RCSSET_HLDPLB 0x10000000
+#define PESDRx_RCSSET_RSTGU 0x01000000
+#define PESDRx_RCSSET_RDY 0x00100000
+#define PESDRx_RCSSET_RSTDL 0x00010000
+#define PESDRx_RCSSET_RSTPYN 0x00001000
+
+/*
+ * Port type (PTYPE_*) and link width (LNKW_*) constants.
+ * NOTE(review): presumably the raw values programmed into the port setup
+ * registers by pci.c -- confirm against the users of these names.
+ */
+enum
+{
+ PTYPE_ENDPOINT = 0x0,
+ PTYPE_LEGACY_ENDPOINT = 0x1,
+ PTYPE_ROOT_PORT = 0x4,
+
+ LNKW_X1 = 0x1,
+ LNKW_X4 = 0x4,
+ LNKW_X8 = 0x8
+};
+
+
+#endif /* __PPC4XX_PCI_H__ */
diff --git a/arch/powerpc/platforms/4xx/soc.c b/arch/powerpc/platforms/4xx/soc.c
new file mode 100644
index 000000000..b2d940437
--- /dev/null
+++ b/arch/powerpc/platforms/4xx/soc.c
@@ -0,0 +1,218 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * IBM/AMCC PPC4xx SoC setup code
+ *
+ * Copyright 2008 DENX Software Engineering, Stefan Roese <sr@denx.de>
+ *
+ * L2 cache routines cloned from arch/ppc/syslib/ibm440gx_common.c which is:
+ * Eugene Surovegin <eugene.surovegin@zultys.com> or <ebs@ebshome.net>
+ * Copyright (c) 2003 - 2006 Zultys Technologies
+ */
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
+
+#include <asm/dcr.h>
+#include <asm/dcr-regs.h>
+#include <asm/reg.h>
+#include <asm/ppc4xx.h>
+
+/* DCR base used for all L2C register accesses; set from dcr-reg[2] in ppc4xx_l2c_probe() */
+static u32 dcrbase_l2c;
+
+/*
+ * L2-cache
+ */
+
+/*
+ * Issue an L2C diagnostic command for @addr: load the address register,
+ * write L2C_CMD_DIAG to the command register, spin until L2C_SR_CC shows
+ * up in the status register and return the content of the data register.
+ */
+static inline u32 l2c_diag(u32 addr)
+{
+	mtdcr(dcrbase_l2c + DCRN_L2C0_ADDR, addr);
+	mtdcr(dcrbase_l2c + DCRN_L2C0_CMD, L2C_CMD_DIAG);
+
+	/* Poll the status register until L2C_SR_CC is set */
+	for (;;) {
+		if (mfdcr(dcrbase_l2c + DCRN_L2C0_SR) & L2C_SR_CC)
+			break;
+	}
+
+	return mfdcr(dcrbase_l2c + DCRN_L2C0_DATA);
+}
+
+/*
+ * L2C error interrupt handler.
+ *
+ * Reads the L2C status register.  For a cache and/or tag parity error the
+ * trapped address is fetched with a diagnostic command and logged, then
+ * both parity error conditions are cleared.  If neither parity bit is
+ * set, the interrupt is reported as an LRU error.  Always returns
+ * IRQ_HANDLED.
+ */
+static irqreturn_t l2c_error_handler(int irq, void *dev)
+{
+	const u32 status = mfdcr(dcrbase_l2c + DCRN_L2C0_SR);
+	u32 trapped;
+
+	/* Neither parity bit set: nothing to read back or clear */
+	if (!(status & (L2C_SR_CPE | L2C_SR_TPE))) {
+		printk(KERN_EMERG "L2C: LRU error\n");
+		return IRQ_HANDLED;
+	}
+
+	if (status & L2C_SR_CPE) {
+		/* Read cache trapped address */
+		trapped = l2c_diag(0x42000000);
+		printk(KERN_EMERG "L2C: Cache Parity Error, addr[16:26] = 0x%08x\n",
+		       trapped);
+	}
+
+	if (status & L2C_SR_TPE) {
+		/* Read tag trapped address */
+		trapped = l2c_diag(0x82000000) >> 16;
+		printk(KERN_EMERG "L2C: Tag Parity Error, addr[16:26] = 0x%08x\n",
+		       trapped);
+	}
+
+	/* Clear parity errors */
+	mtdcr(dcrbase_l2c + DCRN_L2C0_ADDR, 0);
+	mtdcr(dcrbase_l2c + DCRN_L2C0_CMD, L2C_CMD_CCP | L2C_CMD_CTE);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Initcall: find the "ibm,l2-cache" node and enable the L2 cache.
+ *
+ * Reads "cache-size" and the four-cell "dcr-reg" property (cell 0 is used
+ * as the SRAM DCR base, cell 2 as the L2C DCR base), installs the L2C
+ * error interrupt handler and then, with local interrupts disabled,
+ * disables the SRAM, switches the controller to L2 mode, clears the cache
+ * and pending parity errors, programs the two snoop regions and finally
+ * enables the ICU/DCU ports.
+ *
+ * Returns 0 when no L2 cache node exists or the cache was enabled, and
+ * -ENODEV when a required property or the interrupt is unavailable.
+ */
+static int __init ppc4xx_l2c_probe(void)
+{
+	struct device_node *np;
+	u32 r;
+	unsigned long flags;
+	int irq;
+	const u32 *dcrreg;
+	u32 dcrbase_isram;
+	int len;
+	const u32 *prop;
+	u32 l2_size;
+
+	np = of_find_compatible_node(NULL, NULL, "ibm,l2-cache");
+	if (!np)
+		return 0;
+
+	/* Get l2 cache size */
+	prop = of_get_property(np, "cache-size", NULL);
+	if (prop == NULL) {
+		printk(KERN_ERR "%pOF: Can't get cache-size!\n", np);
+		of_node_put(np);
+		return -ENODEV;
+	}
+	l2_size = prop[0];
+
+	/* Map DCRs */
+	dcrreg = of_get_property(np, "dcr-reg", &len);
+	if (!dcrreg || (len != 4 * sizeof(u32))) {
+		printk(KERN_ERR "%pOF: Can't get DCR register base !\n", np);
+		of_node_put(np);
+		return -ENODEV;
+	}
+	dcrbase_isram = dcrreg[0];
+	dcrbase_l2c = dcrreg[2];
+
+	/* Get and map irq number from device tree */
+	irq = irq_of_parse_and_map(np, 0);
+	if (!irq) {
+		printk(KERN_ERR "irq_of_parse_and_map failed\n");
+		of_node_put(np);
+		return -ENODEV;
+	}
+
+	/* Install error handler */
+	if (request_irq(irq, l2c_error_handler, 0, "L2C", NULL) < 0) {
+		printk(KERN_ERR "Cannot install L2C error handler"
+		       ", cache is not enabled\n");
+		of_node_put(np);
+		return -ENODEV;
+	}
+
+	local_irq_save(flags);
+	asm volatile ("sync" ::: "memory");
+
+	/* Disable SRAM */
+	mtdcr(dcrbase_isram + DCRN_SRAM0_DPC,
+	      mfdcr(dcrbase_isram + DCRN_SRAM0_DPC) & ~SRAM_DPC_ENABLE);
+	mtdcr(dcrbase_isram + DCRN_SRAM0_SB0CR,
+	      mfdcr(dcrbase_isram + DCRN_SRAM0_SB0CR) & ~SRAM_SBCR_BU_MASK);
+	mtdcr(dcrbase_isram + DCRN_SRAM0_SB1CR,
+	      mfdcr(dcrbase_isram + DCRN_SRAM0_SB1CR) & ~SRAM_SBCR_BU_MASK);
+	mtdcr(dcrbase_isram + DCRN_SRAM0_SB2CR,
+	      mfdcr(dcrbase_isram + DCRN_SRAM0_SB2CR) & ~SRAM_SBCR_BU_MASK);
+	mtdcr(dcrbase_isram + DCRN_SRAM0_SB3CR,
+	      mfdcr(dcrbase_isram + DCRN_SRAM0_SB3CR) & ~SRAM_SBCR_BU_MASK);
+
+	/* Enable L2_MODE without ICU/DCU */
+	r = mfdcr(dcrbase_l2c + DCRN_L2C0_CFG) &
+		~(L2C_CFG_ICU | L2C_CFG_DCU | L2C_CFG_SS_MASK);
+	r |= L2C_CFG_L2M | L2C_CFG_SS_256;
+	mtdcr(dcrbase_l2c + DCRN_L2C0_CFG, r);
+
+	mtdcr(dcrbase_l2c + DCRN_L2C0_ADDR, 0);
+
+	/* Hardware Clear Command */
+	mtdcr(dcrbase_l2c + DCRN_L2C0_CMD, L2C_CMD_HCC);
+	while (!(mfdcr(dcrbase_l2c + DCRN_L2C0_SR) & L2C_SR_CC))
+		;
+
+	/* Clear Cache Parity and Tag Errors */
+	mtdcr(dcrbase_l2c + DCRN_L2C0_CMD, L2C_CMD_CCP | L2C_CMD_CTE);
+
+	/* Enable 64G snoop region starting at 0 */
+	r = mfdcr(dcrbase_l2c + DCRN_L2C0_SNP0) &
+		~(L2C_SNP_BA_MASK | L2C_SNP_SSR_MASK);
+	r |= L2C_SNP_SSR_32G | L2C_SNP_ESR;
+	mtdcr(dcrbase_l2c + DCRN_L2C0_SNP0, r);
+
+	r = mfdcr(dcrbase_l2c + DCRN_L2C0_SNP1) &
+		~(L2C_SNP_BA_MASK | L2C_SNP_SSR_MASK);
+	r |= 0x80000000 | L2C_SNP_SSR_32G | L2C_SNP_ESR;
+	mtdcr(dcrbase_l2c + DCRN_L2C0_SNP1, r);
+
+	asm volatile ("sync" ::: "memory");
+
+	/* Enable ICU/DCU ports */
+	r = mfdcr(dcrbase_l2c + DCRN_L2C0_CFG);
+	r &= ~(L2C_CFG_DCW_MASK | L2C_CFG_PMUX_MASK | L2C_CFG_PMIM
+	       | L2C_CFG_TPEI | L2C_CFG_CPEI | L2C_CFG_NAM | L2C_CFG_NBRM);
+	r |= L2C_CFG_ICU | L2C_CFG_DCU | L2C_CFG_TPC | L2C_CFG_CPC | L2C_CFG_FRAN
+		| L2C_CFG_CPIM | L2C_CFG_TPIM | L2C_CFG_LIM | L2C_CFG_SMCM;
+
+	/* Check for 460EX/GT special handling */
+	if (of_device_is_compatible(np, "ibm,l2-cache-460ex") ||
+	    of_device_is_compatible(np, "ibm,l2-cache-460gt"))
+		r |= L2C_CFG_RDBW;
+
+	mtdcr(dcrbase_l2c + DCRN_L2C0_CFG, r);
+
+	asm volatile ("sync; isync" ::: "memory");
+	local_irq_restore(flags);
+
+	/* l2_size is a u32, so print it as unsigned */
+	printk(KERN_INFO "%uk L2-cache enabled\n", l2_size >> 10);
+
+	of_node_put(np);
+	return 0;
+}
+arch_initcall(ppc4xx_l2c_probe);
+
+/*
+ * Apply a system reset. Alternatively a board specific value may be
+ * provided via the "reset-type" property in the cpu node.
+ *
+ * The selected reset type is OR-ed into DBCR0; the function then spins
+ * forever and never returns.  @cmd is not used.
+ */
+void ppc4xx_reset_system(char *cmd)
+{
+	struct device_node *np;
+	u32 reset_type = DBCR0_RST_SYSTEM;
+	const u32 *prop;
+
+	np = of_get_cpu_node(0, NULL);
+	if (np) {
+		prop = of_get_property(np, "reset-type", NULL);
+
+		/*
+		 * Check if property exists and if it is in range:
+		 * 1 - PPC4xx core reset
+		 * 2 - PPC4xx chip reset
+		 * 3 - PPC4xx system reset (default)
+		 */
+		if ((prop) && ((prop[0] >= 1) && (prop[0] <= 3)))
+			reset_type = prop[0] << 28;
+
+		/* The property value has been copied; drop the node reference */
+		of_node_put(np);
+	}
+
+	mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) | reset_type);
+
+	while (1)
+		; /* Just in case the reset doesn't work */
+}
diff --git a/arch/powerpc/platforms/4xx/uic.c b/arch/powerpc/platforms/4xx/uic.c
new file mode 100644
index 000000000..e3e148b9d
--- /dev/null
+++ b/arch/powerpc/platforms/4xx/uic.c
@@ -0,0 +1,331 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * arch/powerpc/platforms/4xx/uic.c
+ *
+ * IBM PowerPC 4xx Universal Interrupt Controller
+ *
+ * Copyright 2007 David Gibson <dwg@au1.ibm.com>, IBM Corporation.
+ */
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/reboot.h>
+#include <linux/slab.h>
+#include <linux/stddef.h>
+#include <linux/sched.h>
+#include <linux/signal.h>
+#include <linux/device.h>
+#include <linux/spinlock.h>
+#include <linux/irq.h>
+#include <linux/interrupt.h>
+#include <linux/kernel_stat.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
+#include <asm/irq.h>
+#include <asm/io.h>
+#include <asm/dcr.h>
+#include <asm/uic.h>
+
+#define NR_UIC_INTS 32
+
+#define UIC_SR 0x0
+#define UIC_ER 0x2
+#define UIC_CR 0x3
+#define UIC_PR 0x4
+#define UIC_TR 0x5
+#define UIC_MSR 0x6
+#define UIC_VR 0x7
+#define UIC_VCR 0x8
+
+struct uic *primary_uic;
+
+struct uic {
+ int index; /* value of the node's "cell-index" property */
+ int dcrbase; /* first cell of "dcr-reg"; the UIC_* offsets are added to it */
+
+ raw_spinlock_t lock; /* held around the DCR accesses in the irq_chip callbacks */
+
+ /* The remapper for this UIC */
+ struct irq_domain *irqhost;
+};
+
+static void uic_unmask_irq(struct irq_data *d)
+{
+	struct uic *uic = irq_data_get_irq_chip_data(d);
+	unsigned int src = irqd_to_hwirq(d);
+	unsigned long flags;
+	u32 er, sr;
+
+	/*
+	 * Source 0 maps to the most significant bit. Shift an unsigned
+	 * constant so that src == 0 does not shift into the sign bit
+	 * of a signed int.
+	 */
+	sr = 1U << (31 - src);
+	raw_spin_lock_irqsave(&uic->lock, flags);
+	/* ack level-triggered interrupts here */
+	if (irqd_is_level_type(d))
+		mtdcr(uic->dcrbase + UIC_SR, sr);
+	/* read-modify-write of the enable register, under the lock */
+	er = mfdcr(uic->dcrbase + UIC_ER);
+	er |= sr;
+	mtdcr(uic->dcrbase + UIC_ER, er);
+	raw_spin_unlock_irqrestore(&uic->lock, flags);
+}
+
+static void uic_mask_irq(struct irq_data *d)
+{
+	struct uic *uic = irq_data_get_irq_chip_data(d);
+	unsigned int src = irqd_to_hwirq(d);
+	unsigned long flags;
+	u32 er;
+
+	raw_spin_lock_irqsave(&uic->lock, flags);
+	/* clear this source's enable bit; source 0 is the top bit */
+	er = mfdcr(uic->dcrbase + UIC_ER);
+	/* unsigned constant: src == 0 must not shift into the sign bit */
+	er &= ~(1U << (31 - src));
+	mtdcr(uic->dcrbase + UIC_ER, er);
+	raw_spin_unlock_irqrestore(&uic->lock, flags);
+}
+
+static void uic_ack_irq(struct irq_data *d)
+{
+	struct uic *uic = irq_data_get_irq_chip_data(d);
+	unsigned int src = irqd_to_hwirq(d);
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&uic->lock, flags);
+	/*
+	 * Write this source's bit to the status register. The constant
+	 * is unsigned so that src == 0 does not shift into the sign bit.
+	 */
+	mtdcr(uic->dcrbase + UIC_SR, 1U << (31 - src));
+	raw_spin_unlock_irqrestore(&uic->lock, flags);
+}
+
+static void uic_mask_ack_irq(struct irq_data *d)
+{
+	struct uic *uic = irq_data_get_irq_chip_data(d);
+	unsigned int src = irqd_to_hwirq(d);
+	unsigned long flags;
+	u32 er, sr;
+
+	/* unsigned constant: src == 0 must not shift into the sign bit */
+	sr = 1U << (31 - src);
+	raw_spin_lock_irqsave(&uic->lock, flags);
+	er = mfdcr(uic->dcrbase + UIC_ER);
+	er &= ~sr;
+	mtdcr(uic->dcrbase + UIC_ER, er);
+	/* On the UIC, acking (i.e. clearing the SR bit)
+	 * a level irq will have no effect if the interrupt
+	 * is still asserted by the device, even if
+	 * the interrupt is already masked. Therefore
+	 * we only ack the edge interrupts here, while
+	 * level interrupts are ack'ed after the actual
+	 * isr call in the uic_unmask_irq()
+	 */
+	if (!irqd_is_level_type(d))
+		mtdcr(uic->dcrbase + UIC_SR, sr);
+	raw_spin_unlock_irqrestore(&uic->lock, flags);
+}
+
+static int uic_set_irq_type(struct irq_data *d, unsigned int flow_type)
+{
+	struct uic *uic = irq_data_get_irq_chip_data(d);
+	unsigned int src = irqd_to_hwirq(d);
+	unsigned long flags;
+	/* unsigned so that shifting by 31 (src == 0) stays well defined */
+	u32 trigger, polarity;
+	u32 tr, pr, bit;
+
+	switch (flow_type & IRQ_TYPE_SENSE_MASK) {
+	case IRQ_TYPE_NONE:
+		/* no trigger type requested: just keep the source masked */
+		uic_mask_irq(d);
+		return 0;
+
+	case IRQ_TYPE_EDGE_RISING:
+		trigger = 1; polarity = 1;
+		break;
+	case IRQ_TYPE_EDGE_FALLING:
+		trigger = 1; polarity = 0;
+		break;
+	case IRQ_TYPE_LEVEL_HIGH:
+		trigger = 0; polarity = 1;
+		break;
+	case IRQ_TYPE_LEVEL_LOW:
+		trigger = 0; polarity = 0;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* source 0 is the most significant bit of each register */
+	bit = 1U << (31 - src);
+
+	raw_spin_lock_irqsave(&uic->lock, flags);
+	tr = mfdcr(uic->dcrbase + UIC_TR);
+	pr = mfdcr(uic->dcrbase + UIC_PR);
+	tr = (tr & ~bit) | (trigger << (31 - src));
+	pr = (pr & ~bit) | (polarity << (31 - src));
+
+	mtdcr(uic->dcrbase + UIC_PR, pr);
+	mtdcr(uic->dcrbase + UIC_TR, tr);
+	/* write the source's bit to the status register after the change */
+	mtdcr(uic->dcrbase + UIC_SR, bit);
+
+	raw_spin_unlock_irqrestore(&uic->lock, flags);
+
+	return 0;
+}
+
+static struct irq_chip uic_irq_chip = {
+ .name = "UIC",
+ .irq_unmask = uic_unmask_irq, /* also acks level-triggered sources */
+ .irq_mask = uic_mask_irq,
+ .irq_mask_ack = uic_mask_ack_irq, /* masks; acks only non-level sources */
+ .irq_ack = uic_ack_irq,
+ .irq_set_type = uic_set_irq_type,
+};
+
+static int uic_host_map(struct irq_domain *h, unsigned int virq,
+			irq_hw_number_t hw)
+{
+	/*
+	 * The domain's host_data is the owning struct uic; hand it to the
+	 * new virq as chip data before the chip and flow handler are
+	 * installed.
+	 *
+	 * Despite the name, handle_level_irq() works for both level
+	 * and edge irqs on UIC. FIXME: check this is correct
+	 */
+	irq_set_chip_data(virq, h->host_data);
+	irq_set_chip_and_handler(virq, &uic_irq_chip, handle_level_irq);
+
+	/* No trigger type is known yet, start out with IRQ_TYPE_NONE */
+	irq_set_irq_type(virq, IRQ_TYPE_NONE);
+
+	return 0;
+}
+
+static const struct irq_domain_ops uic_host_ops = {
+ .map = uic_host_map, /* attaches chip data, chip and flow handler to a new virq */
+ .xlate = irq_domain_xlate_twocell,
+};
+
+static void uic_irq_cascade(struct irq_desc *desc)
+{
+ struct irq_chip *chip = irq_desc_get_chip(desc);
+ struct irq_data *idata = irq_desc_get_irq_data(desc);
+ struct uic *uic = irq_desc_get_handler_data(desc); /* set with irq_set_handler_data() in uic_init_tree() */
+ u32 msr;
+ int src;
+
+ raw_spin_lock(&desc->lock);
+ if (irqd_is_level_type(idata)) /* level cascade line: mask now, ack after the dispatch below */
+ chip->irq_mask(idata);
+ else
+ chip->irq_mask_ack(idata);
+ raw_spin_unlock(&desc->lock);
+
+ msr = mfdcr(uic->dcrbase + UIC_MSR);
+ if (!msr) /* spurious interrupt */
+ goto uic_irq_ret;
+
+ src = 32 - ffs(msr); /* lowest set bit, i.e. the highest-numbered pending source */
+
+ generic_handle_domain_irq(uic->irqhost, src); /* dispatch through this UIC's irq domain */
+
+uic_irq_ret:
+ raw_spin_lock(&desc->lock);
+ if (irqd_is_level_type(idata))
+ chip->irq_ack(idata);
+ if (!irqd_irq_disabled(idata) && chip->irq_unmask) /* a disabled cascade line stays masked */
+ chip->irq_unmask(idata);
+ raw_spin_unlock(&desc->lock);
+}
+
+/*
+ * Allocate and initialise the controller described by @node.
+ *
+ * Reads "cell-index" and "dcr-reg" from the node, creates a linear irq
+ * domain with NR_UIC_INTS entries and resets the enable, critical,
+ * trigger and status registers.
+ *
+ * Returns the new struct uic, or NULL on failure; nothing stays
+ * allocated in the failure case.
+ */
+static struct uic * __init uic_init_one(struct device_node *node)
+{
+	struct uic *uic;
+	const u32 *indexp, *dcrreg;
+	int len;
+
+	BUG_ON(!of_device_is_compatible(node, "ibm,uic"));
+
+	uic = kzalloc(sizeof(*uic), GFP_KERNEL);
+	if (!uic)
+		return NULL; /* FIXME: panic? */
+
+	raw_spin_lock_init(&uic->lock);
+	indexp = of_get_property(node, "cell-index", &len);
+	if (!indexp || (len != sizeof(u32))) {
+		printk(KERN_ERR "uic: Device node %pOF has missing or invalid "
+		       "cell-index property\n", node);
+		goto err_free;
+	}
+	uic->index = *indexp;
+
+	/* "dcr-reg" must hold exactly two cells; the first is the base */
+	dcrreg = of_get_property(node, "dcr-reg", &len);
+	if (!dcrreg || (len != 2*sizeof(u32))) {
+		printk(KERN_ERR "uic: Device node %pOF has missing or invalid "
+		       "dcr-reg property\n", node);
+		goto err_free;
+	}
+	uic->dcrbase = *dcrreg;
+
+	uic->irqhost = irq_domain_add_linear(node, NR_UIC_INTS, &uic_host_ops,
+					     uic);
+	if (!uic->irqhost)
+		goto err_free; /* FIXME: panic? */
+
+	/* Start with all interrupts disabled, level and non-critical */
+	mtdcr(uic->dcrbase + UIC_ER, 0);
+	mtdcr(uic->dcrbase + UIC_CR, 0);
+	mtdcr(uic->dcrbase + UIC_TR, 0);
+	/* Clear any pending interrupts, in case the firmware left some */
+	mtdcr(uic->dcrbase + UIC_SR, 0xffffffff);
+
+	printk ("UIC%d (%d IRQ sources) at DCR 0x%x\n", uic->index,
+		NR_UIC_INTS, uic->dcrbase);
+
+	return uic;
+
+err_free:
+	/* nothing references the half-initialised controller yet */
+	kfree(uic);
+	return NULL;
+}
+
+/*
+ * Locate and initialise every "ibm,uic" node in the device tree.
+ *
+ * The node without an "interrupts" property is the primary controller
+ * and becomes the default irq host. Every node with an "interrupts"
+ * property is a secondary controller that is chained to the interrupt
+ * described by that property.
+ */
+void __init uic_init_tree(void)
+{
+	struct device_node *np;
+	struct uic *uic;
+	const u32 *interrupts;
+
+	/* First locate and initialize the top-level UIC */
+	for_each_compatible_node(np, NULL, "ibm,uic") {
+		interrupts = of_get_property(np, "interrupts", NULL);
+		if (!interrupts)
+			break;
+	}
+
+	BUG_ON(!np); /* uic_init_tree() assumes there's a UIC as the
+		      * top-level interrupt controller */
+	primary_uic = uic_init_one(np);
+	if (!primary_uic)
+		panic("Unable to initialize primary UIC %pOF\n", np);
+
+	irq_set_default_host(primary_uic->irqhost);
+	/* drop the reference kept by breaking out of the iterator */
+	of_node_put(np);
+
+	/* Then scan again for cascaded UICs */
+	for_each_compatible_node(np, NULL, "ibm,uic") {
+		unsigned int cascade_virq;
+
+		interrupts = of_get_property(np, "interrupts", NULL);
+		if (!interrupts)
+			continue;
+
+		/* Secondary UIC */
+		uic = uic_init_one(np);
+		if (!uic)
+			panic("Unable to initialize a secondary UIC %pOF\n",
+			      np);
+
+		/* a result of 0 means the cascade interrupt has no mapping */
+		cascade_virq = irq_of_parse_and_map(np, 0);
+		if (!cascade_virq) {
+			printk(KERN_ERR "uic: Unable to map cascade interrupt "
+			       "of %pOF\n", np);
+			continue;
+		}
+
+		irq_set_handler_data(cascade_virq, uic);
+		irq_set_chained_handler(cascade_virq, uic_irq_cascade);
+
+		/* FIXME: setup critical cascade?? */
+	}
+}
+
+/* Return an interrupt vector or 0 if no interrupt is pending. */
+unsigned int uic_get_irq(void)
+{
+	u32 msr;
+	int src;
+
+	BUG_ON(!primary_uic);
+
+	msr = mfdcr(primary_uic->dcrbase + UIC_MSR);
+	/*
+	 * Nothing pending: return 0 explicitly, ffs(0) would otherwise
+	 * yield source number 32, which is outside the NR_UIC_INTS range.
+	 */
+	if (!msr)
+		return 0;
+
+	/* lowest set bit; source 0 is the most significant bit */
+	src = 32 - ffs(msr);
+
+	return irq_linear_revmap(primary_uic->irqhost, src);
+}
diff --git a/arch/powerpc/platforms/512x/Kconfig b/arch/powerpc/platforms/512x/Kconfig
new file mode 100644
index 000000000..deecede78
--- /dev/null
+++ b/arch/powerpc/platforms/512x/Kconfig
@@ -0,0 +1,42 @@
+# SPDX-License-Identifier: GPL-2.0
+config PPC_MPC512x
+ bool "512x-based boards"
+ depends on PPC_BOOK3S_32
+ select COMMON_CLK
+ select FSL_SOC
+ select IPIC
+ select HAVE_PCI
+ select FSL_PCI if PCI
+ select USB_EHCI_BIG_ENDIAN_MMIO if USB_EHCI_HCD
+ select USB_EHCI_BIG_ENDIAN_DESC if USB_EHCI_HCD
+
+config MPC512x_LPBFIFO
+ tristate "MPC512x LocalPlus Bus FIFO driver"
+ depends on PPC_MPC512x && MPC512X_DMA
+ help
+ Enable support for Freescale MPC512x LocalPlus Bus FIFO (SCLPC).
+
+config MPC5121_ADS
+ bool "Freescale MPC5121E ADS"
+ depends on PPC_MPC512x
+ select DEFAULT_UIMAGE
+ help
+ This option enables support for the MPC5121E ADS board.
+
+config MPC512x_GENERIC
+ bool "Generic support for simple MPC512x based boards"
+ depends on PPC_MPC512x
+ select DEFAULT_UIMAGE
+ help
+ This option enables support for simple MPC512x based boards
+ which do not need custom platform specific setup.
+
+ Compatible boards include: Protonic LVT base boards (ZANMCU
+ and VICVT2), Freescale MPC5125 Tower system.
+
+config PDM360NG
+ bool "ifm PDM360NG board"
+ depends on PPC_MPC512x
+ select DEFAULT_UIMAGE
+ help
+ This option enables support for the PDM360NG board.
diff --git a/arch/powerpc/platforms/512x/Makefile b/arch/powerpc/platforms/512x/Makefile
new file mode 100644
index 000000000..2daf22ee2
--- /dev/null
+++ b/arch/powerpc/platforms/512x/Makefile
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for the Freescale PowerPC 512x linux kernel.
+#
+obj-$(CONFIG_COMMON_CLK) += clock-commonclk.o
+obj-y += mpc512x_shared.o
+obj-$(CONFIG_MPC5121_ADS) += mpc5121_ads.o mpc5121_ads_cpld.o
+obj-$(CONFIG_MPC512x_GENERIC) += mpc512x_generic.o
+obj-$(CONFIG_MPC512x_LPBFIFO) += mpc512x_lpbfifo.o
+obj-$(CONFIG_PDM360NG) += pdm360ng.o
diff --git a/arch/powerpc/platforms/512x/clock-commonclk.c b/arch/powerpc/platforms/512x/clock-commonclk.c
new file mode 100644
index 000000000..079cb3627
--- /dev/null
+++ b/arch/powerpc/platforms/512x/clock-commonclk.c
@@ -0,0 +1,1224 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2013 DENX Software Engineering
+ *
+ * Gerhard Sittig, <gsi@denx.de>
+ *
+ * common clock driver support for the MPC512x platform
+ */
+
+#include <linux/bitops.h>
+#include <linux/clk.h>
+#include <linux/clk-provider.h>
+#include <linux/clkdev.h>
+#include <linux/device.h>
+#include <linux/errno.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+
+#include <asm/mpc5121.h>
+#include <dt-bindings/clock/mpc512x-clock.h>
+
+#include "mpc512x.h" /* our public mpc5121_clk_init() API */
+
+/* helpers to keep the MCLK intermediates "somewhere" in our table */
+enum {
+ MCLK_IDX_MUX0,
+ MCLK_IDX_EN0,
+ MCLK_IDX_DIV0,
+ MCLK_MAX_IDX,
+};
+
+#define NR_PSCS 12
+#define NR_MSCANS 4
+#define NR_SPDIFS 1
+#define NR_OUTCLK 4
+#define NR_MCLKS (NR_PSCS + NR_MSCANS + NR_SPDIFS + NR_OUTCLK)
+
+/* extend the public set of clocks by adding internal slots for management */
+enum {
+ /* arrange for adjacent numbers after the public set */
+ MPC512x_CLK_START_PRIVATE = MPC512x_CLK_LAST_PUBLIC,
+ /* clocks which aren't announced to the public */
+ MPC512x_CLK_DDR,
+ MPC512x_CLK_MEM,
+ MPC512x_CLK_IIM,
+ /* intermediates in div+gate combos or fractional dividers */
+ MPC512x_CLK_DDR_UG,
+ MPC512x_CLK_SDHC_x4,
+ MPC512x_CLK_SDHC_UG,
+ MPC512x_CLK_SDHC2_UG,
+ MPC512x_CLK_DIU_x4,
+ MPC512x_CLK_DIU_UG,
+ MPC512x_CLK_MBX_BUS_UG,
+ MPC512x_CLK_MBX_UG,
+ MPC512x_CLK_MBX_3D_UG,
+ MPC512x_CLK_PCI_UG,
+ MPC512x_CLK_NFC_UG,
+ MPC512x_CLK_LPC_UG,
+ MPC512x_CLK_SPDIF_TX_IN,
+ /* intermediates for the mux+gate+div+mux MCLK generation */
+ MPC512x_CLK_MCLKS_FIRST,
+ MPC512x_CLK_MCLKS_LAST = MPC512x_CLK_MCLKS_FIRST
+ + NR_MCLKS * MCLK_MAX_IDX,
+ /* internal, symbolic spec for the number of slots */
+ MPC512x_CLK_LAST_PRIVATE,
+};
+
+/* data required for the OF clock provider registration */
+static struct clk *clks[MPC512x_CLK_LAST_PRIVATE];
+static struct clk_onecell_data clk_data;
+
+/* CCM register access */
+static struct mpc512x_ccm __iomem *clkregs;
+static DEFINE_SPINLOCK(clklock);
+
+/* SoC variants {{{ */
+
+/*
+ * tell SoC variants apart as they are rather similar yet not identical,
+ * cache the result in an enum to not repeatedly run the expensive OF test
+ *
+ * MPC5123 is an MPC5121 without the MBX graphics accelerator
+ *
+ * MPC5125 has many more differences: no MBX, no AXE, no VIU, no SPDIF,
+ * no PATA, no SATA, no PCI, two FECs (of different compatibility name),
+ * only 10 PSCs (of different compatibility name), two SDHCs, different
+ * NFC IP block, output clocks, system PLL status query, different CPMF
+ * interpretation, no CFM, different fourth PSC/CAN mux0 input -- yet
+ * those differences can get folded into this clock provider support
+ * code and don't warrant a separate highly redundant implementation
+ */
+
+static enum soc_type {
+ MPC512x_SOC_MPC5121,
+ MPC512x_SOC_MPC5123,
+ MPC512x_SOC_MPC5125,
+} soc;
+
+static void __init mpc512x_clk_determine_soc(void)
+{
+	/*
+	 * probe the machine's compatible strings in a fixed order and
+	 * remember the first match; "soc" is left untouched when none
+	 * of them matches
+	 */
+	if (of_machine_is_compatible("fsl,mpc5121"))
+		soc = MPC512x_SOC_MPC5121;
+	else if (of_machine_is_compatible("fsl,mpc5123"))
+		soc = MPC512x_SOC_MPC5123;
+	else if (of_machine_is_compatible("fsl,mpc5125"))
+		soc = MPC512x_SOC_MPC5125;
+}
+
+static bool __init soc_has_mbx(void)
+{
+	/* only the MPC5121 reports an MBX */
+	return soc == MPC512x_SOC_MPC5121;
+}
+
+static bool __init soc_has_axe(void)
+{
+	/* every variant except the MPC5125 */
+	return soc != MPC512x_SOC_MPC5125;
+}
+
+static bool __init soc_has_viu(void)
+{
+	/* every variant except the MPC5125 */
+	return soc != MPC512x_SOC_MPC5125;
+}
+
+static bool __init soc_has_spdif(void)
+{
+	/* every variant except the MPC5125 */
+	return soc != MPC512x_SOC_MPC5125;
+}
+
+static bool __init soc_has_pata(void)
+{
+	/* every variant except the MPC5125 */
+	return soc != MPC512x_SOC_MPC5125;
+}
+
+static bool __init soc_has_sata(void)
+{
+	/* every variant except the MPC5125 */
+	return soc != MPC512x_SOC_MPC5125;
+}
+
+static bool __init soc_has_pci(void)
+{
+	/* every variant except the MPC5125 */
+	return soc != MPC512x_SOC_MPC5125;
+}
+
+static bool __init soc_has_fec2(void)
+{
+	/* MPC5125 only */
+	return soc == MPC512x_SOC_MPC5125;
+}
+
+static int __init soc_max_pscnum(void)
+{
+	/* 10 on the MPC5125, 12 on the other variants */
+	return (soc == MPC512x_SOC_MPC5125) ? 10 : 12;
+}
+
+static bool __init soc_has_sdhc2(void)
+{
+	/* MPC5125 only */
+	return soc == MPC512x_SOC_MPC5125;
+}
+
+static bool __init soc_has_nfc_5125(void)
+{
+	/* MPC5125 only */
+	return soc == MPC512x_SOC_MPC5125;
+}
+
+static bool __init soc_has_outclk(void)
+{
+	/* MPC5125 only */
+	return soc == MPC512x_SOC_MPC5125;
+}
+
+static bool __init soc_has_cpmf_0_bypass(void)
+{
+	/* MPC5125 only */
+	return soc == MPC512x_SOC_MPC5125;
+}
+
+static bool __init soc_has_mclk_mux0_canin(void)
+{
+	/* MPC5125 only */
+	return soc == MPC512x_SOC_MPC5125;
+}
+
+/* }}} SoC variants */
+/* common clk API wrappers {{{ */
+
+/* convenience wrappers around the common clk API */
+static inline struct clk *mpc512x_clk_fixed(const char *name, int rate)
+{
+	struct clk *fixed;
+
+	/* no device, no parent, no flags: just a constant rate */
+	fixed = clk_register_fixed_rate(NULL, name, NULL, 0, rate);
+	return fixed;
+}
+
+static inline struct clk *mpc512x_clk_factor(
+	const char *name, const char *parent_name,
+	int mul, int div)
+{
+	/* fixed mul/div ratio, registered with CLK_SET_RATE_PARENT */
+	return clk_register_fixed_factor(NULL, name, parent_name,
+					 CLK_SET_RATE_PARENT, mul, div);
+}
+
+static inline struct clk *mpc512x_clk_divider(
+	const char *name, const char *parent_name, u8 clkflags,
+	u32 __iomem *reg, u8 pos, u8 len, int divflags)
+{
+	/* CLK_DIVIDER_BIG_ENDIAN is always added to the caller's flags */
+	return clk_register_divider(NULL, name, parent_name, clkflags,
+				    reg, pos, len,
+				    divflags | CLK_DIVIDER_BIG_ENDIAN,
+				    &clklock);
+}
+
+static inline struct clk *mpc512x_clk_divtable(
+	const char *name, const char *parent_name,
+	u32 __iomem *reg, u8 pos, u8 len,
+	const struct clk_div_table *divtab)
+{
+	/* table based divider: no clk flags, big endian register */
+	return clk_register_divider_table(NULL, name, parent_name, 0,
+					  reg, pos, len,
+					  CLK_DIVIDER_BIG_ENDIAN,
+					  divtab, &clklock);
+}
+
+static inline struct clk *mpc512x_clk_gated(
+	const char *name, const char *parent_name,
+	u32 __iomem *reg, u8 pos)
+{
+	/* gate at bit "pos", registered with CLK_SET_RATE_PARENT */
+	return clk_register_gate(NULL, name, parent_name,
+				 CLK_SET_RATE_PARENT,
+				 reg, pos, CLK_GATE_BIG_ENDIAN, &clklock);
+}
+
+static inline struct clk *mpc512x_clk_muxed(const char *name,
+	const char **parent_names, int parent_count,
+	u32 __iomem *reg, u8 pos, u8 len)
+{
+	/* mux over "parent_count" inputs, selector at pos/len in "reg" */
+	return clk_register_mux(NULL, name, parent_names, parent_count,
+				CLK_SET_RATE_PARENT,
+				reg, pos, len, CLK_MUX_BIG_ENDIAN, &clklock);
+}
+
+/* }}} common clk API wrappers */
+
+/* read a big endian register and return "len" bits starting at bit "pos" */
+static inline int get_bit_field(uint32_t __iomem *reg, uint8_t pos, uint8_t len)
+{
+	const uint32_t field_mask = (1 << len) - 1;
+
+	return (in_be32(reg) >> pos) & field_mask;
+}
+
+/* read the SPMF field and map it to the "sys pll" multiplier */
+static int __init get_spmf_mult(void)
+{
+	static int mult_by_spmf[] = {
+		68, 1, 12, 16, 20, 24, 28, 32,
+		36, 40, 44, 48, 52, 56, 60, 64,
+	};
+
+	/* four bit field, one table entry per encoding */
+	return mult_by_spmf[get_bit_field(&clkregs->spmr, 24, 4)];
+}
+
+/*
+ * get the SYS_DIV value and translate it into a divide factor
+ *
+ * values returned from here are a multiple of the real factor since the
+ * divide ratio is fractional
+ *
+ * NOTE(review): the field read below is 6 bits wide (codes 0..63) while
+ * the table has 35 entries, so a code above 34 would index past the end
+ * of the table; presumably those codes are reserved in hardware --
+ * confirm against the reference manual or add a bounds check
+ */
+static int __init get_sys_div_x2(void)
+{
+ static int sysdiv_code_to_x2[] = {
+ 4, 5, 6, 7, 8, 9, 10, 14,
+ 12, 16, 18, 22, 20, 24, 26, 30,
+ 28, 32, 34, 38, 36, 40, 42, 46,
+ 44, 48, 50, 54, 52, 56, 58, 62,
+ 60, 64, 66,
+ };
+ int divcode;
+
+ divcode = get_bit_field(&clkregs->scfr2, 26, 6);
+ return sysdiv_code_to_x2[divcode];
+}
+
+/*
+ * get the CPMF value and translate it into a multiplier factor
+ *
+ * values returned from here are a multiple of the real factor since the
+ * multiplier ratio is fractional
+ *
+ * which of the two tables applies depends on soc_has_cpmf_0_bypass()
+ *
+ * NOTE(review): the field read below is 4 bits wide (codes 0..15) while
+ * both tables have 8 entries, so a code above 7 would index past the
+ * end of the table; presumably those codes never occur -- confirm
+ * against the reference manual or add a bounds check
+ */
+static int __init get_cpmf_mult_x2(void)
+{
+ static int cpmf_to_mult_x36[] = {
+ /* 0b000 is "times 36" */
+ 72, 2, 2, 3, 4, 5, 6, 7,
+ };
+ static int cpmf_to_mult_0by[] = {
+ /* 0b000 is "bypass" */
+ 2, 2, 2, 3, 4, 5, 6, 7,
+ };
+
+ int *cpmf_to_mult;
+ int cpmf;
+
+ cpmf = get_bit_field(&clkregs->spmr, 16, 4);
+ if (soc_has_cpmf_0_bypass())
+ cpmf_to_mult = cpmf_to_mult_0by;
+ else
+ cpmf_to_mult = cpmf_to_mult_x36;
+ return cpmf_to_mult[cpmf];
+}
+
+/*
+ * some of the clock dividers do scale in a linear way, yet not all of
+ * their bit combinations are legal; use a divider table to get a
+ * resulting set of applicable divider values
+ */
+
+/* applies to the IPS_DIV, and PCI_DIV values */
+static const struct clk_div_table divtab_2346[] = {
+ { .val = 2, .div = 2, },
+ { .val = 3, .div = 3, },
+ { .val = 4, .div = 4, },
+ { .val = 6, .div = 6, },
+ { .div = 0, },
+};
+
+/* applies to the MBX_DIV, LPC_DIV, and NFC_DIV values */
+static const struct clk_div_table divtab_1234[] = {
+ { .val = 1, .div = 1, },
+ { .val = 2, .div = 2, },
+ { .val = 3, .div = 3, },
+ { .val = 4, .div = 4, },
+ { .div = 0, },
+};
+
+/*
+ * look up "propname" in the first "fsl,mpc5121-immr" compatible node
+ *
+ * returns the property's first cell, or 0 when either the node or the
+ * property is missing
+ */
+static int __init get_freq_from_dt(char *propname)
+{
+	struct device_node *immr;
+	const unsigned int *cell;
+	int freq = 0;
+
+	immr = of_find_compatible_node(NULL, NULL, "fsl,mpc5121-immr");
+	if (!immr)
+		return 0;
+
+	cell = of_get_property(immr, propname, NULL);
+	if (cell)
+		freq = *cell;
+	of_node_put(immr);
+
+	return freq;
+}
+
+static void __init mpc512x_clk_preset_data(void)
+{
+	struct clk **slot;
+
+	/* mark every table slot as "no such clock" until it gets set up */
+	for (slot = clks; slot < clks + ARRAY_SIZE(clks); slot++)
+		*slot = ERR_PTR(-ENODEV);
+}
+
+/*
+ * - receives the "bus frequency" from the caller (that's the IPS clock
+ * rate, the historical source of clock information)
+ * - fetches the system PLL multiplier and divider values as well as the
+ * IPS divider value from hardware
+ * - determines the REF clock rate either from the XTAL/OSC spec (if
+ * there is a device tree node describing the oscillator) or from the
+ * IPS bus clock (supported for backwards compatibility, such that
+ * setups without XTAL/OSC specs keep working)
+ * - creates the "ref" clock item in the clock tree, such that
+ * subsequent code can create the remainder of the hierarchy (REF ->
+ * SYS -> CSB -> IPS) from the REF clock rate and the returned mul/div
+ * values
+ */
+static void __init mpc512x_clk_setup_ref_clock(struct device_node *np, int bus_freq,
+						int *sys_mul, int *sys_div,
+						int *ips_div)
+{
+	struct clk *osc_clk;
+	int calc_freq;
+
+	/* fetch mul/div factors from the hardware */
+	*sys_mul = get_spmf_mult();
+	*sys_mul *= 2;		/* compensate for the fractional divider */
+	*sys_div = get_sys_div_x2();
+	*ips_div = get_bit_field(&clkregs->scfr1, 23, 3);
+
+	/* lookup the oscillator clock for its rate */
+	osc_clk = of_clk_get_by_name(np, "osc");
+
+	/*
+	 * either descend from OSC to REF (and in bypassing verify the
+	 * IPS rate), or backtrack from IPS and multiplier values that
+	 * were fetched from hardware to REF and thus to the OSC value
+	 *
+	 * in either case the REF clock gets created here and the
+	 * remainder of the clock tree can get spanned from there
+	 */
+	if (!IS_ERR(osc_clk)) {
+		/*
+		 * the handle only served to learn that "osc" exists,
+		 * "ref" refers to its parent by name; release it
+		 * instead of leaking the reference
+		 */
+		clk_put(osc_clk);
+
+		clks[MPC512x_CLK_REF] = mpc512x_clk_factor("ref", "osc", 1, 1);
+		calc_freq = clk_get_rate(clks[MPC512x_CLK_REF]);
+		calc_freq *= *sys_mul;
+		calc_freq /= *sys_div;
+		calc_freq /= 2;
+		calc_freq /= *ips_div;
+		/* a bus_freq of 0 means there is nothing to compare with */
+		if (bus_freq && calc_freq != bus_freq)
+			pr_warn("calc rate %d != OF spec %d\n",
+				calc_freq, bus_freq);
+	} else {
+		calc_freq = bus_freq;	/* start with IPS */
+		calc_freq *= *ips_div;	/* IPS -> CSB */
+		calc_freq *= 2;		/* CSB -> SYS */
+		calc_freq *= *sys_div;	/* SYS -> PLL out */
+		calc_freq /= *sys_mul;	/* PLL out -> REF == OSC */
+		clks[MPC512x_CLK_REF] = mpc512x_clk_fixed("ref", calc_freq);
+	}
+}
+
+/* MCLK helpers {{{ */
+
+/*
+ * helper code for the MCLK subtree setup
+ *
+ * the overview in section 5.2.4 of the MPC5121e Reference Manual rev4
+ * suggests that all instances of the "PSC clock generation" are equal,
+ * and that one might re-use the PSC setup for MSCAN clock generation
+ * (section 5.2.5) as well, at least the logic if not the data for
+ * description
+ *
+ * the details (starting at page 5-20) show differences in the specific
+ * inputs of the first mux stage ("can clk in", "spdif tx"), and the
+ * factual non-availability of the second mux stage (it's present yet
+ * only one input is valid)
+ *
+ * the MSCAN clock related registers (starting at page 5-35) all
+ * reference "spdif clk" at the first mux stage and don't mention any
+ * "can clk" at all, which somehow is unexpected
+ *
+ * TODO re-check the document, and clarify whether the RM is correct in
+ * the overview or in the details, and whether the difference is a
+ * clipboard induced error or results from chip revisions
+ *
+ * it turns out that the RM rev4 as of 2012-06 talks about "can" for the
+ * PSCs while RM rev3 as of 2008-10 talks about "spdif", so I guess that
+ * first a doc update is required which better reflects reality in the
+ * SoC before the implementation should follow while no questions remain
+ */
+
+/*
+ * note that this declaration raises a checkpatch warning, but
+ * it's the very data type dictated by <linux/clk-provider.h>,
+ * "fixing" this warning will break compilation
+ */
+static const char *parent_names_mux0_spdif[] = {
+ "sys", "ref", "psc-mclk-in", "spdif-tx",
+};
+
+static const char *parent_names_mux0_canin[] = {
+ "sys", "ref", "psc-mclk-in", "can-clk-in",
+};
+
+enum mclk_type {
+ MCLK_TYPE_PSC,
+ MCLK_TYPE_MSCAN,
+ MCLK_TYPE_SPDIF,
+ MCLK_TYPE_OUTCLK,
+};
+
+struct mclk_setup_data {
+ enum mclk_type type; /* selects the register and table slots in mpc512x_clk_setup_mclk() */
+ bool has_mclk1; /* true: name_mclk is a mux over parent_names_mux1; false: 1:1 factor of its first entry */
+ const char *name_mux0; /* name of the first stage mux clock */
+ const char *name_en0; /* name of the gate clock fed by name_mux0 */
+ const char *name_div0; /* name of the divider clock fed by name_en0 */
+ const char *parent_names_mux1[2];
+ const char *name_mclk; /* name of the clock stored in the public slot */
+};
+
+#define MCLK_SETUP_DATA_PSC(id) { \
+ MCLK_TYPE_PSC, 0, \
+ "psc" #id "-mux0", \
+ "psc" #id "-en0", \
+ "psc" #id "_mclk_div", \
+ { "psc" #id "_mclk_div", "dummy", }, \
+ "psc" #id "_mclk", \
+}
+
+#define MCLK_SETUP_DATA_MSCAN(id) { \
+ MCLK_TYPE_MSCAN, 0, \
+ "mscan" #id "-mux0", \
+ "mscan" #id "-en0", \
+ "mscan" #id "_mclk_div", \
+ { "mscan" #id "_mclk_div", "dummy", }, \
+ "mscan" #id "_mclk", \
+}
+
+#define MCLK_SETUP_DATA_SPDIF { \
+ MCLK_TYPE_SPDIF, 1, \
+ "spdif-mux0", \
+ "spdif-en0", \
+ "spdif_mclk_div", \
+ { "spdif_mclk_div", "spdif-rx", }, \
+ "spdif_mclk", \
+}
+
+#define MCLK_SETUP_DATA_OUTCLK(id) { \
+ MCLK_TYPE_OUTCLK, 0, \
+ "out" #id "-mux0", \
+ "out" #id "-en0", \
+ "out" #id "_mclk_div", \
+ { "out" #id "_mclk_div", "dummy", }, \
+ "out" #id "_clk", \
+}
+
+static struct mclk_setup_data mclk_psc_data[] = {
+ MCLK_SETUP_DATA_PSC(0),
+ MCLK_SETUP_DATA_PSC(1),
+ MCLK_SETUP_DATA_PSC(2),
+ MCLK_SETUP_DATA_PSC(3),
+ MCLK_SETUP_DATA_PSC(4),
+ MCLK_SETUP_DATA_PSC(5),
+ MCLK_SETUP_DATA_PSC(6),
+ MCLK_SETUP_DATA_PSC(7),
+ MCLK_SETUP_DATA_PSC(8),
+ MCLK_SETUP_DATA_PSC(9),
+ MCLK_SETUP_DATA_PSC(10),
+ MCLK_SETUP_DATA_PSC(11),
+};
+
+static struct mclk_setup_data mclk_mscan_data[] = {
+ MCLK_SETUP_DATA_MSCAN(0),
+ MCLK_SETUP_DATA_MSCAN(1),
+ MCLK_SETUP_DATA_MSCAN(2),
+ MCLK_SETUP_DATA_MSCAN(3),
+};
+
+static struct mclk_setup_data mclk_spdif_data[] = {
+ MCLK_SETUP_DATA_SPDIF,
+};
+
+static struct mclk_setup_data mclk_outclk_data[] = {
+ MCLK_SETUP_DATA_OUTCLK(0),
+ MCLK_SETUP_DATA_OUTCLK(1),
+ MCLK_SETUP_DATA_OUTCLK(2),
+ MCLK_SETUP_DATA_OUTCLK(3),
+};
+
+/* setup the MCLK clock subtree of an individual PSC/MSCAN/SPDIF */
+static void __init mpc512x_clk_setup_mclk(struct mclk_setup_data *entry, size_t idx)
+{
+ size_t clks_idx_pub, clks_idx_int;
+ u32 __iomem *mccr_reg; /* MCLK control register (mux, en, div) */
+ int div;
+
+ /* derive a few parameters from the component type and index */
+ switch (entry->type) {
+ case MCLK_TYPE_PSC:
+ clks_idx_pub = MPC512x_CLK_PSC0_MCLK + idx;
+ clks_idx_int = MPC512x_CLK_MCLKS_FIRST
+ + (idx) * MCLK_MAX_IDX;
+ mccr_reg = &clkregs->psc_ccr[idx];
+ break;
+ case MCLK_TYPE_MSCAN:
+ clks_idx_pub = MPC512x_CLK_MSCAN0_MCLK + idx;
+ clks_idx_int = MPC512x_CLK_MCLKS_FIRST
+ + (NR_PSCS + idx) * MCLK_MAX_IDX;
+ mccr_reg = &clkregs->mscan_ccr[idx];
+ break;
+ case MCLK_TYPE_SPDIF:
+ clks_idx_pub = MPC512x_CLK_SPDIF_MCLK;
+ clks_idx_int = MPC512x_CLK_MCLKS_FIRST
+ + (NR_PSCS + NR_MSCANS) * MCLK_MAX_IDX;
+ mccr_reg = &clkregs->spccr;
+ break;
+ case MCLK_TYPE_OUTCLK:
+ clks_idx_pub = MPC512x_CLK_OUT0_CLK + idx;
+ clks_idx_int = MPC512x_CLK_MCLKS_FIRST
+ + (NR_PSCS + NR_MSCANS + NR_SPDIFS + idx)
+ * MCLK_MAX_IDX;
+ mccr_reg = &clkregs->out_ccr[idx];
+ break;
+ default:
+ return; /* unknown type: nothing gets written or registered */
+ }
+
+ /*
+ * this was grabbed from the PPC_CLOCK implementation, which
+ * enforced a specific MCLK divider while the clock was gated
+ * during setup (that's a documented hardware requirement)
+ *
+ * the PPC_CLOCK implementation might even have violated the
+ * "MCLK <= IPS" constraint, the fixed divider value of 1
+ * results in a divider of 2 and thus MCLK = SYS/2 which equals
+ * CSB which is greater than IPS; the serial port setup may have
+ * adjusted the divider which the clock setup might have left in
+ * an undesirable state
+ *
+ * initial setup is:
+ * - MCLK 0 from SYS
+ * - MCLK DIV such to not exceed the IPS clock
+ * - MCLK 0 enabled
+ * - MCLK 1 from MCLK DIV
+ */
+ div = clk_get_rate(clks[MPC512x_CLK_SYS]);
+ div /= clk_get_rate(clks[MPC512x_CLK_IPS]); /* div = SYS rate / IPS rate, integer division */
+ out_be32(mccr_reg, (0 << 16)); /* bit 16 (the gate registered below) cleared, all other bits zero */
+ out_be32(mccr_reg, (0 << 16) | ((div - 1) << 17)); /* still gated, divider field (bit 17 up) = div - 1 */
+ out_be32(mccr_reg, (1 << 16) | ((div - 1) << 17)); /* same divider value, gate bit set */
+
+ /*
+ * create the 'struct clk' items of the MCLK's clock subtree
+ *
+ * note that by design we always create all nodes and won't take
+ * shortcuts here, because
+ * - the "internal" MCLK_DIV and MCLK_OUT signal in turn are
+ * selectable inputs to the CFM while those who "actually use"
+ * the PSC/MSCAN/SPDIF (serial drivers et al) need the MCLK
+ * for their bitrate
+ * - in the absence of "aliases" for clocks we need to create
+ * individual 'struct clk' items for whatever might get
+ * referenced or looked up, even if several of those items are
+ * identical from the logical POV (their rate value)
+ * - for easier future maintenance and for better reflection of
+ * the SoC's documentation, it appears appropriate to generate
+ * clock items even for those muxers which actually are NOPs
+ * (those with two inputs of which one is reserved)
+ */
+ clks[clks_idx_int + MCLK_IDX_MUX0] = mpc512x_clk_muxed(
+ entry->name_mux0,
+ soc_has_mclk_mux0_canin()
+ ? &parent_names_mux0_canin[0]
+ : &parent_names_mux0_spdif[0],
+ ARRAY_SIZE(parent_names_mux0_spdif), /* both mux0 parent tables have four entries */
+ mccr_reg, 14, 2);
+ clks[clks_idx_int + MCLK_IDX_EN0] = mpc512x_clk_gated(
+ entry->name_en0, entry->name_mux0,
+ mccr_reg, 16);
+ clks[clks_idx_int + MCLK_IDX_DIV0] = mpc512x_clk_divider(
+ entry->name_div0,
+ entry->name_en0, CLK_SET_RATE_GATE,
+ mccr_reg, 17, 15, 0);
+ if (entry->has_mclk1) {
+ clks[clks_idx_pub] = mpc512x_clk_muxed(
+ entry->name_mclk,
+ &entry->parent_names_mux1[0],
+ ARRAY_SIZE(entry->parent_names_mux1),
+ mccr_reg, 7, 1);
+ } else {
+ clks[clks_idx_pub] = mpc512x_clk_factor( /* no second mux: 1:1 pass-through of the divider output */
+ entry->name_mclk,
+ entry->parent_names_mux1[0],
+ 1, 1);
+ }
+}
+
+/* }}} MCLK helpers */
+
+static void __init mpc512x_clk_setup_clock_tree(struct device_node *np, int busfreq)
+{
+ int sys_mul, sys_div, ips_div;
+ int mul, div;
+ size_t mclk_idx;
+ int freq;
+
+ /*
+ * developer's notes:
+ * - consider whether to handle clocks which have both gates and
+ * dividers via intermediates or by means of composites
+ * - fractional dividers appear to not map well to composites
+ * since they can be seen as a fixed multiplier and an
+ * adjustable divider, while composites can only combine at
+ * most one of a mux, div, and gate each into one 'struct clk'
+ * item
+ * - PSC/MSCAN/SPDIF clock generation OTOH already is very
+ * specific and cannot get mapped to composites (at least not
+ * a single one, maybe two of them, but then some of these
+ * intermediate clock signals get referenced elsewhere (e.g.
+ * in the clock frequency measurement, CFM) and thus need
+ * publicly available names
+ * - the current source layout appropriately reflects the
+ * hardware setup, and it works, so it's questionable whether
+ * further changes will result in big enough a benefit
+ */
+
+ /* regardless of whether XTAL/OSC exists, have REF created */
+ mpc512x_clk_setup_ref_clock(np, busfreq, &sys_mul, &sys_div, &ips_div);
+
+ /* now setup the REF -> SYS -> CSB -> IPS hierarchy */
+ clks[MPC512x_CLK_SYS] = mpc512x_clk_factor("sys", "ref",
+ sys_mul, sys_div);
+ clks[MPC512x_CLK_CSB] = mpc512x_clk_factor("csb", "sys", 1, 2);
+ clks[MPC512x_CLK_IPS] = mpc512x_clk_divtable("ips", "csb",
+ &clkregs->scfr1, 23, 3,
+ divtab_2346);
+ /* now setup anything below SYS and CSB and IPS */
+
+ clks[MPC512x_CLK_DDR_UG] = mpc512x_clk_factor("ddr-ug", "sys", 1, 2);
+
+ /*
+ * the Reference Manual discusses that for SDHC only even divide
+ * ratios are supported because clock domain synchronization
+ * between 'per' and 'ipg' is broken;
+ * keep the divider's bit 0 cleared (per reset value), and only
+ * allow to setup the divider's bits 7:1, which results in that
+ * only even divide ratios can get configured upon rate changes;
+ * keep the "x4" name because this bit shift hack is an internal
+ * implementation detail, the "fractional divider with quarters"
+ * semantics remains
+ */
+ clks[MPC512x_CLK_SDHC_x4] = mpc512x_clk_factor("sdhc-x4", "csb", 2, 1);
+ clks[MPC512x_CLK_SDHC_UG] = mpc512x_clk_divider("sdhc-ug", "sdhc-x4", 0,
+ &clkregs->scfr2, 1, 7,
+ CLK_DIVIDER_ONE_BASED);
+ if (soc_has_sdhc2()) {
+ clks[MPC512x_CLK_SDHC2_UG] = mpc512x_clk_divider(
+ "sdhc2-ug", "sdhc-x4", 0, &clkregs->scfr2,
+ 9, 7, CLK_DIVIDER_ONE_BASED);
+ }
+
+ clks[MPC512x_CLK_DIU_x4] = mpc512x_clk_factor("diu-x4", "csb", 4, 1);
+ clks[MPC512x_CLK_DIU_UG] = mpc512x_clk_divider("diu-ug", "diu-x4", 0,
+ &clkregs->scfr1, 0, 8,
+ CLK_DIVIDER_ONE_BASED);
+
+ /*
+ * the "power architecture PLL" was setup from data which was
+ * sampled from the reset config word, at this point in time the
+ * configuration can be considered fixed and read only (i.e. no
+ * longer adjustable, or no longer in need of adjustment), which
+ * is why we don't register a PLL here but assume fixed factors
+ */
+ mul = get_cpmf_mult_x2();
+ div = 2; /* compensate for the fractional factor */
+ clks[MPC512x_CLK_E300] = mpc512x_clk_factor("e300", "csb", mul, div);
+
+ if (soc_has_mbx()) {
+ clks[MPC512x_CLK_MBX_BUS_UG] = mpc512x_clk_factor(
+ "mbx-bus-ug", "csb", 1, 2);
+ clks[MPC512x_CLK_MBX_UG] = mpc512x_clk_divtable(
+ "mbx-ug", "mbx-bus-ug", &clkregs->scfr1,
+ 14, 3, divtab_1234);
+ clks[MPC512x_CLK_MBX_3D_UG] = mpc512x_clk_factor(
+ "mbx-3d-ug", "mbx-ug", 1, 1);
+ }
+ if (soc_has_pci()) {
+ clks[MPC512x_CLK_PCI_UG] = mpc512x_clk_divtable(
+ "pci-ug", "csb", &clkregs->scfr1,
+ 20, 3, divtab_2346);
+ }
+ if (soc_has_nfc_5125()) {
+ /*
+ * XXX TODO implement 5125 NFC clock setup logic,
+ * with high/low period counters in clkregs->scfr3,
+ * currently there are no users so it's ENOIMPL
+ */
+ clks[MPC512x_CLK_NFC_UG] = ERR_PTR(-ENOTSUPP);
+ } else {
+ clks[MPC512x_CLK_NFC_UG] = mpc512x_clk_divtable(
+ "nfc-ug", "ips", &clkregs->scfr1,
+ 8, 3, divtab_1234);
+ }
+ clks[MPC512x_CLK_LPC_UG] = mpc512x_clk_divtable("lpc-ug", "ips",
+ &clkregs->scfr1, 11, 3,
+ divtab_1234);
+
+ clks[MPC512x_CLK_LPC] = mpc512x_clk_gated("lpc", "lpc-ug",
+ &clkregs->sccr1, 30);
+ clks[MPC512x_CLK_NFC] = mpc512x_clk_gated("nfc", "nfc-ug",
+ &clkregs->sccr1, 29);
+ if (soc_has_pata()) {
+ clks[MPC512x_CLK_PATA] = mpc512x_clk_gated(
+ "pata", "ips", &clkregs->sccr1, 28);
+ }
+ /* for PSCs there is a "registers" gate and a bitrate MCLK subtree */
+ for (mclk_idx = 0; mclk_idx < soc_max_pscnum(); mclk_idx++) {
+ char name[12];
+ snprintf(name, sizeof(name), "psc%d", mclk_idx);
+ clks[MPC512x_CLK_PSC0 + mclk_idx] = mpc512x_clk_gated(
+ name, "ips", &clkregs->sccr1, 27 - mclk_idx);
+ mpc512x_clk_setup_mclk(&mclk_psc_data[mclk_idx], mclk_idx);
+ }
+ clks[MPC512x_CLK_PSC_FIFO] = mpc512x_clk_gated("psc-fifo", "ips",
+ &clkregs->sccr1, 15);
+ if (soc_has_sata()) {
+ clks[MPC512x_CLK_SATA] = mpc512x_clk_gated(
+ "sata", "ips", &clkregs->sccr1, 14);
+ }
+ clks[MPC512x_CLK_FEC] = mpc512x_clk_gated("fec", "ips",
+ &clkregs->sccr1, 13);
+ if (soc_has_pci()) {
+ clks[MPC512x_CLK_PCI] = mpc512x_clk_gated(
+ "pci", "pci-ug", &clkregs->sccr1, 11);
+ }
+ clks[MPC512x_CLK_DDR] = mpc512x_clk_gated("ddr", "ddr-ug",
+ &clkregs->sccr1, 10);
+ if (soc_has_fec2()) {
+ clks[MPC512x_CLK_FEC2] = mpc512x_clk_gated(
+ "fec2", "ips", &clkregs->sccr1, 9);
+ }
+
+ clks[MPC512x_CLK_DIU] = mpc512x_clk_gated("diu", "diu-ug",
+ &clkregs->sccr2, 31);
+ if (soc_has_axe()) {
+ clks[MPC512x_CLK_AXE] = mpc512x_clk_gated(
+ "axe", "csb", &clkregs->sccr2, 30);
+ }
+ clks[MPC512x_CLK_MEM] = mpc512x_clk_gated("mem", "ips",
+ &clkregs->sccr2, 29);
+ clks[MPC512x_CLK_USB1] = mpc512x_clk_gated("usb1", "csb",
+ &clkregs->sccr2, 28);
+ clks[MPC512x_CLK_USB2] = mpc512x_clk_gated("usb2", "csb",
+ &clkregs->sccr2, 27);
+ clks[MPC512x_CLK_I2C] = mpc512x_clk_gated("i2c", "ips",
+ &clkregs->sccr2, 26);
+ /* MSCAN differs from PSC with just one gate for multiple components */
+ clks[MPC512x_CLK_BDLC] = mpc512x_clk_gated("bdlc", "ips",
+ &clkregs->sccr2, 25);
+ for (mclk_idx = 0; mclk_idx < ARRAY_SIZE(mclk_mscan_data); mclk_idx++)
+ mpc512x_clk_setup_mclk(&mclk_mscan_data[mclk_idx], mclk_idx);
+ clks[MPC512x_CLK_SDHC] = mpc512x_clk_gated("sdhc", "sdhc-ug",
+ &clkregs->sccr2, 24);
+ /* there is only one SPDIF component, which shares MCLK support code */
+ if (soc_has_spdif()) {
+ clks[MPC512x_CLK_SPDIF] = mpc512x_clk_gated(
+ "spdif", "ips", &clkregs->sccr2, 23);
+ mpc512x_clk_setup_mclk(&mclk_spdif_data[0], 0);
+ }
+ if (soc_has_mbx()) {
+ clks[MPC512x_CLK_MBX_BUS] = mpc512x_clk_gated(
+ "mbx-bus", "mbx-bus-ug", &clkregs->sccr2, 22);
+ clks[MPC512x_CLK_MBX] = mpc512x_clk_gated(
+ "mbx", "mbx-ug", &clkregs->sccr2, 21);
+ clks[MPC512x_CLK_MBX_3D] = mpc512x_clk_gated(
+ "mbx-3d", "mbx-3d-ug", &clkregs->sccr2, 20);
+ }
+ clks[MPC512x_CLK_IIM] = mpc512x_clk_gated("iim", "csb",
+ &clkregs->sccr2, 19);
+ if (soc_has_viu()) {
+ clks[MPC512x_CLK_VIU] = mpc512x_clk_gated(
+ "viu", "csb", &clkregs->sccr2, 18);
+ }
+ if (soc_has_sdhc2()) {
+ clks[MPC512x_CLK_SDHC2] = mpc512x_clk_gated(
+ "sdhc-2", "sdhc2-ug", &clkregs->sccr2, 17);
+ }
+
+ if (soc_has_outclk()) {
+ size_t idx; /* used as mclk_idx, just to trim line length */
+ for (idx = 0; idx < ARRAY_SIZE(mclk_outclk_data); idx++)
+ mpc512x_clk_setup_mclk(&mclk_outclk_data[idx], idx);
+ }
+
+ /*
+ * externally provided clocks (when implemented in hardware,
+ * device tree may specify values which otherwise were unknown)
+ */
+ freq = get_freq_from_dt("psc_mclk_in");
+ if (!freq)
+ freq = 25000000;
+ clks[MPC512x_CLK_PSC_MCLK_IN] = mpc512x_clk_fixed("psc_mclk_in", freq);
+ if (soc_has_mclk_mux0_canin()) {
+ freq = get_freq_from_dt("can_clk_in");
+ clks[MPC512x_CLK_CAN_CLK_IN] = mpc512x_clk_fixed(
+ "can_clk_in", freq);
+ } else {
+ freq = get_freq_from_dt("spdif_tx_in");
+ clks[MPC512x_CLK_SPDIF_TX_IN] = mpc512x_clk_fixed(
+ "spdif_tx_in", freq);
+ freq = get_freq_from_dt("spdif_rx_in");
+ clks[MPC512x_CLK_SPDIF_TX_IN] = mpc512x_clk_fixed(
+ "spdif_rx_in", freq);
+ }
+
+ /* fixed frequency for AC97, always 24.567MHz */
+ clks[MPC512x_CLK_AC97] = mpc512x_clk_fixed("ac97", 24567000);
+
+ /*
+ * pre-enable those "internal" clock items which never get
+ * claimed by any peripheral driver, to not have the clock
+ * subsystem disable them late at startup
+ */
+ clk_prepare_enable(clks[MPC512x_CLK_DUMMY]);
+ clk_prepare_enable(clks[MPC512x_CLK_E300]); /* PowerPC CPU */
+ clk_prepare_enable(clks[MPC512x_CLK_DDR]); /* DRAM */
+ clk_prepare_enable(clks[MPC512x_CLK_MEM]); /* SRAM */
+ clk_prepare_enable(clks[MPC512x_CLK_IPS]); /* SoC periph */
+ clk_prepare_enable(clks[MPC512x_CLK_LPC]); /* boot media */
+}
+
+/*
+ * Expose the public part of the clock table to OF based lookups.
+ *
+ * Only the items up to and including MPC512x_CLK_LAST_PUBLIC (those
+ * listed in the dt-bindings/ header file) are handed to the onecell
+ * provider; the intermediate clock items stay private to this file.
+ */
+static void __init mpc5121_clk_register_of_provider(struct device_node *np)
+{
+	/* deliberately _not_ ARRAY_SIZE(): hide the intermediate items */
+	clk_data.clk_num = MPC512x_CLK_LAST_PUBLIC + 1;
+	clk_data.clks = clks;
+
+	of_clk_add_provider(np, of_clk_src_onecell_get, &clk_data);
+}
+
+/*
+ * temporary support for the period of time between introduction of CCF
+ * support and the adjustment of peripheral drivers to OF based lookups
+ *
+ * pre-enables the PSC3 MCLK (serial console) unconditionally, and the
+ * PCI clock when the device tree has a "fsl,mpc5121-pci" node
+ */
+static void __init mpc5121_clk_provide_migration_support(void)
+{
+	struct device_node *np;
+
+	/*
+	 * pre-enable those clock items which are not yet appropriately
+	 * acquired by their peripheral driver
+	 *
+	 * the PCI clock cannot get acquired by its peripheral driver,
+	 * because for this platform the driver won't probe(), instead
+	 * initialization is done from within the .setup_arch() routine
+	 * at a point in time where the clock provider has not been
+	 * setup yet and thus isn't available yet
+	 *
+	 * so we "pre-enable" the clock here, to not have the clock
+	 * subsystem automatically disable this item in a late init call
+	 *
+	 * this PCI clock pre-enable workaround only applies when there
+	 * are device tree nodes for PCI and thus the peripheral driver
+	 * has attached to bridges, otherwise the PCI clock remains
+	 * unused and so it gets disabled
+	 */
+	clk_prepare_enable(clks[MPC512x_CLK_PSC3_MCLK]);/* serial console */
+
+	np = of_find_compatible_node(NULL, "pci", "fsl,mpc5121-pci");
+	if (np) {
+		clk_prepare_enable(clks[MPC512x_CLK_PCI]);
+		/* only the node's presence matters, drop the reference last */
+		of_node_put(np);
+	}
+}
+
+/*
+ * those macros are not exactly pretty, but they encapsulate a lot
+ * of copy'n'paste heavy code which is even more ugly, and reduce
+ * the potential for inconsistencies in those many code copies
+ *
+ * none of them takes its working state as a parameter: they expand to
+ * code that uses the locals 'np', 'res', 'devname', and 'did_register'
+ * (plus the DID_REG_* flags) of the function they are used in
+ */
+
+/* iterate 'np' over all device tree nodes compatible with 'compatname' */
+#define FOR_NODES(compatname) \
+ for_each_compatible_node(np, NULL, compatname)
+
+/*
+ * translate address index 0 of 'np' into 'res', and compose the
+ * "<start address>.<node name>" string in 'devname'
+ *
+ * NOTE(review): the of_address_to_resource() result is not checked, so
+ * 'res' is only meaningful when the translation succeeded -- confirm
+ * that every matched node carries a usable 'reg' property
+ */
+#define NODE_PREP do { \
+ of_address_to_resource(np, 0, &res); \
+ snprintf(devname, sizeof(devname), "%pa.%s", &res.start, np->name); \
+} while (0)
+
+/*
+ * when the OF lookup of 'clkname' for node 'np' fails, register
+ * 'clkitem' under the 'clkname' alias for device 'devname' (and, if
+ * 'regnode' is non-zero, for the bare node name as well), and record
+ * DID_REG_<regflag> in 'did_register'; when the lookup succeeds just
+ * drop the reference which the lookup took
+ */
+#define NODE_CHK(clkname, clkitem, regnode, regflag) do { \
+ struct clk *clk; \
+ clk = of_clk_get_by_name(np, clkname); \
+ if (IS_ERR(clk)) { \
+ clk = clkitem; \
+ clk_register_clkdev(clk, clkname, devname); \
+ if (regnode) \
+ clk_register_clkdev(clk, clkname, np->name); \
+ did_register |= DID_REG_ ## regflag; \
+ pr_debug("clock alias name '%s' for dev '%s' pointer %p\n", \
+ clkname, devname, clk); \
+ } else { \
+ clk_put(clk); \
+ } \
+} while (0)
+
+/*
+ * register source code provided fallback results for clock lookups,
+ * these get consulted when OF based clock lookup fails (that is in the
+ * case of not yet adjusted device tree data, where clock related specs
+ * are missing)
+ *
+ * walks the device tree nodes of each supported peripheral type, and
+ * finally emits one line of diagnostics which lists the peripheral
+ * types for which fallbacks had to be registered
+ */
+static void __init mpc5121_clk_provide_backwards_compat(void)
+{
+ /* one bit per peripheral type, set by NODE_CHK() upon registration */
+ enum did_reg_flags {
+ DID_REG_PSC = BIT(0),
+ DID_REG_PSCFIFO = BIT(1),
+ DID_REG_NFC = BIT(2),
+ DID_REG_CAN = BIT(3),
+ DID_REG_I2C = BIT(4),
+ DID_REG_DIU = BIT(5),
+ DID_REG_VIU = BIT(6),
+ DID_REG_FEC = BIT(7),
+ DID_REG_USB = BIT(8),
+ DID_REG_PATA = BIT(9),
+ };
+
+ /* these locals are implicitly used by FOR_NODES, NODE_PREP, NODE_CHK */
+ int did_register;
+ struct device_node *np;
+ struct resource res;
+ int idx;
+ char devname[32];
+
+ did_register = 0;
+
+ FOR_NODES(mpc512x_select_psc_compat()) {
+ NODE_PREP;
+ /* the PSC number is taken from bits 11:8 of the start address */
+ idx = (res.start >> 8) & 0xf;
+ NODE_CHK("ipg", clks[MPC512x_CLK_PSC0 + idx], 0, PSC);
+ NODE_CHK("mclk", clks[MPC512x_CLK_PSC0_MCLK + idx], 0, PSC);
+ }
+
+ FOR_NODES("fsl,mpc5121-psc-fifo") {
+ NODE_PREP;
+ NODE_CHK("ipg", clks[MPC512x_CLK_PSC_FIFO], 1, PSCFIFO);
+ }
+
+ FOR_NODES("fsl,mpc5121-nfc") {
+ NODE_PREP;
+ NODE_CHK("ipg", clks[MPC512x_CLK_NFC], 0, NFC);
+ }
+
+ FOR_NODES("fsl,mpc5121-mscan") {
+ NODE_PREP;
+ /* the MSCAN number (0..3) is composed from two address bits */
+ idx = 0;
+ idx += (res.start & 0x2000) ? 2 : 0;
+ idx += (res.start & 0x0080) ? 1 : 0;
+ NODE_CHK("ipg", clks[MPC512x_CLK_BDLC], 0, CAN);
+ NODE_CHK("mclk", clks[MPC512x_CLK_MSCAN0_MCLK + idx], 0, CAN);
+ }
+
+ /*
+ * do register the 'ips', 'sys', and 'ref' names globally
+ * instead of inside each individual CAN node, as there is no
+ * potential for a name conflict (in contrast to 'ipg' and 'mclk')
+ */
+ if (did_register & DID_REG_CAN) {
+ clk_register_clkdev(clks[MPC512x_CLK_IPS], "ips", NULL);
+ clk_register_clkdev(clks[MPC512x_CLK_SYS], "sys", NULL);
+ clk_register_clkdev(clks[MPC512x_CLK_REF], "ref", NULL);
+ }
+
+ FOR_NODES("fsl,mpc5121-i2c") {
+ NODE_PREP;
+ NODE_CHK("ipg", clks[MPC512x_CLK_I2C], 0, I2C);
+ }
+
+ /*
+ * workaround for the fact that the I2C driver does an "anonymous"
+ * lookup (NULL name spec, which yields the first clock spec) for
+ * which we cannot register an alias -- a _global_ 'ipg' alias that
+ * is not bound to any device name and returns the I2C clock item
+ * is not a good idea
+ *
+ * so we have the lookup in the peripheral driver fail, which is
+ * silent and non-fatal, and pre-enable the clock item here such
+ * that register access is possible
+ *
+ * see commit b3bfce2b "i2c: mpc: cleanup clock API use" for
+ * details, adjusting s/NULL/"ipg"/ in i2c-mpc.c would make this
+ * workaround obsolete
+ */
+ if (did_register & DID_REG_I2C)
+ clk_prepare_enable(clks[MPC512x_CLK_I2C]);
+
+ FOR_NODES("fsl,mpc5121-diu") {
+ NODE_PREP;
+ NODE_CHK("ipg", clks[MPC512x_CLK_DIU], 1, DIU);
+ }
+
+ FOR_NODES("fsl,mpc5121-viu") {
+ NODE_PREP;
+ NODE_CHK("ipg", clks[MPC512x_CLK_VIU], 0, VIU);
+ }
+
+ /*
+ * note that 2771399a "fs_enet: cleanup clock API use" did use the
+ * "per" string for the clock lookup in contrast to the "ipg" name
+ * which most other nodes are using -- this is not a fatal thing
+ * but just something to keep in mind when doing compatibility
+ * registration, it's a non-issue with up-to-date device tree data
+ */
+ FOR_NODES("fsl,mpc5121-fec") {
+ NODE_PREP;
+ NODE_CHK("per", clks[MPC512x_CLK_FEC], 0, FEC);
+ }
+ FOR_NODES("fsl,mpc5121-fec-mdio") {
+ NODE_PREP;
+ NODE_CHK("per", clks[MPC512x_CLK_FEC], 0, FEC);
+ }
+ /*
+ * MPC5125 has two FECs: FEC1 at 0x2800, FEC2 at 0x4800;
+ * the clock items don't "form an array" since FEC2 was
+ * added only later and was not allowed to shift all other
+ * clock item indices, so the numbers aren't adjacent
+ */
+ FOR_NODES("fsl,mpc5125-fec") {
+ NODE_PREP;
+ if (res.start & 0x4000)
+ idx = MPC512x_CLK_FEC2;
+ else
+ idx = MPC512x_CLK_FEC;
+ NODE_CHK("per", clks[idx], 0, FEC);
+ }
+
+ FOR_NODES("fsl,mpc5121-usb2-dr") {
+ NODE_PREP;
+ /* address bit 0x4000 selects the USB2 item over the USB1 item */
+ idx = (res.start & 0x4000) ? 1 : 0;
+ NODE_CHK("ipg", clks[MPC512x_CLK_USB1 + idx], 0, USB);
+ }
+
+ FOR_NODES("fsl,mpc5121-pata") {
+ NODE_PREP;
+ NODE_CHK("ipg", clks[MPC512x_CLK_PATA], 0, PATA);
+ }
+
+ /*
+ * try to collapse diagnostics into a single line of output yet
+ * provide a full list of what is missing, to avoid noise in the
+ * absence of up-to-date device tree data -- backwards
+ * compatibility to old DTBs is a requirement, updates may be
+ * desirable or preferable but are not at all mandatory
+ */
+ if (did_register) {
+ pr_notice("device tree lacks clock specs, adding fallbacks (0x%x,%s%s%s%s%s%s%s%s%s%s)\n",
+ did_register,
+ (did_register & DID_REG_PSC) ? " PSC" : "",
+ (did_register & DID_REG_PSCFIFO) ? " PSCFIFO" : "",
+ (did_register & DID_REG_NFC) ? " NFC" : "",
+ (did_register & DID_REG_CAN) ? " CAN" : "",
+ (did_register & DID_REG_I2C) ? " I2C" : "",
+ (did_register & DID_REG_DIU) ? " DIU" : "",
+ (did_register & DID_REG_VIU) ? " VIU" : "",
+ (did_register & DID_REG_FEC) ? " FEC" : "",
+ (did_register & DID_REG_USB) ? " USB" : "",
+ (did_register & DID_REG_PATA) ? " PATA" : "");
+ } else {
+ pr_debug("device tree has clock specs, no fallbacks added\n");
+ }
+}
+
+/*
+ * Set up the MPC512x clock tree and register it as an OF clock provider.
+ *
+ * The "fixed-clock" nodes (which include the oscillator node if the board's
+ * DT provides one) have already been scanned by the of_clk_init() in
+ * time_init().
+ *
+ * Returns 0 on success, -ENODEV when the device tree has no
+ * "fsl,mpc5121-clock" node, and -ENOMEM when the clock control registers
+ * cannot be mapped.
+ */
+int __init mpc5121_clk_init(void)
+{
+	struct device_node *clk_np;
+	int busfreq;
+
+	/* map the clock control registers */
+	clk_np = of_find_compatible_node(NULL, NULL, "fsl,mpc5121-clock");
+	if (!clk_np)
+		return -ENODEV;
+	clkregs = of_iomap(clk_np, 0);
+	if (WARN_ON(!clkregs)) {
+		/*
+		 * all of the setup below accesses the registers through
+		 * 'clkregs', so there is no point in continuing without
+		 * the mapping
+		 */
+		of_node_put(clk_np);
+		return -ENOMEM;
+	}
+
+	/* determine the SoC variant we run on */
+	mpc512x_clk_determine_soc();
+
+	/* invalidate all not yet registered clock slots */
+	mpc512x_clk_preset_data();
+
+	/*
+	 * add a dummy clock for those situations where a clock spec is
+	 * required yet no real clock is involved
+	 */
+	clks[MPC512x_CLK_DUMMY] = mpc512x_clk_fixed("dummy", 0);
+
+	/*
+	 * have all the real nodes in the clock tree populated from REF
+	 * down to all leaves, either starting from the OSC node or from
+	 * a REF root that was created from the IPS bus clock input
+	 */
+	busfreq = get_freq_from_dt("bus-frequency");
+	mpc512x_clk_setup_clock_tree(clk_np, busfreq);
+
+	/* register as an OF clock provider */
+	mpc5121_clk_register_of_provider(clk_np);
+
+	of_node_put(clk_np);
+
+	/*
+	 * unbreak not yet adjusted peripheral drivers during migration
+	 * towards fully operational common clock support, and allow
+	 * operation in the absence of clock related device tree specs
+	 */
+	mpc5121_clk_provide_migration_support();
+	mpc5121_clk_provide_backwards_compat();
+
+	return 0;
+}
diff --git a/arch/powerpc/platforms/512x/mpc5121_ads.c b/arch/powerpc/platforms/512x/mpc5121_ads.c
new file mode 100644
index 000000000..a18f85b3e
--- /dev/null
+++ b/arch/powerpc/platforms/512x/mpc5121_ads.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2007, 2008 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * Author: John Rigby, <jrigby@freescale.com>, Thur Mar 29 2007
+ *
+ * Description:
+ * MPC5121 ADS board setup
+ */
+
+#include <linux/kernel.h>
+#include <linux/io.h>
+#include <linux/of.h>
+
+#include <asm/machdep.h>
+#include <asm/ipic.h>
+#include <asm/time.h>
+
+#include <sysdev/fsl_pci.h>
+
+#include "mpc512x.h"
+#include "mpc5121_ads.h"
+
+/*
+ * .setup_arch() hook of the MPC5121 ADS board: announce the board, map
+ * the CPLD registers, then run the common MPC512x setup.
+ */
+static void __init mpc5121_ads_setup_arch(void)
+{
+	pr_info("MPC5121 ADS board from Freescale Semiconductor\n");
+
+	/* the CPLD registers are needed early, map them first */
+	mpc5121_ads_cpld_map();
+	mpc512x_setup_arch();
+}
+
+/*
+ * .discover_phbs() hook: add a bridge for every "fsl,mpc5121-pci" node
+ * of type "pci"; compiles to an empty function without CONFIG_PCI.
+ */
+static void __init mpc5121_ads_setup_pci(void)
+{
+#ifdef CONFIG_PCI
+	struct device_node *pci_np;
+
+	for_each_compatible_node(pci_np, "pci", "fsl,mpc5121-pci") {
+		mpc83xx_add_bridge(pci_np);
+	}
+#endif
+}
+
+/*
+ * .init_IRQ() hook: run the common MPC512x interrupt setup first, then
+ * set up the board's CPLD interrupt controller
+ */
+static void __init mpc5121_ads_init_IRQ(void)
+{
+ mpc512x_init_IRQ();
+ mpc5121_ads_cpld_pic_init();
+}
+
+/*
+ * Called very early, MMU is off, device-tree isn't unflattened
+ *
+ * Runs the common MPC512x early init and unconditionally returns 1.
+ */
+static int __init mpc5121_ads_probe(void)
+{
+ mpc512x_init_early();
+
+ return 1;
+}
+
+/*
+ * machine description of the MPC5121 ADS board: board specific hooks for
+ * probe, arch setup, PCI bridge discovery and interrupt init, shared
+ * MPC512x code for init and restart, and the IPIC for interrupt lookup
+ */
+define_machine(mpc5121_ads) {
+ .name = "MPC5121 ADS",
+ .compatible = "fsl,mpc5121ads",
+ .probe = mpc5121_ads_probe,
+ .setup_arch = mpc5121_ads_setup_arch,
+ .discover_phbs = mpc5121_ads_setup_pci,
+ .init = mpc512x_init,
+ .init_IRQ = mpc5121_ads_init_IRQ,
+ .get_irq = ipic_get_irq,
+ .restart = mpc512x_restart,
+};
diff --git a/arch/powerpc/platforms/512x/mpc5121_ads.h b/arch/powerpc/platforms/512x/mpc5121_ads.h
new file mode 100644
index 000000000..c88dea828
--- /dev/null
+++ b/arch/powerpc/platforms/512x/mpc5121_ads.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * Prototypes for ADS5121 specific code
+ */
+
+#ifndef __MPC512ADS_H__
+#define __MPC512ADS_H__
+/* map the CPLD registers of the "fsl,mpc5121ads-cpld-pic" node */
+extern void __init mpc5121_ads_cpld_map(void);
+/* set up the CPLD interrupt controller behind its cascade interrupt */
+extern void __init mpc5121_ads_cpld_pic_init(void);
+#endif /* __MPC512ADS_H__ */
diff --git a/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c b/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c
new file mode 100644
index 000000000..6f08d07ae
--- /dev/null
+++ b/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c
@@ -0,0 +1,198 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * Author: John Rigby, <jrigby@freescale.com>
+ *
+ * Description:
+ * MPC5121ADS CPLD irq handling
+ */
+
+#undef DEBUG
+
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/io.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+
+/* device tree node and interrupt domain of the CPLD PIC, set up at init */
+static struct device_node *cpld_pic_node;
+static struct irq_domain *cpld_pic_host;
+
+/*
+ * Bits to ignore in the misc_status register
+ * 0x10 touch screen pendown is hard routed to irq1
+ * 0x02 pci status is read from pci status register
+ */
+#define MISC_IGNORE 0x12
+
+/*
+ * Nothing to ignore in pci status register
+ */
+#define PCI_IGNORE 0x00
+
+/*
+ * layout of the CPLD interrupt registers, one byte each
+ *
+ * in the mask registers a set bit masks the interrupt, see
+ * cpld_mask_irq() and cpld_unmask_irq(); in the status registers a
+ * cleared bit is what cpld_pic_get_irq() picks up as an interrupt
+ */
+struct cpld_pic {
+ u8 pci_mask;
+ u8 pci_status;
+ u8 route;
+ u8 misc_mask;
+ u8 misc_status;
+ u8 misc_control;
+};
+
+/* mapping of the CPLD registers, NULL until mpc5121_ads_cpld_map() succeeds */
+static struct cpld_pic __iomem *cpld_regs;
+
+/*
+ * pick the mask register which holds the bit for 'irq': the PCI mask
+ * for interrupts 0..7, the misc mask for everything above
+ */
+static void __iomem *
+irq_to_pic_mask(unsigned int irq)
+{
+	if (irq > 7)
+		return &cpld_regs->misc_mask;
+
+	return &cpld_regs->pci_mask;
+}
+
+/*
+ * bit within an 8 bit wide mask register which corresponds to 'irq',
+ * only the low three bits of the interrupt number are used
+ */
+static unsigned int
+irq_to_pic_bit(unsigned int irq)
+{
+ return 1 << (irq & 0x7);
+}
+
+/* mask an interrupt: set its bit in the mask register it belongs to */
+static void
+cpld_mask_irq(struct irq_data *d)
+{
+	unsigned int hw = (unsigned int)irqd_to_hwirq(d);
+	void __iomem *reg = irq_to_pic_mask(hw);
+	unsigned int bit = irq_to_pic_bit(hw);
+
+	out_8(reg, in_8(reg) | bit);
+}
+
+/* unmask an interrupt: clear its bit in the mask register it belongs to */
+static void
+cpld_unmask_irq(struct irq_data *d)
+{
+	unsigned int hw = (unsigned int)irqd_to_hwirq(d);
+	void __iomem *reg = irq_to_pic_mask(hw);
+	unsigned int bit = irq_to_pic_bit(hw);
+
+	out_8(reg, in_8(reg) & ~bit);
+}
+
+/*
+ * irq_chip of the CPLD PIC; note that .irq_ack uses the same routine as
+ * .irq_mask, i.e. acknowledging an interrupt masks it
+ */
+static struct irq_chip cpld_pic = {
+ .name = "CPLD PIC",
+ .irq_mask = cpld_mask_irq,
+ .irq_ack = cpld_mask_irq,
+ .irq_unmask = cpld_unmask_irq,
+};
+
+/*
+ * Determine the pending interrupt of one status/mask register pair.
+ *
+ * Bits which are set in 'ignore' or in the mask register are treated as
+ * not pending; of the remaining bits the lowest cleared status bit wins.
+ * Returns that bit's number plus 'offset', or ~0 when nothing is pending.
+ */
+static unsigned int
+cpld_pic_get_irq(int offset, u8 ignore, u8 __iomem *statusp,
+		 u8 __iomem *maskp)
+{
+	u8 pending;
+
+	pending = in_8(statusp);
+	/* fold in masked irqs and don't cares, they must not get reported */
+	pending |= in_8(maskp);
+	pending |= ignore;
+
+	if (pending != 0xff)
+		return ffz(pending) + offset;
+
+	return ~0;
+}
+
+/*
+ * chained handler of the cascade interrupt: dispatch at most one pending
+ * CPLD interrupt per invocation, PCI sources (hwirq 0..7) are checked
+ * before the misc sources (hwirq 8..15)
+ */
+static void cpld_pic_cascade(struct irq_desc *desc)
+{
+	unsigned int hwirq;
+
+	hwirq = cpld_pic_get_irq(0, PCI_IGNORE, &cpld_regs->pci_status,
+				 &cpld_regs->pci_mask);
+	if (hwirq == ~0)
+		hwirq = cpld_pic_get_irq(8, MISC_IGNORE,
+					 &cpld_regs->misc_status,
+					 &cpld_regs->misc_mask);
+
+	if (hwirq != ~0)
+		generic_handle_domain_irq(cpld_pic_host, hwirq);
+}
+
+/*
+ * irq_domain .match callback: the domain matches exactly the node which
+ * was stored in cpld_pic_node, the bus token is not looked at
+ */
+static int
+cpld_pic_host_match(struct irq_domain *h, struct device_node *node,
+ enum irq_domain_bus_token bus_token)
+{
+ return cpld_pic_node == node;
+}
+
+/*
+ * irq_domain .map callback: every CPLD interrupt is set up as a level
+ * interrupt which is handled by the cpld_pic chip; always returns 0
+ */
+static int
+cpld_pic_host_map(struct irq_domain *h, unsigned int virq,
+ irq_hw_number_t hw)
+{
+ irq_set_status_flags(virq, IRQ_LEVEL);
+ irq_set_chip_and_handler(virq, &cpld_pic, handle_level_irq);
+ return 0;
+}
+
+/* callbacks of the CPLD PIC interrupt domain */
+static const struct irq_domain_ops cpld_pic_host_ops = {
+ .match = cpld_pic_host_match,
+ .map = cpld_pic_host_map,
+};
+
+/*
+ * Look up the "fsl,mpc5121ads-cpld-pic" node and map its first register
+ * range into cpld_regs; logs an error when there is no such node.
+ */
+void __init
+mpc5121_ads_cpld_map(void)
+{
+	struct device_node *cpld_np;
+
+	cpld_np = of_find_compatible_node(NULL, NULL,
+					  "fsl,mpc5121ads-cpld-pic");
+	if (cpld_np) {
+		cpld_regs = of_iomap(cpld_np, 0);
+		of_node_put(cpld_np);
+		return;
+	}
+
+	printk(KERN_ERR "CPLD PIC init: can not find cpld-pic node\n");
+}
+
+/*
+ * Set up the CPLD interrupt controller as a cascade behind the interrupt
+ * of the "fsl,mpc5121ads-cpld-pic" node.
+ *
+ * Bails out without installing the chained handler when the node is
+ * missing, when the CPLD registers have not been mapped, when the
+ * cascade interrupt cannot be mapped, or when the interrupt domain
+ * cannot be allocated.
+ */
+void __init
+mpc5121_ads_cpld_pic_init(void)
+{
+	unsigned int cascade_irq;
+	struct device_node *np = NULL;
+
+	pr_debug("cpld_ic_init\n");
+
+	np = of_find_compatible_node(NULL, NULL, "fsl,mpc5121ads-cpld-pic");
+	if (!np) {
+		printk(KERN_ERR "CPLD PIC init: can not find cpld-pic node\n");
+		return;
+	}
+
+	if (!cpld_regs)
+		goto end;
+
+	cascade_irq = irq_of_parse_and_map(np, 0);
+	if (!cascade_irq)
+		goto end;
+
+	/*
+	 * statically route touch screen pendown through 1
+	 * and ignore it here
+	 * route all others through our cascade irq
+	 */
+	out_8(&cpld_regs->route, 0xfd);
+	out_8(&cpld_regs->pci_mask, 0xff);
+	/* unmask pci ints in misc mask */
+	out_8(&cpld_regs->misc_mask, ~(MISC_IGNORE));
+
+	/* cpld_pic_host_match() compares against this pointer */
+	cpld_pic_node = of_node_get(np);
+
+	cpld_pic_host = irq_domain_add_linear(np, 16, &cpld_pic_host_ops, NULL);
+	if (!cpld_pic_host) {
+		printk(KERN_ERR "CPLD PIC: failed to allocate irq host!\n");
+		/* no domain, so give back the reference held for matching */
+		of_node_put(cpld_pic_node);
+		cpld_pic_node = NULL;
+		goto end;
+	}
+
+	irq_set_chained_handler(cascade_irq, cpld_pic_cascade);
+end:
+	of_node_put(np);
+}
diff --git a/arch/powerpc/platforms/512x/mpc512x.h b/arch/powerpc/platforms/512x/mpc512x.h
new file mode 100644
index 000000000..d2cb06e3a
--- /dev/null
+++ b/arch/powerpc/platforms/512x/mpc512x.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2007 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * Prototypes for MPC512x shared code
+ */
+
+#ifndef __MPC512X_H__
+#define __MPC512X_H__
+/* shared hooks which the MPC512x board files plug into define_machine() */
+extern void __init mpc512x_init_IRQ(void);
+extern void __init mpc512x_init_early(void);
+extern void __init mpc512x_init(void);
+extern void __init mpc512x_setup_arch(void);
+/* sets up the clock tree, returns 0 or a negative error code */
+extern int __init mpc5121_clk_init(void);
+/* compatible string by which the clock code looks up the PSC nodes */
+const char *__init mpc512x_select_psc_compat(void);
+extern void __noreturn mpc512x_restart(char *cmd);
+
+#endif /* __MPC512X_H__ */
diff --git a/arch/powerpc/platforms/512x/mpc512x_generic.c b/arch/powerpc/platforms/512x/mpc512x_generic.c
new file mode 100644
index 000000000..0d58ab257
--- /dev/null
+++ b/arch/powerpc/platforms/512x/mpc512x_generic.c
@@ -0,0 +1,51 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2007,2008 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * Author: John Rigby, <jrigby@freescale.com>
+ *
+ * Description:
+ * MPC512x SoC setup
+ */
+
+#include <linux/kernel.h>
+#include <linux/of.h>
+
+#include <asm/machdep.h>
+#include <asm/ipic.h>
+#include <asm/time.h>
+
+#include "mpc512x.h"
+
+/*
+ * list of supported boards
+ *
+ * NULL terminated list of root node compatible strings, checked by
+ * mpc512x_generic_probe()
+ */
+static const char * const board[] __initconst = {
+ "prt,prtlvt",
+ "fsl,mpc5125ads",
+ "ifm,ac14xx",
+ NULL
+};
+
+/*
+ * Called very early, MMU is off, device-tree isn't unflattened
+ *
+ * Claims the machine (returns 1) after running the common early init
+ * when the root node is compatible with one of the entries of board[],
+ * and declines (returns 0) otherwise.
+ */
+static int __init mpc512x_generic_probe(void)
+{
+	if (of_device_compatible_match(of_root, board)) {
+		mpc512x_init_early();
+		return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * machine description of the generic MPC512x boards: everything except
+ * the probe routine is shared MPC512x code, interrupts are looked up
+ * from the IPIC
+ */
+define_machine(mpc512x_generic) {
+ .name = "MPC512x generic",
+ .probe = mpc512x_generic_probe,
+ .init = mpc512x_init,
+ .setup_arch = mpc512x_setup_arch,
+ .init_IRQ = mpc512x_init_IRQ,
+ .get_irq = ipic_get_irq,
+ .restart = mpc512x_restart,
+};
diff --git a/arch/powerpc/platforms/512x/mpc512x_lpbfifo.c b/arch/powerpc/platforms/512x/mpc512x_lpbfifo.c
new file mode 100644
index 000000000..4a25b6b48
--- /dev/null
+++ b/arch/powerpc/platforms/512x/mpc512x_lpbfifo.c
@@ -0,0 +1,518 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * The driver for Freescale MPC512x LocalPlus Bus FIFO
+ * (called SCLPC in the Reference Manual).
+ *
+ * Copyright (C) 2013-2015 Alexander Popov <alex.popov@linux.com>.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/platform_device.h>
+#include <asm/mpc5121.h>
+#include <asm/io.h>
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-direction.h>
+#include <linux/dma-mapping.h>
+
+#define DRV_NAME "mpc512x_lpbfifo"
+
+/*
+ * one chip select window on the LocalPlus Bus: accesses which fall
+ * completely into [addr, addr + size) are done with chip select 'csnum'
+ *
+ * NOTE(review): presumably this mirrors one entry of a device tree
+ * 'ranges' property -- confirm against the code which fills cs_ranges
+ */
+struct cs_range {
+ u32 csnum;
+ u32 base; /* must be zero */
+ u32 addr;
+ u32 size;
+};
+
+/*
+ * state of the one and only LPB FIFO instance
+ *
+ * 'req' is the request in flight (NULL when the FIFO is idle), and the
+ * two 'wait_*' flags tell which of the completion events (LPBFIFO
+ * interrupt, DMA callback) is still outstanding for it
+ */
+static struct lpbfifo_data {
+ spinlock_t lock; /* for protecting lpbfifo_data */
+ phys_addr_t regs_phys;
+ resource_size_t regs_size;
+ struct mpc512x_lpbfifo __iomem *regs;
+ int irq;
+ struct cs_range *cs_ranges;
+ size_t cs_n;
+ struct dma_chan *chan;
+ struct mpc512x_lpbfifo_request *req;
+ dma_addr_t ram_bus_addr; /* DMA mapping of the RAM buffer, for unmapping */
+ bool wait_lpbfifo_irq;
+ bool wait_lpbfifo_callback;
+} lpbfifo;
+
+/*
+ * A data transfer from RAM to some device on LPB is finished
+ * when both mpc512x_lpbfifo_irq() and mpc512x_lpbfifo_callback()
+ * have been called. We execute the callback registered in
+ * mpc512x_lpbfifo_request just after that.
+ * But for a data transfer from some device on LPB to RAM we don't enable
+ * LPBFIFO interrupt because clearing MPC512X_SCLPC_SUCCESS interrupt flag
+ * automatically disables LPBFIFO reading request to the DMA controller
+ * and the data transfer hangs. So the callback registered in
+ * mpc512x_lpbfifo_request is executed at the end of mpc512x_lpbfifo_callback().
+ */
+
+/*
+ * Part of the interrupt handling which runs with lpbfifo.lock held.
+ *
+ * Returns the request which got finished by this interrupt and now
+ * needs its callback run, or NULL when there is nothing to complete
+ * (driver not set up, bogus interrupt, failed transfer, or the DMA
+ * callback is still outstanding).
+ */
+static struct mpc512x_lpbfifo_request *
+mpc512x_lpbfifo_irq_locked(struct device *dev)
+{
+	struct mpc512x_lpbfifo_request *req;
+	u32 status;
+
+	if (!lpbfifo.regs)
+		return NULL;
+
+	req = lpbfifo.req;
+	if (!req || req->dir == MPC512X_LPBFIFO_REQ_DIR_READ) {
+		dev_err(dev, "bogus LPBFIFO IRQ\n");
+		return NULL;
+	}
+
+	status = in_be32(&lpbfifo.regs->status);
+	if (status != MPC512X_SCLPC_SUCCESS) {
+		dev_err(dev, "DMA transfer from RAM to peripheral failed\n");
+		out_be32(&lpbfifo.regs->enable,
+			 MPC512X_SCLPC_RESET | MPC512X_SCLPC_FIFO_RESET);
+		return NULL;
+	}
+
+	/* Clear the interrupt flag */
+	out_be32(&lpbfifo.regs->status, MPC512X_SCLPC_SUCCESS);
+
+	lpbfifo.wait_lpbfifo_irq = false;
+
+	/* The DMA callback will finish the request when it arrives */
+	if (lpbfifo.wait_lpbfifo_callback)
+		return NULL;
+
+	/* Transfer is finished, set the FIFO as idle */
+	lpbfifo.req = NULL;
+
+	return req;
+}
+
+/*
+ * mpc512x_lpbfifo_irq - IRQ handler for LPB FIFO
+ *
+ * The request's callback is invoked after the lock has been dropped.
+ */
+static irqreturn_t mpc512x_lpbfifo_irq(int irq, void *param)
+{
+	struct device *dev = (struct device *)param;
+	struct mpc512x_lpbfifo_request *done;
+	unsigned long flags;
+
+	spin_lock_irqsave(&lpbfifo.lock, flags);
+	done = mpc512x_lpbfifo_irq_locked(dev);
+	spin_unlock_irqrestore(&lpbfifo.lock, flags);
+
+	if (done && done->callback)
+		done->callback(done);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * mpc512x_lpbfifo_callback is called by DMA driver when
+ * DMA transaction is finished.
+ *
+ * Releases the DMA mapping of the RAM buffer and, unless the LPBFIFO
+ * interrupt is still outstanding, marks the FIFO idle and runs the
+ * request's callback with the lock dropped.
+ */
+static void mpc512x_lpbfifo_callback(void *param)
+{
+	struct mpc512x_lpbfifo_request *req;
+	enum dma_data_direction dir;
+	unsigned long flags;
+
+	spin_lock_irqsave(&lpbfifo.lock, flags);
+
+	if (!lpbfifo.regs)
+		goto unlock;
+
+	req = lpbfifo.req;
+	if (!req) {
+		pr_err("bogus LPBFIFO callback\n");
+		goto unlock;
+	}
+
+	/* Release the mapping */
+	dir = (req->dir == MPC512X_LPBFIFO_REQ_DIR_WRITE) ?
+		DMA_TO_DEVICE : DMA_FROM_DEVICE;
+	dma_unmap_single(lpbfifo.chan->device->dev,
+			 lpbfifo.ram_bus_addr, req->size, dir);
+
+	lpbfifo.wait_lpbfifo_callback = false;
+
+	/* The interrupt handler will finish the request when it runs */
+	if (lpbfifo.wait_lpbfifo_irq)
+		goto unlock;
+
+	/* Transfer is finished, set the FIFO as idle */
+	lpbfifo.req = NULL;
+
+	spin_unlock_irqrestore(&lpbfifo.lock, flags);
+
+	if (req->callback)
+		req->callback(req);
+
+	return;
+
+ unlock:
+	spin_unlock_irqrestore(&lpbfifo.lock, flags);
+}
+
+static int mpc512x_lpbfifo_kick(void)
+{
+ u32 bits;
+ bool no_incr = false;
+ u32 bpt = 32; /* max bytes per LPBFIFO transaction involving DMA */
+ u32 cs = 0;
+ size_t i;
+ struct dma_device *dma_dev = NULL;
+ struct scatterlist sg;
+ enum dma_data_direction dir;
+ struct dma_slave_config dma_conf = {};
+ struct dma_async_tx_descriptor *dma_tx = NULL;
+ dma_cookie_t cookie;
+ int ret;
+
+ /*
+ * 1. Fit the requirements:
+ * - the packet size must be a multiple of 4 since FIFO Data Word
+ * Register allows only full-word access according the Reference
+ * Manual;
+ * - the physical address of the device on LPB and the packet size
+ * must be aligned on BPT (bytes per transaction) or 8-bytes
+ * boundary according the Reference Manual;
+ * - but we choose DMA maxburst equal (or very close to) BPT to prevent
+ * DMA controller from overtaking FIFO and causing FIFO underflow
+ * error. So we force the packet size to be aligned on BPT boundary
+ * not to confuse DMA driver which requires the packet size to be
+ * aligned on maxburst boundary;
+ * - BPT should be set to the LPB device port size for operation with
+ * disabled auto-incrementing according Reference Manual.
+ */
+ if (lpbfifo.req->size == 0 || !IS_ALIGNED(lpbfifo.req->size, 4))
+ return -EINVAL;
+
+ if (lpbfifo.req->portsize != LPB_DEV_PORTSIZE_UNDEFINED) {
+ bpt = lpbfifo.req->portsize;
+ no_incr = true;
+ }
+
+ while (bpt > 1) {
+ if (IS_ALIGNED(lpbfifo.req->dev_phys_addr, min(bpt, 0x8u)) &&
+ IS_ALIGNED(lpbfifo.req->size, bpt)) {
+ break;
+ }
+
+ if (no_incr)
+ return -EINVAL;
+
+ bpt >>= 1;
+ }
+ dma_conf.dst_maxburst = max(bpt, 0x4u) / 4;
+ dma_conf.src_maxburst = max(bpt, 0x4u) / 4;
+
+ for (i = 0; i < lpbfifo.cs_n; i++) {
+ phys_addr_t cs_start = lpbfifo.cs_ranges[i].addr;
+ phys_addr_t cs_end = cs_start + lpbfifo.cs_ranges[i].size;
+ phys_addr_t access_start = lpbfifo.req->dev_phys_addr;
+ phys_addr_t access_end = access_start + lpbfifo.req->size;
+
+ if (access_start >= cs_start && access_end <= cs_end) {
+ cs = lpbfifo.cs_ranges[i].csnum;
+ break;
+ }
+ }
+ if (i == lpbfifo.cs_n)
+ return -EFAULT;
+
+ /* 2. Prepare DMA */
+ dma_dev = lpbfifo.chan->device;
+
+ if (lpbfifo.req->dir == MPC512X_LPBFIFO_REQ_DIR_WRITE) {
+ dir = DMA_TO_DEVICE;
+ dma_conf.direction = DMA_MEM_TO_DEV;
+ dma_conf.dst_addr = lpbfifo.regs_phys +
+ offsetof(struct mpc512x_lpbfifo, data_word);
+ } else {
+ dir = DMA_FROM_DEVICE;
+ dma_conf.direction = DMA_DEV_TO_MEM;
+ dma_conf.src_addr = lpbfifo.regs_phys +
+ offsetof(struct mpc512x_lpbfifo, data_word);
+ }
+ dma_conf.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+ dma_conf.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+
+ /* Make DMA channel work with LPB FIFO data register */
+ if (dma_dev->device_config(lpbfifo.chan, &dma_conf)) {
+ ret = -EINVAL;
+ goto err_dma_prep;
+ }
+
+ sg_init_table(&sg, 1);
+
+ sg_dma_address(&sg) = dma_map_single(dma_dev->dev,
+ lpbfifo.req->ram_virt_addr, lpbfifo.req->size, dir);
+ if (dma_mapping_error(dma_dev->dev, sg_dma_address(&sg)))
+ return -EFAULT;
+
+ lpbfifo.ram_bus_addr = sg_dma_address(&sg); /* For freeing later */
+
+ sg_dma_len(&sg) = lpbfifo.req->size;
+
+ dma_tx = dmaengine_prep_slave_sg(lpbfifo.chan, &sg,
+ 1, dma_conf.direction, 0);
+ if (!dma_tx) {
+ ret = -ENOSPC;
+ goto err_dma_prep;
+ }
+ dma_tx->callback = mpc512x_lpbfifo_callback;
+ dma_tx->callback_param = NULL;
+
+ /* 3. Prepare FIFO */
+ out_be32(&lpbfifo.regs->enable,
+ MPC512X_SCLPC_RESET | MPC512X_SCLPC_FIFO_RESET);
+ out_be32(&lpbfifo.regs->enable, 0x0);
+
+ /*
+ * Configure the watermarks for write operation (RAM->DMA->FIFO->dev):
+ * - high watermark 7 words according to the Reference Manual,
+ * - low watermark 512 bytes (half of the FIFO).
+ * These watermarks don't work for read operation since the
+ * MPC512X_SCLPC_FLUSH bit is set (according to the Reference Manual).
+ */
+ out_be32(&lpbfifo.regs->fifo_ctrl, MPC512X_SCLPC_FIFO_CTRL(0x7));
+ out_be32(&lpbfifo.regs->fifo_alarm, MPC512X_SCLPC_FIFO_ALARM(0x200));
+
+ /*
+ * Start address is a physical address of the region which belongs
+ * to the device on the LocalPlus Bus
+ */
+ out_be32(&lpbfifo.regs->start_addr, lpbfifo.req->dev_phys_addr);
+
+ /*
+ * Configure chip select, transfer direction, address increment option
+ * and bytes per transaction option
+ */
+ bits = MPC512X_SCLPC_CS(cs);
+ if (lpbfifo.req->dir == MPC512X_LPBFIFO_REQ_DIR_READ)
+ bits |= MPC512X_SCLPC_READ | MPC512X_SCLPC_FLUSH;
+ if (no_incr)
+ bits |= MPC512X_SCLPC_DAI;
+ bits |= MPC512X_SCLPC_BPT(bpt);
+ out_be32(&lpbfifo.regs->ctrl, bits);
+
+ /* Unmask irqs */
+ bits = MPC512X_SCLPC_ENABLE | MPC512X_SCLPC_ABORT_INT_ENABLE;
+ if (lpbfifo.req->dir == MPC512X_LPBFIFO_REQ_DIR_WRITE)
+ bits |= MPC512X_SCLPC_NORM_INT_ENABLE;
+ else
+ lpbfifo.wait_lpbfifo_irq = false;
+
+ out_be32(&lpbfifo.regs->enable, bits);
+
+ /* 4. Set packet size and kick FIFO off */
+ bits = lpbfifo.req->size | MPC512X_SCLPC_START;
+ out_be32(&lpbfifo.regs->pkt_size, bits);
+
+ /* 5. Finally kick DMA off */
+ cookie = dma_tx->tx_submit(dma_tx);
+ if (dma_submit_error(cookie)) {
+ ret = -ENOSPC;
+ goto err_dma_submit;
+ }
+
+ return 0;
+
+ err_dma_submit:
+ out_be32(&lpbfifo.regs->enable,
+ MPC512X_SCLPC_RESET | MPC512X_SCLPC_FIFO_RESET);
+ err_dma_prep:
+ dma_unmap_single(dma_dev->dev, sg_dma_address(&sg),
+ lpbfifo.req->size, dir);
+ return ret;
+}
+
+/*
+ * Start the transfer described by @req if the FIFO is idle.
+ *
+ * Returns -ENODEV when lpbfifo.regs is NULL, -EBUSY when lpbfifo.req is
+ * already set, otherwise the result of mpc512x_lpbfifo_kick().
+ * The only caller visible here, mpc512x_lpbfifo_submit(), invokes it with
+ * lpbfifo.lock held.
+ */
+static int mpc512x_lpbfifo_submit_locked(struct mpc512x_lpbfifo_request *req)
+{
+ int ret = 0;
+
+ if (!lpbfifo.regs)
+ return -ENODEV;
+
+ /* Check whether a transfer is in progress */
+ if (lpbfifo.req)
+ return -EBUSY;
+
+ /*
+ * Set both wait flags and publish the request before kicking; the kick
+ * path reads lpbfifo.req and clears wait_lpbfifo_irq again for reads.
+ */
+ lpbfifo.wait_lpbfifo_irq = true;
+ lpbfifo.wait_lpbfifo_callback = true;
+ lpbfifo.req = req;
+
+ ret = mpc512x_lpbfifo_kick();
+ if (ret != 0)
+ lpbfifo.req = NULL; /* Set the FIFO as idle */
+
+ return ret;
+}
+
+/*
+ * Exported entry point: submit @req to the LPB FIFO.
+ *
+ * Holds lpbfifo.lock with local interrupts disabled around the actual
+ * submission and hands back whatever mpc512x_lpbfifo_submit_locked()
+ * returned.
+ */
+int mpc512x_lpbfifo_submit(struct mpc512x_lpbfifo_request *req)
+{
+	unsigned long irq_state;
+	int status;
+
+	spin_lock_irqsave(&lpbfifo.lock, irq_state);
+	status = mpc512x_lpbfifo_submit_locked(req);
+	spin_unlock_irqrestore(&lpbfifo.lock, irq_state);
+
+	return status;
+}
+EXPORT_SYMBOL(mpc512x_lpbfifo_submit);
+
+/*
+ * LPBFIFO driver uses "ranges" property of "localbus" device tree node
+ * for being able to determine the chip select number of a client device
+ * ordering a DMA transfer.
+ *
+ * Fills lpbfifo.cs_n and lpbfifo.cs_ranges (device-managed allocation tied
+ * to @dev).
+ *
+ * Returns 0 on success, -ENODEV if the localbus node is missing or a range
+ * has a non-zero value in the lower 32 bits of its bus address, and -ENOMEM
+ * if the range table cannot be allocated.
+ */
+static int get_cs_ranges(struct device *dev)
+{
+	int ret = -ENODEV;
+	struct device_node *lb_node;
+	size_t i = 0;
+	struct of_range_parser parser;
+	struct of_range range;
+
+	lb_node = of_find_compatible_node(NULL, NULL, "fsl,mpc5121-localbus");
+	if (!lb_node)
+		return ret;
+
+	of_range_parser_init(&parser, lb_node);
+	lpbfifo.cs_n = of_range_count(&parser);
+
+	lpbfifo.cs_ranges = devm_kcalloc(dev, lpbfifo.cs_n,
+					sizeof(struct cs_range), GFP_KERNEL);
+	if (!lpbfifo.cs_ranges) {
+		/* Running out of memory is not a device tree problem */
+		ret = -ENOMEM;
+		goto end;
+	}
+
+	for_each_of_range(&parser, &range) {
+		u32 base = lower_32_bits(range.bus_addr);
+
+		/* Ranges with a non-zero lower bus address word are rejected */
+		if (base)
+			goto end;
+
+		/* The upper 32 bits of the bus address are used as the CS number */
+		lpbfifo.cs_ranges[i].csnum = upper_32_bits(range.bus_addr);
+		lpbfifo.cs_ranges[i].base = base;
+		lpbfifo.cs_ranges[i].addr = range.cpu_addr;
+		lpbfifo.cs_ranges[i].size = range.size;
+		i++;
+	}
+
+	ret = 0;
+
+ end:
+	of_node_put(lb_node);
+	return ret;
+}
+
+/*
+ * Bind to the "sclpc" device: acquire the "rx-tx" DMA channel, map the FIFO
+ * registers, reset the FIFO, read the chip select ranges of the localbus
+ * node and install the interrupt handler.
+ *
+ * Returns 0 on success or a negative errno.  On failure the DMA channel is
+ * released and lpbfifo.regs is cleared again so that
+ * mpc512x_lpbfifo_submit() keeps answering -ENODEV.
+ */
+static int mpc512x_lpbfifo_probe(struct platform_device *pdev)
+{
+	struct resource r;
+	unsigned long flags;
+	int ret = 0;
+
+	memset(&lpbfifo, 0, sizeof(struct lpbfifo_data));
+	spin_lock_init(&lpbfifo.lock);
+
+	lpbfifo.chan = dma_request_chan(&pdev->dev, "rx-tx");
+	if (IS_ERR(lpbfifo.chan))
+		return PTR_ERR(lpbfifo.chan);
+
+	if (of_address_to_resource(pdev->dev.of_node, 0, &r) != 0) {
+		dev_err(&pdev->dev, "bad 'reg' in 'sclpc' device tree node\n");
+		ret = -ENODEV;
+		goto err0;
+	}
+
+	lpbfifo.regs_phys = r.start;
+	lpbfifo.regs_size = resource_size(&r);
+
+	if (!devm_request_mem_region(&pdev->dev, lpbfifo.regs_phys,
+					lpbfifo.regs_size, DRV_NAME)) {
+		dev_err(&pdev->dev, "unable to request region\n");
+		ret = -EBUSY;
+		goto err0;
+	}
+
+	lpbfifo.regs = devm_ioremap(&pdev->dev,
+					lpbfifo.regs_phys, lpbfifo.regs_size);
+	if (!lpbfifo.regs) {
+		dev_err(&pdev->dev, "mapping registers failed\n");
+		ret = -ENOMEM;
+		goto err0;
+	}
+
+	/* Put the controller and its FIFO into reset until a transfer starts */
+	out_be32(&lpbfifo.regs->enable,
+				MPC512X_SCLPC_RESET | MPC512X_SCLPC_FIFO_RESET);
+
+	if (get_cs_ranges(&pdev->dev) != 0) {
+		dev_err(&pdev->dev, "bad '/localbus' device tree node\n");
+		ret = -ENODEV;
+		goto err0;
+	}
+
+	lpbfifo.irq = irq_of_parse_and_map(pdev->dev.of_node, 0);
+	if (!lpbfifo.irq) {
+		dev_err(&pdev->dev, "mapping irq failed\n");
+		ret = -ENODEV;
+		goto err0;
+	}
+
+	if (request_irq(lpbfifo.irq, mpc512x_lpbfifo_irq, 0,
+						DRV_NAME, &pdev->dev) != 0) {
+		dev_err(&pdev->dev, "requesting irq failed\n");
+		ret = -ENODEV;
+		goto err1;
+	}
+
+	dev_info(&pdev->dev, "probe succeeded\n");
+	return 0;
+
+ err1:
+	irq_dispose_mapping(lpbfifo.irq);
+ err0:
+	/*
+	 * The register mapping is device-managed and is torn down once probe
+	 * fails, and the DMA channel is released below.  Clear lpbfifo.regs
+	 * (under the lock, as remove() does) so a later submit sees "no
+	 * device" instead of touching a stale mapping.
+	 */
+	spin_lock_irqsave(&lpbfifo.lock, flags);
+	lpbfifo.regs = NULL;
+	spin_unlock_irqrestore(&lpbfifo.lock, flags);
+
+	dma_release_channel(lpbfifo.chan);
+	return ret;
+}
+
+/*
+ * Unbind the driver: hide the register mapping from submitters, terminate
+ * DMA activity on the channel, put the FIFO into reset and release the IRQ
+ * and the DMA channel.
+ */
+static void mpc512x_lpbfifo_remove(struct platform_device *pdev)
+{
+ unsigned long flags;
+ struct dma_device *dma_dev = lpbfifo.chan->device;
+ struct mpc512x_lpbfifo __iomem *regs = NULL;
+
+ /*
+ * Clear lpbfifo.regs under the lock so that subsequent
+ * mpc512x_lpbfifo_submit_locked() calls return -ENODEV; keep a private
+ * copy of the pointer for the final reset below.
+ */
+ spin_lock_irqsave(&lpbfifo.lock, flags);
+ regs = lpbfifo.regs;
+ lpbfifo.regs = NULL;
+ spin_unlock_irqrestore(&lpbfifo.lock, flags);
+
+ dma_dev->device_terminate_all(lpbfifo.chan);
+ out_be32(&regs->enable, MPC512X_SCLPC_RESET | MPC512X_SCLPC_FIFO_RESET);
+
+ free_irq(lpbfifo.irq, &pdev->dev);
+ irq_dispose_mapping(lpbfifo.irq);
+ dma_release_channel(lpbfifo.chan);
+}
+
+/* Device tree match table: nodes compatible with "fsl,mpc512x-lpbfifo" */
+static const struct of_device_id mpc512x_lpbfifo_match[] = {
+ { .compatible = "fsl,mpc512x-lpbfifo", },
+ {},
+};
+MODULE_DEVICE_TABLE(of, mpc512x_lpbfifo_match);
+
+/* Platform driver glue: probe/remove callbacks plus the match table above */
+static struct platform_driver mpc512x_lpbfifo_driver = {
+ .probe = mpc512x_lpbfifo_probe,
+ .remove_new = mpc512x_lpbfifo_remove,
+ .driver = {
+ .name = DRV_NAME,
+ .of_match_table = mpc512x_lpbfifo_match,
+ },
+};
+
+module_platform_driver(mpc512x_lpbfifo_driver);
+
+MODULE_AUTHOR("Alexander Popov <alex.popov@linux.com>");
+MODULE_DESCRIPTION("MPC512x LocalPlus Bus FIFO device driver");
+MODULE_LICENSE("GPL v2");
diff --git a/arch/powerpc/platforms/512x/mpc512x_shared.c b/arch/powerpc/platforms/512x/mpc512x_shared.c
new file mode 100644
index 000000000..8f75e9574
--- /dev/null
+++ b/arch/powerpc/platforms/512x/mpc512x_shared.c
@@ -0,0 +1,506 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2007,2008 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * Author: John Rigby <jrigby@freescale.com>
+ *
+ * Description:
+ * MPC512x Shared code
+ */
+
+#include <linux/clk.h>
+#include <linux/kernel.h>
+#include <linux/io.h>
+#include <linux/irq.h>
+#include <linux/of_address.h>
+#include <linux/of_platform.h>
+#include <linux/fsl-diu-fb.h>
+#include <linux/memblock.h>
+#include <sysdev/fsl_soc.h>
+
+#include <asm/cacheflush.h>
+#include <asm/machdep.h>
+#include <asm/ipic.h>
+#include <asm/time.h>
+#include <asm/mpc5121.h>
+#include <asm/mpc52xx_psc.h>
+
+#include "mpc512x.h"
+
+/* Reset module registers; mapped by mpc512x_restart_init(), NULL until then */
+static struct mpc512x_reset_module __iomem *reset_module_base;
+
+/*
+ * Restart the board through the reset module.
+ *
+ * Never returns: after requesting the reset (or logging that the reset
+ * module is not mapped) it spins forever.  @cmd is not used.
+ */
+void __noreturn mpc512x_restart(char *cmd)
+{
+ if (reset_module_base) {
+ /* Enable software reset "RSTE" */
+ /* 0x52535445 is the ASCII string "RSTE" */
+ out_be32(&reset_module_base->rpr, 0x52535445);
+ /* Set software hard reset */
+ out_be32(&reset_module_base->rcr, 0x2);
+ } else {
+ pr_err("Restart module not mapped.\n");
+ }
+ for (;;)
+ ;
+}
+
+/*
+ * DIU state taken over from the boot loader: copies of the gamma table and
+ * the first area descriptor plus the location of the boot frame buffer
+ * (filled in by mpc512x_init_diu()).
+ */
+struct fsl_diu_shared_fb {
+ u8 gamma[0x300]; /* 32-bit aligned! */
+ struct diu_ad ad0; /* 32-bit aligned! */
+ phys_addr_t fb_phys; /* frame buffer address read from the area descriptor */
+ size_t fb_len; /* frame buffer length in bytes (width * height * bpp) */
+ bool in_use; /* true while the boot frame buffer is still claimed */
+};
+
+/*
+ * receives a pixel clock spec in pico seconds, adjusts the DIU clock rate
+ *
+ * @pixclock is the pixel period in picoseconds.  The function looks up and
+ * enables the DIU clock, converts the period into a frequency and searches
+ * for a clock rate within +/- 5% of it; if none is found it falls back to
+ * whatever rate the clock driver selects for the exact request.
+ *
+ * Periods of 0 ps or of more than 10^9 ps are rejected with an error
+ * message and leave the clock untouched.
+ */
+static void mpc512x_set_pixel_clock(unsigned int pixclock)
+{
+	struct device_node *np;
+	struct clk *clk_diu;
+	unsigned long epsilon, minpixclock, maxpixclock;
+	unsigned long offset, want, got, delta;
+
+	/*
+	 * A period of 0 ps would divide by zero in the conversion below, and
+	 * a period above 10^9 ps converts to 0 Hz, for which the search loop
+	 * below would never advance (zero step, zero tolerance).
+	 */
+	if (!pixclock || pixclock > 1000000000) {
+		pr_err("Invalid DIU pixclock period - %u ps\n", pixclock);
+		return;
+	}
+
+	/* lookup and enable the DIU clock */
+	np = of_find_compatible_node(NULL, NULL, "fsl,mpc5121-diu");
+	if (!np) {
+		pr_err("Could not find DIU device tree node.\n");
+		return;
+	}
+	clk_diu = of_clk_get(np, 0);
+	if (IS_ERR(clk_diu)) {
+		/* backwards compat with device trees that lack clock specs */
+		clk_diu = clk_get_sys(np->name, "ipg");
+	}
+	of_node_put(np);
+	if (IS_ERR(clk_diu)) {
+		pr_err("Could not lookup DIU clock.\n");
+		return;
+	}
+	if (clk_prepare_enable(clk_diu)) {
+		pr_err("Could not enable DIU clock.\n");
+		return;
+	}
+
+	/*
+	 * convert the picoseconds spec into the desired clock rate,
+	 * determine the acceptable clock range for the monitor (+/- 5%),
+	 * do the calculation in steps to avoid integer overflow
+	 */
+	pr_debug("DIU pixclock in ps - %u\n", pixclock);
+	pixclock = (1000000000 / pixclock) * 1000;
+	pr_debug("DIU pixclock freq - %u\n", pixclock);
+	epsilon = pixclock / 20; /* pixclock * 0.05 */
+	pr_debug("DIU deviation - %lu\n", epsilon);
+	minpixclock = pixclock - epsilon;
+	maxpixclock = pixclock + epsilon;
+	pr_debug("DIU minpixclock - %lu\n", minpixclock);
+	pr_debug("DIU maxpixclock - %lu\n", maxpixclock);
+
+	/*
+	 * check whether the DIU supports the desired pixel clock
+	 *
+	 * - simply request the desired clock and see what the
+	 *   platform's clock driver will make of it, assuming that it
+	 *   will setup the best approximation of the requested value
+	 * - try other candidate frequencies in the order of decreasing
+	 *   preference (i.e. with increasing distance from the desired
+	 *   pixel clock, and checking the lower frequency before the
+	 *   higher frequency to not overload the hardware) until the
+	 *   first match is found -- any potential subsequent match
+	 *   would only be as good as the former match or typically
+	 *   would be less preferable
+	 *
+	 * the offset increment of pixelclock divided by 64 is an
+	 * arbitrary choice -- it's simple to calculate, in the typical
+	 * case we expect the first check to succeed already, in the
+	 * worst case seven frequencies get tested (the exact center and
+	 * three more values each to the left and to the right) before
+	 * the 5% tolerance window is exceeded, resulting in fast enough
+	 * execution yet high enough probability of finding a suitable
+	 * value, while the error rate will be in the order of single
+	 * percents
+	 */
+	for (offset = 0; offset <= epsilon; offset += pixclock / 64) {
+		want = pixclock - offset;
+		pr_debug("DIU checking clock - %lu\n", want);
+		clk_set_rate(clk_diu, want);
+		got = clk_get_rate(clk_diu);
+		delta = abs(pixclock - got);
+		if (delta < epsilon)
+			break;
+		/* the center frequency has no distinct "upper" candidate */
+		if (!offset)
+			continue;
+		want = pixclock + offset;
+		pr_debug("DIU checking clock - %lu\n", want);
+		clk_set_rate(clk_diu, want);
+		got = clk_get_rate(clk_diu);
+		delta = abs(pixclock - got);
+		if (delta < epsilon)
+			break;
+	}
+	/* leaving the loop via break keeps offset within the tolerance window */
+	if (offset <= epsilon) {
+		pr_debug("DIU clock accepted - %lu\n", want);
+		pr_debug("DIU pixclock want %u, got %lu, delta %lu, eps %lu\n",
+			 pixclock, got, delta, epsilon);
+		return;
+	}
+	pr_warn("DIU pixclock auto search unsuccessful\n");
+
+	/*
+	 * what is the most appropriate action to take when the search
+	 * for an available pixel clock which is acceptable to the
+	 * monitor has failed? disable the DIU (clock) or just provide
+	 * a "best effort"? we go with the latter
+	 */
+	pr_warn("DIU pixclock best effort fallback (backend's choice)\n");
+	clk_set_rate(clk_diu, pixclock);
+	got = clk_get_rate(clk_diu);
+	delta = abs(pixclock - got);
+	pr_debug("DIU pixclock want %u, got %lu, delta %lu, eps %lu\n",
+		 pixclock, got, delta, epsilon);
+}
+
+/* Always reports the DVI port, whatever @port was requested */
+static enum fsl_diu_monitor_port
+mpc512x_valid_monitor_port(enum fsl_diu_monitor_port port)
+{
+ return FSL_DIU_PORT_DVI;
+}
+
+/* Single instance holding the relocated gamma table and area descriptor */
+static struct fsl_diu_shared_fb __attribute__ ((__aligned__(8))) diu_shared_fb;
+
+/*
+ * Hand one reserved page back via free_reserved_page().  BUGs if @page is
+ * a tail page or has more than one reference.
+ */
+static inline void mpc512x_free_bootmem(struct page *page)
+{
+ BUG_ON(PageTail(page));
+ BUG_ON(page_ref_count(page) > 1);
+ free_reserved_page(page);
+}
+
+/*
+ * Free the pages of the boot frame buffer, if it is still claimed, and
+ * unhook this callback from diu_ops so it cannot run a second time.
+ */
+static void mpc512x_release_bootmem(void)
+{
+	unsigned long fb_base = diu_shared_fb.fb_phys & PAGE_MASK;
+	unsigned long fb_bytes = diu_shared_fb.fb_len;
+	unsigned long pfn, end_pfn;
+
+	if (diu_shared_fb.in_use) {
+		pfn = PFN_UP(fb_base);
+		end_pfn = PFN_DOWN(fb_base + fb_bytes);
+
+		/* walk every whole page frame in [pfn, end_pfn) */
+		while (pfn < end_pfn) {
+			mpc512x_free_bootmem(pfn_to_page(pfn));
+			pfn++;
+		}
+
+		diu_shared_fb.in_use = false;
+	}
+
+	diu_ops.release_bootmem = NULL;
+}
+
+/*
+ * Check if DIU was pre-initialized. If so, perform steps
+ * needed to continue displaying through the whole boot process.
+ * Move area descriptor and gamma table elsewhere, they are
+ * destroyed by bootmem allocator otherwise. The frame buffer
+ * address range will be reserved in setup_arch() after bootmem
+ * allocator is up.
+ *
+ * On any failure the function logs an error and leaves the DIU registers
+ * untouched; diu_shared_fb.in_use is only left set when both the area
+ * descriptor and the gamma table were copied.
+ */
+static void __init mpc512x_init_diu(void)
+{
+	struct device_node *np;
+	struct diu __iomem *diu_reg;
+	phys_addr_t desc;
+	void __iomem *vaddr;
+	unsigned long mode, pix_fmt, res, bpp;
+	unsigned long dst;
+
+	np = of_find_compatible_node(NULL, NULL, "fsl,mpc5121-diu");
+	if (!np) {
+		pr_err("No DIU node\n");
+		return;
+	}
+
+	diu_reg = of_iomap(np, 0);
+	of_node_put(np);
+	if (!diu_reg) {
+		pr_err("Can't map DIU\n");
+		return;
+	}
+
+	mode = in_be32(&diu_reg->diu_mode);
+	if (mode == MFB_MODE0) {
+		pr_info("%s: DIU OFF\n", __func__);
+		goto out;
+	}
+
+	/* copy the first area descriptor into kernel-owned storage */
+	desc = in_be32(&diu_reg->desc[0]);
+	vaddr = ioremap(desc, sizeof(struct diu_ad));
+	if (!vaddr) {
+		pr_err("Can't map DIU area desc.\n");
+		goto out;
+	}
+	memcpy(&diu_shared_fb.ad0, vaddr, sizeof(struct diu_ad));
+	/* flush fb area descriptor */
+	dst = (unsigned long)&diu_shared_fb.ad0;
+	flush_dcache_range(dst, dst + sizeof(struct diu_ad) - 1);
+
+	/*
+	 * derive the frame buffer location and size: the first descriptor
+	 * word holds the pixel format, the second the buffer address
+	 */
+	res = in_be32(&diu_reg->disp_size);
+	pix_fmt = in_le32(vaddr);
+	bpp = ((pix_fmt >> 16) & 0x3) + 1;
+	diu_shared_fb.fb_phys = in_le32(vaddr + 4);
+	diu_shared_fb.fb_len = ((res & 0xfff0000) >> 16) * (res & 0xfff) * bpp;
+	diu_shared_fb.in_use = true;
+	iounmap(vaddr);
+
+	/* copy the gamma table into kernel-owned storage */
+	desc = in_be32(&diu_reg->gamma);
+	vaddr = ioremap(desc, sizeof(diu_shared_fb.gamma));
+	if (!vaddr) {
+		pr_err("Can't map DIU gamma table.\n");
+		diu_shared_fb.in_use = false;
+		goto out;
+	}
+	memcpy(&diu_shared_fb.gamma, vaddr, sizeof(diu_shared_fb.gamma));
+	/* flush gamma table */
+	dst = (unsigned long)&diu_shared_fb.gamma;
+	flush_dcache_range(dst, dst + sizeof(diu_shared_fb.gamma) - 1);
+
+	iounmap(vaddr);
+	/* point the DIU at the relocated copies; planes 1 and 2 are cleared */
+	out_be32(&diu_reg->gamma, virt_to_phys(&diu_shared_fb.gamma));
+	out_be32(&diu_reg->desc[1], 0);
+	out_be32(&diu_reg->desc[2], 0);
+	out_be32(&diu_reg->desc[0], virt_to_phys(&diu_shared_fb.ad0));
+
+out:
+	iounmap(diu_reg);
+}
+
+/*
+ * Reserve the boot frame buffer found by mpc512x_init_diu() and install the
+ * platform callbacks in diu_ops.
+ */
+static void __init mpc512x_setup_diu(void)
+{
+ int ret;
+
+ /*
+ * We do not allocate and configure new area for bitmap buffer
+ * because it would require copying bitmap data (splash image)
+ * and so negatively affect boot time. Instead we reserve the
+ * already configured frame buffer area so that it won't be
+ * destroyed. The starting address of the area to reserve and
+ * also its length is passed to memblock_reserve(). It will be
+ * freed later on first open of fbdev, when splash image is not
+ * needed any more.
+ */
+ if (diu_shared_fb.in_use) {
+ ret = memblock_reserve(diu_shared_fb.fb_phys,
+ diu_shared_fb.fb_len);
+ if (ret) {
+ pr_err("%s: reserve bootmem failed\n", __func__);
+ /* nothing was reserved, so there is nothing to release later */
+ diu_shared_fb.in_use = false;
+ }
+ }
+
+ diu_ops.set_pixel_clock = mpc512x_set_pixel_clock;
+ diu_ops.valid_monitor_port = mpc512x_valid_monitor_port;
+ diu_ops.release_bootmem = mpc512x_release_bootmem;
+}
+
+/*
+ * Initialize the interrupt controller described by the "fsl,mpc5121-ipic"
+ * node; silently does nothing when that node is absent.
+ */
+void __init mpc512x_init_IRQ(void)
+{
+ struct device_node *np;
+
+ np = of_find_compatible_node(NULL, NULL, "fsl,mpc5121-ipic");
+ if (!np)
+ return;
+
+ ipic_init(np, 0);
+ of_node_put(np);
+
+ /*
+ * Initialize the default interrupt mapping priorities,
+ * in case the boot rom changed something on us.
+ */
+ ipic_set_default_priority();
+}
+
+/*
+ * Nodes to do bus probe on, soc and localbus
+ *
+ * Passed to of_platform_bus_probe() by
+ * mpc512x_declare_of_platform_devices(); the empty entry terminates the
+ * table.
+ */
+static const struct of_device_id of_bus_ids[] __initconst = {
+ { .compatible = "fsl,mpc5121-immr", },
+ { .compatible = "fsl,mpc5121-localbus", },
+ { .compatible = "fsl,mpc5121-mbx", },
+ { .compatible = "fsl,mpc5121-nfc", },
+ { .compatible = "fsl,mpc5121-sram", },
+ { .compatible = "fsl,mpc5121-pci", },
+ { .compatible = "gpio-leds", },
+ {},
+};
+
+/*
+ * Probe the buses listed in of_bus_ids; a failure is only logged, the boot
+ * continues.
+ */
+static void __init mpc512x_declare_of_platform_devices(void)
+{
+ if (of_platform_bus_probe(NULL, of_bus_ids, NULL))
+ printk(KERN_ERR __FILE__ ": "
+ "Error while probing of_platform bus\n");
+}
+
+#define DEFAULT_FIFO_SIZE 16
+
+/*
+ * Pick the PSC "compatible" string matching the running SoC.
+ *
+ * Returns "fsl,mpc5121-psc" or "fsl,mpc5125-psc" depending on the machine
+ * compatible, or NULL when the machine is neither.
+ */
+const char *__init mpc512x_select_psc_compat(void)
+{
+	const char *compat = NULL;
+
+	if (of_machine_is_compatible("fsl,mpc5121"))
+		compat = "fsl,mpc5121-psc";
+	else if (of_machine_is_compatible("fsl,mpc5125"))
+		compat = "fsl,mpc5125-psc";
+
+	return compat;
+}
+
+/*
+ * Pick the reset module "compatible" string matching the running SoC.
+ *
+ * Returns "fsl,mpc5121-reset" or "fsl,mpc5125-reset" depending on the
+ * machine compatible, or NULL when the machine is neither.
+ */
+static const char *__init mpc512x_select_reset_compat(void)
+{
+	const char *compat = NULL;
+
+	if (of_machine_is_compatible("fsl,mpc5121"))
+		compat = "fsl,mpc5121-reset";
+	else if (of_machine_is_compatible("fsl,mpc5125"))
+		compat = "fsl,mpc5125-reset";
+
+	return compat;
+}
+
+/*
+ * Read the FIFO size property @prop_name of node @np.
+ *
+ * Returns the first cell of the property as stored, or DEFAULT_FIFO_SIZE
+ * (after a warning) when the property does not exist.
+ */
+static unsigned int __init get_fifo_size(struct device_node *np,
+					 char *prop_name)
+{
+	const unsigned int *cell = of_get_property(np, prop_name, NULL);
+
+	if (!cell) {
+		pr_warn("no %s property in %pOF node, defaulting to %d\n",
+			prop_name, np, DEFAULT_FIFO_SIZE);
+		return DEFAULT_FIFO_SIZE;
+	}
+
+	return *cell;
+}
+
+/*
+ * FIFO control registers of a PSC: located directly behind
+ * struct mpc52xx_psc in the mapped register window @_base.
+ */
+#define FIFOC(_base) ((struct mpc512x_psc_fifo __iomem *) \
+ ((u32)(_base) + sizeof(struct mpc52xx_psc)))
+
+/* Init PSC FIFO space for TX and RX slices */
+/*
+ * Walks all PSC nodes of the running SoC and hands each one a TX and an RX
+ * slice, allocated back to back starting at offset 0.  Nodes that cannot be
+ * mapped or no longer fit are skipped with an error message.
+ */
+static void __init mpc512x_psc_fifo_init(void)
+{
+ struct device_node *np;
+ void __iomem *psc;
+ unsigned int tx_fifo_size;
+ unsigned int rx_fifo_size;
+ const char *psc_compat;
+ int fifobase = 0; /* current fifo address in 32 bit words */
+
+ psc_compat = mpc512x_select_psc_compat();
+ if (!psc_compat) {
+ pr_err("%s: no compatible devices found\n", __func__);
+ return;
+ }
+
+ for_each_compatible_node(np, NULL, psc_compat) {
+ tx_fifo_size = get_fifo_size(np, "fsl,tx-fifo-size");
+ rx_fifo_size = get_fifo_size(np, "fsl,rx-fifo-size");
+
+ /* size in register is in 4 byte units */
+ tx_fifo_size /= 4;
+ rx_fifo_size /= 4;
+ /* never program an empty slice: use at least one word */
+ if (!tx_fifo_size)
+ tx_fifo_size = 1;
+ if (!rx_fifo_size)
+ rx_fifo_size = 1;
+
+ psc = of_iomap(np, 0);
+ if (!psc) {
+ pr_err("%s: Can't map %pOF device\n",
+ __func__, np);
+ continue;
+ }
+
+ /* FIFO space is 4KiB, check if requested size is available */
+ if ((fifobase + tx_fifo_size + rx_fifo_size) > 0x1000) {
+ pr_err("%s: no fifo space available for %pOF\n",
+ __func__, np);
+ iounmap(psc);
+ /*
+ * chances are that another device requests less
+ * fifo space, so we continue.
+ */
+ continue;
+ }
+
+ /* set tx and rx fifo size registers */
+ /* each register takes the slice start in bits 16 and up, its size below */
+ out_be32(&FIFOC(psc)->txsz, (fifobase << 16) | tx_fifo_size);
+ fifobase += tx_fifo_size;
+ out_be32(&FIFOC(psc)->rxsz, (fifobase << 16) | rx_fifo_size);
+ fifobase += rx_fifo_size;
+
+ /* reset and enable the slices */
+ out_be32(&FIFOC(psc)->txcmd, 0x80);
+ out_be32(&FIFOC(psc)->txcmd, 0x01);
+ out_be32(&FIFOC(psc)->rxcmd, 0x80);
+ out_be32(&FIFOC(psc)->rxcmd, 0x01);
+
+ iounmap(psc);
+ }
+}
+
+/*
+ * Map the reset module registers used by mpc512x_restart().  When no
+ * matching node is found reset_module_base simply stays NULL.
+ */
+static void __init mpc512x_restart_init(void)
+{
+ struct device_node *np;
+ const char *reset_compat;
+
+ /* NOTE(review): reset_compat is NULL on an unknown SoC and is passed on unchecked */
+ reset_compat = mpc512x_select_reset_compat();
+ np = of_find_compatible_node(NULL, NULL, reset_compat);
+ if (!np)
+ return;
+
+ reset_module_base = of_iomap(np, 0);
+ of_node_put(np);
+}
+
+/*
+ * Early platform setup: map the reset module and, when the DIU frame buffer
+ * driver is configured, take over a pre-initialized DIU.
+ */
+void __init mpc512x_init_early(void)
+{
+ mpc512x_restart_init();
+ if (IS_ENABLED(CONFIG_FB_FSL_DIU))
+ mpc512x_init_diu();
+}
+
+/*
+ * Common machine init: clocks first, then the platform devices, then the
+ * PSC FIFO slices.
+ */
+void __init mpc512x_init(void)
+{
+ mpc5121_clk_init();
+ mpc512x_declare_of_platform_devices();
+ mpc512x_psc_fifo_init();
+}
+
+/* setup_arch hook: only DIU setup, and only when CONFIG_FB_FSL_DIU is enabled */
+void __init mpc512x_setup_arch(void)
+{
+ if (IS_ENABLED(CONFIG_FB_FSL_DIU))
+ mpc512x_setup_diu();
+}
+
+/**
+ * mpc512x_cs_config - Setup chip select configuration
+ * @cs: chip select number
+ * @val: chip select configuration value
+ *
+ * Perform chip select configuration for devices on LocalPlus Bus.
+ * Intended to dynamically reconfigure the chip select parameters
+ * for configurable devices on the bus.
+ *
+ * Return: 0 on success, -EINVAL if @cs is greater than 7, -ENOMEM if the
+ * "fsl,mpc5121-lpc" registers could not be mapped.
+ */
+int mpc512x_cs_config(unsigned int cs, u32 val)
+{
+ /* mapped on first use and kept for all later calls */
+ static struct mpc512x_lpc __iomem *lpc;
+ struct device_node *np;
+
+ if (cs > 7)
+ return -EINVAL;
+
+ if (!lpc) {
+ /* NOTE(review): np is not checked before of_iomap(); a missing node ends up as -ENOMEM */
+ np = of_find_compatible_node(NULL, NULL, "fsl,mpc5121-lpc");
+ lpc = of_iomap(np, 0);
+ of_node_put(np);
+ if (!lpc)
+ return -ENOMEM;
+ }
+
+ out_be32(&lpc->cs_cfg[cs], val);
+ return 0;
+}
+EXPORT_SYMBOL(mpc512x_cs_config);
diff --git a/arch/powerpc/platforms/512x/pdm360ng.c b/arch/powerpc/platforms/512x/pdm360ng.c
new file mode 100644
index 000000000..ce51cfeeb
--- /dev/null
+++ b/arch/powerpc/platforms/512x/pdm360ng.c
@@ -0,0 +1,126 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2010 DENX Software Engineering
+ *
+ * Anatolij Gustschin, <agust@denx.de>
+ *
+ * PDM360NG board setup
+ */
+
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_fdt.h>
+
+#include <asm/machdep.h>
+#include <asm/ipic.h>
+
+#include "mpc512x.h"
+
+#if defined(CONFIG_TOUCHSCREEN_ADS7846) || \
+ defined(CONFIG_TOUCHSCREEN_ADS7846_MODULE)
+#include <linux/interrupt.h>
+#include <linux/spi/ads7846.h>
+#include <linux/spi/spi.h>
+#include <linux/notifier.h>
+
+/*
+ * GPIO register window, mapped by pdm360ng_penirq_init().
+ * NOTE(review): holds an of_iomap() result but lacks the __iomem annotation.
+ */
+static void *pdm360ng_gpio_base;
+
+/*
+ * Pen-down callback handed to the ads7846 driver via platform data.
+ * Returns 1 while bit 0x40 of the register at offset 0x8 reads as zero.
+ */
+static int pdm360ng_get_pendown_state(void)
+{
+ u32 reg;
+
+ /*
+ * If bit 0x40 is set at offset 0xc, write it back; presumably this
+ * acknowledges a latched event -- confirm against the GPIO register map.
+ */
+ reg = in_be32(pdm360ng_gpio_base + 0xc);
+ if (reg & 0x40)
+ setbits32(pdm360ng_gpio_base + 0xc, 0x40);
+
+ reg = in_be32(pdm360ng_gpio_base + 0x8);
+
+ /* return 1 if pen is down */
+ return (reg & 0x40) == 0;
+}
+
+/* Platform data attached to the "ti,ads7846" SPI device by the bus notifier below */
+static struct ads7846_platform_data pdm360ng_ads7846_pdata = {
+ .model = 7845,
+ .get_pendown_state = pdm360ng_get_pendown_state,
+ .irq_flags = IRQF_TRIGGER_LOW,
+};
+
+/*
+ * Map the "fsl,mpc5121-gpio" registers and program them for the pen
+ * interrupt.  Returns 0 on success, -ENODEV if the node is missing or
+ * cannot be mapped.
+ */
+static int __init pdm360ng_penirq_init(void)
+{
+ struct device_node *np;
+
+ np = of_find_compatible_node(NULL, NULL, "fsl,mpc5121-gpio");
+ if (!np) {
+ pr_err("%s: Can't find 'mpc5121-gpio' node\n", __func__);
+ return -ENODEV;
+ }
+
+ pdm360ng_gpio_base = of_iomap(np, 0);
+ of_node_put(np);
+ if (!pdm360ng_gpio_base) {
+ pr_err("%s: Can't map gpio regs.\n", __func__);
+ return -ENODEV;
+ }
+ /*
+ * Register meanings are not visible here; presumably this clears all
+ * pending events (0xc), selects the trigger mode (0x18) and unmasks the
+ * pen interrupt bit (0x10) -- TODO confirm against the reference manual.
+ */
+ out_be32(pdm360ng_gpio_base + 0xc, 0xffffffff);
+ setbits32(pdm360ng_gpio_base + 0x18, 0x2000);
+ setbits32(pdm360ng_gpio_base + 0x10, 0x40);
+
+ return 0;
+}
+
+/*
+ * Bus notifier callback: when a device compatible with "ti,ads7846" is
+ * added, attach the board specific platform data to it.
+ *
+ * Returns NOTIFY_OK when the platform data was attached, NOTIFY_DONE for
+ * every other event or device.
+ */
+static int pdm360ng_touchscreen_notifier_call(struct notifier_block *nb,
+					unsigned long event, void *__dev)
+{
+	struct device *added = __dev;
+
+	if (event != BUS_NOTIFY_ADD_DEVICE)
+		return NOTIFY_DONE;
+
+	if (!of_device_is_compatible(added->of_node, "ti,ads7846"))
+		return NOTIFY_DONE;
+
+	added->platform_data = &pdm360ng_ads7846_pdata;
+	return NOTIFY_OK;
+}
+
+/* Notifier block registered on the SPI bus by pdm360ng_touchscreen_init() */
+static struct notifier_block pdm360ng_touchscreen_nb = {
+ .notifier_call = pdm360ng_touchscreen_notifier_call,
+};
+
+/*
+ * Prepare the pen interrupt GPIO and, if that worked, watch the SPI bus
+ * for the touchscreen device.
+ */
+static void __init pdm360ng_touchscreen_init(void)
+{
+ if (pdm360ng_penirq_init())
+ return;
+
+ bus_register_notifier(&spi_bus_type, &pdm360ng_touchscreen_nb);
+}
+#else
+/* No-op variant used when the ADS7846 touchscreen driver is not configured */
+static inline void __init pdm360ng_touchscreen_init(void)
+{
+}
+#endif /* CONFIG_TOUCHSCREEN_ADS7846 */
+
+/* Machine init hook: common MPC512x init followed by the touchscreen setup */
+void __init pdm360ng_init(void)
+{
+ mpc512x_init();
+ pdm360ng_touchscreen_init();
+}
+
+/*
+ * Machine probe hook: runs the early MPC512x setup and always reports a
+ * match by returning 1.
+ */
+static int __init pdm360ng_probe(void)
+{
+ mpc512x_init_early();
+
+ return 1;
+}
+
+/* Machine description for boards whose device tree is compatible with "ifm,pdm360ng" */
+define_machine(pdm360ng) {
+ .name = "PDM360NG",
+ .compatible = "ifm,pdm360ng",
+ .probe = pdm360ng_probe,
+ .setup_arch = mpc512x_setup_arch,
+ .init = pdm360ng_init,
+ .init_IRQ = mpc512x_init_IRQ,
+ .get_irq = ipic_get_irq,
+ .restart = mpc512x_restart,
+};
diff --git a/arch/powerpc/platforms/52xx/Kconfig b/arch/powerpc/platforms/52xx/Kconfig
new file mode 100644
index 000000000..384e4bef2
--- /dev/null
+++ b/arch/powerpc/platforms/52xx/Kconfig
@@ -0,0 +1,56 @@
+# SPDX-License-Identifier: GPL-2.0
+config PPC_MPC52xx
+ bool "52xx-based boards"
+ depends on PPC_BOOK3S_32
+ select COMMON_CLK
+ select HAVE_PCI
+
+config PPC_MPC5200_SIMPLE
+ bool "Generic support for simple MPC5200 based boards"
+ depends on PPC_MPC52xx
+ select DEFAULT_UIMAGE
+ help
+ This option enables support for simple MPC52xx based boards which
+ do not need a custom platform specific setup. Such boards are
+ supported assuming the following:
+
+ - GPIO pins are configured by the firmware,
+ - CDM configuration (clocking) is setup correctly by firmware,
+ - if the 'fsl,has-wdt' property is present in one of the
+ gpt nodes, then it is safe to use such gpt to reset the board,
+ - PCI is supported if enabled in the kernel configuration
+ and if there is a PCI bus node defined in the device tree.
+
+ Boards that are compatible with this generic platform support
+ are:
+ intercontrol,digsy-mtc
+ phytec,pcm030
+ phytec,pcm032
+ promess,motionpro
+ schindler,cm5200
+ tqc,tqm5200
+
+config PPC_EFIKA
+ bool "bPlan Efika 5k2. MPC5200B based computer"
+ depends on PPC_MPC52xx
+ select PPC_RTAS
+ select PPC_HASH_MMU_NATIVE
+
+config PPC_LITE5200
+ bool "Freescale Lite5200 Eval Board"
+ depends on PPC_MPC52xx
+ select DEFAULT_UIMAGE
+
+config PPC_MEDIA5200
+ bool "Freescale Media5200 Eval Board"
+ depends on PPC_MPC52xx
+ select DEFAULT_UIMAGE
+
+config PPC_MPC5200_BUGFIX
+ bool "MPC5200 (L25R) bugfix support"
+ depends on PPC_MPC52xx
+ help
+ Enable workarounds for original MPC5200 errata. This is not required
+ for MPC5200B based boards.
+
+ It is safe to say 'Y' here
diff --git a/arch/powerpc/platforms/52xx/Makefile b/arch/powerpc/platforms/52xx/Makefile
new file mode 100644
index 000000000..1b1f72d83
--- /dev/null
+++ b/arch/powerpc/platforms/52xx/Makefile
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for 52xx based boards
+#
+obj-y += mpc52xx_pic.o mpc52xx_common.o mpc52xx_gpt.o
+obj-$(CONFIG_PCI) += mpc52xx_pci.o
+
+obj-$(CONFIG_PPC_MPC5200_SIMPLE) += mpc5200_simple.o
+obj-$(CONFIG_PPC_EFIKA) += efika.o
+obj-$(CONFIG_PPC_LITE5200) += lite5200.o
+obj-$(CONFIG_PPC_MEDIA5200) += media5200.o
+
+obj-$(CONFIG_PM) += mpc52xx_sleep.o mpc52xx_pm.o
+ifdef CONFIG_PPC_LITE5200
+ obj-$(CONFIG_PM) += lite5200_sleep.o lite5200_pm.o
+endif
diff --git a/arch/powerpc/platforms/52xx/efika.c b/arch/powerpc/platforms/52xx/efika.c
new file mode 100644
index 000000000..aa82e6b43
--- /dev/null
+++ b/arch/powerpc/platforms/52xx/efika.c
@@ -0,0 +1,233 @@
+/*
+ * Efika 5K2 platform code
+ * Some code really inspired from the lite5200b platform.
+ *
+ * Copyright (C) 2006 bplan GmbH
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2. This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+#include <linux/init.h>
+#include <generated/utsrelease.h>
+#include <linux/pci.h>
+#include <linux/of.h>
+#include <asm/dma.h>
+#include <asm/time.h>
+#include <asm/machdep.h>
+#include <asm/rtas.h>
+#include <asm/mpc52xx.h>
+
+#define EFIKA_PLATFORM_NAME "Efika"
+
+
+/* ------------------------------------------------------------------------ */
+/* PCI accesses thru RTAS */
+/* ------------------------------------------------------------------------ */
+
+#ifdef CONFIG_PCI
+
+/*
+ * Access functions for PCI config space using RTAS calls.
+ */
+/*
+ * Read @len bytes at @offset of device @devfn on @bus via RTAS.
+ *
+ * The RTAS address is built from the offset (bits 0-7), devfn (bits 8-15),
+ * the bus number relative to the controller's first bus (bits 16-23) and
+ * the controller's global number (from bit 24 up).
+ *
+ * Returns PCIBIOS_SUCCESSFUL, or PCIBIOS_DEVICE_NOT_FOUND if the RTAS call
+ * reports an error; *@val is written in both cases.
+ */
+static int rtas_read_config(struct pci_bus *bus, unsigned int devfn, int offset,
+ int len, u32 * val)
+{
+ struct pci_controller *hose = pci_bus_to_host(bus);
+ unsigned long addr = (offset & 0xff) | ((devfn & 0xff) << 8)
+ | (((bus->number - hose->first_busno) & 0xff) << 16)
+ | (hose->global_number << 24);
+ /* stays -1 (all ones once stored in *val) if RTAS does not fill it in */
+ int ret = -1;
+ int rval;
+
+ rval = rtas_call(rtas_function_token(RTAS_FN_READ_PCI_CONFIG), 2, 2, &ret, addr, len);
+ *val = ret;
+ return rval ? PCIBIOS_DEVICE_NOT_FOUND : PCIBIOS_SUCCESSFUL;
+}
+
+/*
+ * Write the @len byte value @val at @offset of device @devfn on @bus via
+ * RTAS, using the same address encoding as rtas_read_config().
+ *
+ * Returns PCIBIOS_SUCCESSFUL, or PCIBIOS_DEVICE_NOT_FOUND if the RTAS call
+ * reports an error.
+ */
+static int rtas_write_config(struct pci_bus *bus, unsigned int devfn,
+ int offset, int len, u32 val)
+{
+ struct pci_controller *hose = pci_bus_to_host(bus);
+ unsigned long addr = (offset & 0xff) | ((devfn & 0xff) << 8)
+ | (((bus->number - hose->first_busno) & 0xff) << 16)
+ | (hose->global_number << 24);
+ int rval;
+
+ rval = rtas_call(rtas_function_token(RTAS_FN_WRITE_PCI_CONFIG), 3, 1, NULL,
+ addr, len, val);
+ return rval ? PCIBIOS_DEVICE_NOT_FOUND : PCIBIOS_SUCCESSFUL;
+}
+
+/* Config space accessors installed on the Efika PCI controller */
+static struct pci_ops rtas_pci_ops = {
+ .read = rtas_read_config,
+ .write = rtas_write_config,
+};
+
+
+/*
+ * Find the "pci" child of the device tree root and register it as a PCI
+ * controller whose config space is accessed through RTAS.
+ *
+ * Failures are reported with a warning and leave no controller registered.
+ * The node reference obtained from the child iteration is dropped on the
+ * error paths only.
+ */
+static void __init efika_pcisetup(void)
+{
+	const int *bus_range;
+	int len;
+	struct pci_controller *hose;
+	struct device_node *root;
+	struct device_node *pcictrl;
+
+	root = of_find_node_by_path("/");
+	if (root == NULL) {
+		printk(KERN_WARNING EFIKA_PLATFORM_NAME
+		       ": Unable to find the root node\n");
+		return;
+	}
+
+	/* leaving the loop via break keeps the reference on pcictrl */
+	for_each_child_of_node(root, pcictrl)
+		if (of_node_name_eq(pcictrl, "pci"))
+			break;
+
+	of_node_put(root);
+
+	if (pcictrl == NULL) {
+		printk(KERN_WARNING EFIKA_PLATFORM_NAME
+		       ": Unable to find the PCI bridge node\n");
+		return;
+	}
+
+	bus_range = of_get_property(pcictrl, "bus-range", &len);
+	if (bus_range == NULL || len < 2 * sizeof(int)) {
+		printk(KERN_WARNING EFIKA_PLATFORM_NAME
+		       ": Can't get bus-range for %pOF\n", pcictrl);
+		goto out_put;
+	}
+
+	if (bus_range[1] == bus_range[0])
+		printk(KERN_INFO EFIKA_PLATFORM_NAME ": PCI bus %d",
+		       bus_range[0]);
+	else
+		printk(KERN_INFO EFIKA_PLATFORM_NAME ": PCI buses %d..%d",
+		       bus_range[0], bus_range[1]);
+	/*
+	 * Finish the line started above.  Without KERN_CONT this fragment
+	 * would be logged as a separate message; it already ends the line,
+	 * so no additional newline is printed.
+	 */
+	printk(KERN_CONT " controlled by %pOF\n", pcictrl);
+
+	hose = pcibios_alloc_controller(pcictrl);
+	if (!hose) {
+		printk(KERN_WARNING EFIKA_PLATFORM_NAME
+		       ": Can't allocate PCI controller structure for %pOF\n",
+		       pcictrl);
+		goto out_put;
+	}
+
+	hose->first_busno = bus_range[0];
+	hose->last_busno = bus_range[1];
+	hose->ops = &rtas_pci_ops;
+
+	pci_process_bridge_OF_ranges(hose, pcictrl, 0);
+	return;
+out_put:
+	of_node_put(pcictrl);
+}
+
+#else
+/* No-op variant used when CONFIG_PCI is disabled */
+static void __init efika_pcisetup(void)
+{}
+#endif
+
+
+
+/* ------------------------------------------------------------------------ */
+/* Platform setup */
+/* ------------------------------------------------------------------------ */
+
+/*
+ * Emit the board lines of /proc/cpuinfo from properties of the device tree
+ * root node.  Prints nothing if the root node cannot be found.
+ */
+static void efika_show_cpuinfo(struct seq_file *m)
+{
+ struct device_node *root;
+ const char *revision;
+ const char *codegendescription;
+ const char *codegenvendor;
+
+ root = of_find_node_by_path("/");
+ if (!root)
+ return;
+
+ revision = of_get_property(root, "revision", NULL);
+ codegendescription = of_get_property(root, "CODEGEN,description", NULL);
+ codegenvendor = of_get_property(root, "CODEGEN,vendor", NULL);
+
+ /* fall back to a fixed machine name when the firmware gives no description */
+ if (codegendescription)
+ seq_printf(m, "machine\t\t: %s\n", codegendescription);
+ else
+ seq_printf(m, "machine\t\t: Efika\n");
+
+ if (revision)
+ seq_printf(m, "revision\t: %s\n", revision);
+
+ if (codegenvendor)
+ seq_printf(m, "vendor\t\t: %s\n", codegenvendor);
+
+ /* the property strings belong to the node, so drop the reference last */
+ of_node_put(root);
+}
+
+#ifdef CONFIG_PM
+/*
+ * Board hook called before suspend: arm wakeup GPIO 4 to wake the system
+ * on a high level.  @mbar is not used.
+ */
+static void efika_suspend_prepare(void __iomem *mbar)
+{
+ u8 pin = 4; /* GPIO_WKUP_4 (GPIO_PSC6_0 - IRDA_RX) */
+ u8 level = 1; /* wakeup on high level */
+ /* IOW. to wake it up, short pins 1 and 3 on IRDA connector */
+ mpc52xx_set_wakeup_gpio(pin, level);
+}
+#endif
+
+/*
+ * setup_arch hook: bring up RTAS, map the common MPC52xx devices and, with
+ * CONFIG_PM, register the board suspend hook.
+ */
+static void __init efika_setup_arch(void)
+{
+ rtas_initialize();
+
+ /* Map important registers from the internal memory map */
+ mpc52xx_map_common_devices();
+
+#ifdef CONFIG_PM
+ mpc52xx_suspend.board_suspend_prepare = efika_suspend_prepare;
+ mpc52xx_pm_init();
+#endif
+
+ /* announce ourselves through the progress hook, if one is installed */
+ if (ppc_md.progress)
+ ppc_md.progress("Linux/PPC " UTS_RELEASE " running on Efika ;-)\n", 0x0);
+}
+
+/*
+ * Machine probe hook: the board matches only if the root node's "model"
+ * property is exactly "EFIKA5K2".
+ *
+ * On a match the DMA mode values and the power-off handler are set up and
+ * 1 is returned; otherwise 0 is returned and nothing is changed.
+ */
+static int __init efika_probe(void)
+{
+	const char *board_model = of_get_property(of_root, "model", NULL);
+
+	if (board_model == NULL || strcmp(board_model, "EFIKA5K2") != 0)
+		return 0;
+
+	DMA_MODE_READ = 0x44;
+	DMA_MODE_WRITE = 0x48;
+
+	pm_power_off = rtas_power_off;
+
+	return 1;
+}
+
+/*
+ * Machine description for the Efika: board hooks defined in this file,
+ * interrupt handling from the MPC52xx code, and restart, halt, RTC and
+ * progress services provided by RTAS.
+ */
+define_machine(efika)
+{
+ .name = EFIKA_PLATFORM_NAME,
+ .probe = efika_probe,
+ .setup_arch = efika_setup_arch,
+ .discover_phbs = efika_pcisetup,
+ .init = mpc52xx_declare_of_platform_devices,
+ .show_cpuinfo = efika_show_cpuinfo,
+ .init_IRQ = mpc52xx_init_irq,
+ .get_irq = mpc52xx_get_irq,
+ .restart = rtas_restart,
+ .halt = rtas_halt,
+ .set_rtc_time = rtas_set_rtc_time,
+ .get_rtc_time = rtas_get_rtc_time,
+ .progress = rtas_progress,
+ .get_boot_time = rtas_get_boot_time,
+#ifdef CONFIG_PCI
+ .phys_mem_access_prot = pci_phys_mem_access_prot,
+#endif
+};
+
diff --git a/arch/powerpc/platforms/52xx/lite5200.c b/arch/powerpc/platforms/52xx/lite5200.c
new file mode 100644
index 000000000..0fd67b3ff
--- /dev/null
+++ b/arch/powerpc/platforms/52xx/lite5200.c
@@ -0,0 +1,192 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Freescale Lite5200 board support
+ *
+ * Written by: Grant Likely <grant.likely@secretlab.ca>
+ *
+ * Copyright (C) Secret Lab Technologies Ltd. 2006. All rights reserved.
+ * Copyright 2006 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * Description:
+ */
+
+#undef DEBUG
+
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/root_dev.h>
+#include <linux/initrd.h>
+#include <asm/time.h>
+#include <asm/io.h>
+#include <asm/machdep.h>
+#include <asm/mpc52xx.h>
+
+/* ************************************************************************
+ *
+ * Setup the architecture
+ *
+ */
+
+/*
+ * mpc5200 device tree match tables.
+ * Each table lists the "fsl,"-prefixed compatible string followed by the
+ * unprefixed spelling of the same name, and ends with an empty sentinel.
+ */
+static const struct of_device_id mpc5200_cdm_ids[] __initconst = {
+ { .compatible = "fsl,mpc5200-cdm", },
+ { .compatible = "mpc5200-cdm", },
+ {}
+};
+
+static const struct of_device_id mpc5200_gpio_ids[] __initconst = { /* used by lite5200_fix_port_config() */
+ { .compatible = "fsl,mpc5200-gpio", },
+ { .compatible = "mpc5200-gpio", },
+ {}
+};
+
+/*
+ * Fix clock configuration.
+ *
+ * Firmware is supposed to be responsible for this. If you are creating a
+ * new board port, do *NOT* duplicate this code. Fix your boot firmware
+ * to set it correctly in the first place
+ *
+ * Maps register window 0 of the first node matching mpc5200_cdm_ids,
+ * clears ext_48mhz_en, sets fd_enable and programs fd_counters depending
+ * on bit 0x40 of rstcfg, then unmaps the window again. If the mapping
+ * fails an error is logged and no register is touched.
+ */
+static void __init
+lite5200_fix_clock_config(void)
+{
+ struct device_node *np;
+ struct mpc52xx_cdm __iomem *cdm;
+ /* Map zones */
+ np = of_find_matching_node(NULL, mpc5200_cdm_ids);
+ cdm = of_iomap(np, 0);
+ of_node_put(np); /* the node is only needed to create the mapping */
+ if (!cdm) {
+ printk(KERN_ERR "%s() failed; expect abnormal behaviour\n",
+ __func__);
+ return;
+ }
+
+ /* Use internal 48 Mhz */
+ out_8(&cdm->ext_48mhz_en, 0x00);
+ out_8(&cdm->fd_enable, 0x01);
+ if (in_be32(&cdm->rstcfg) & 0x40) /* Assumes 33Mhz clock */
+ out_be16(&cdm->fd_counters, 0x0001);
+ else
+ out_be16(&cdm->fd_counters, 0x5555);
+
+ /* Unmap the regs */
+ iounmap(cdm);
+}
+
+/*
+ * Fix setting of port_config register.
+ *
+ * Firmware is supposed to be responsible for this. If you are creating a
+ * new board port, do *NOT* duplicate this code. Fix your boot firmware
+ * to set it correctly in the first place
+ *
+ * Maps register window 0 of the first node matching mpc5200_gpio_ids,
+ * performs a read-modify-write of port_config (bit fields described
+ * line by line below) and unmaps the window. If the mapping fails an
+ * error is logged and no register is touched.
+ */
+static void __init
+lite5200_fix_port_config(void)
+{
+ struct device_node *np;
+ struct mpc52xx_gpio __iomem *gpio;
+ u32 port_config;
+
+ np = of_find_matching_node(NULL, mpc5200_gpio_ids);
+ gpio = of_iomap(np, 0);
+ of_node_put(np); /* the node is only needed to create the mapping */
+ if (!gpio) {
+ printk(KERN_ERR "%s() failed. expect abnormal behavior\n",
+ __func__);
+ return;
+ }
+
+ /* Set port config */
+ port_config = in_be32(&gpio->port_config);
+
+ port_config &= ~0x00800000; /* 48Mhz internal, pin is GPIO */
+
+ port_config &= ~0x00007000; /* USB port : Differential mode */
+ port_config |= 0x00001000; /* USB 1 only */
+
+ port_config &= ~0x03000000; /* ATA CS is on csb_4/5 */
+ port_config |= 0x01000000;
+
+ pr_debug("port_config: old:%x new:%x\n",
+ in_be32(&gpio->port_config), port_config); /* "old" is re-read from the register, not cached */
+ out_be32(&gpio->port_config, port_config);
+
+ /* Unmap zone */
+ iounmap(gpio);
+}
+
+#ifdef CONFIG_PM
+static void lite5200_suspend_prepare(void __iomem *mbar)
+{
+	/* Wake up on a low level of GPIO_WKUP_1 (GPIO_PSC2_4). */
+	const u8 wkup_pin = 1;
+	const u8 wkup_level = 0;
+	void __iomem *rh_desc_a = mbar + 0x1048;
+	void __iomem *rh_status = mbar + 0x1050;
+
+	mpc52xx_set_wakeup_gpio(wkup_pin, wkup_level);
+
+	/*
+	 * Power down the USB port; this needs to be called before the
+	 * of-ohci suspend code.
+	 *
+	 * USB Rh descriptor A: NPS = 0, PSM = 0, i.e. the ports are
+	 * "power switched" and "powered at the same time".
+	 */
+	out_be32(rh_desc_a, in_be32(rh_desc_a) & ~0x300);
+
+	/* USB Rh status: LPS = 1 - turn off power */
+	out_be32(rh_status, 0x00000001);
+}
+
+static void lite5200_resume_finish(void __iomem *mbar)
+{
+	void __iomem *rh_status = mbar + 0x1050;
+
+	/* USB Rh status: LPSC = 1 - turn on power */
+	out_be32(rh_status, 0x00010000);
+}
+#endif
+/*
+ * setup_arch hook of the lite5200 machine description.
+ *
+ * Reports progress (if a progress callback is installed), maps the common
+ * MPC52xx devices, configures the XLB arbiter and then applies the two
+ * firmware fix-ups defined in this file. With CONFIG_PM the board
+ * suspend/resume hooks are registered before lite5200_pm_init() runs.
+ */
+static void __init lite5200_setup_arch(void)
+{
+ if (ppc_md.progress)
+ ppc_md.progress("lite5200_setup_arch()", 0);
+
+ /* Map important registers from the internal memory map */
+ mpc52xx_map_common_devices();
+
+ /* Some mpc5200 & mpc5200b related configuration */
+ mpc5200_setup_xlb_arbiter();
+
+ /* Fix things that firmware should have done. */
+ lite5200_fix_clock_config();
+ lite5200_fix_port_config();
+
+#ifdef CONFIG_PM
+ mpc52xx_suspend.board_suspend_prepare = lite5200_suspend_prepare;
+ mpc52xx_suspend.board_resume_finish = lite5200_resume_finish;
+ lite5200_pm_init();
+#endif
+}
+
+static const char * const board[] __initconst = { /* root-node compatible strings accepted by lite5200_probe(); NULL-terminated */
+ "fsl,lite5200",
+ "fsl,lite5200b",
+ NULL,
+};
+
+/*
+ * Machine probe hook: returns the result of matching the root node
+ * against the board[] compatible list.
+ *
+ * NOTE(review): this comment used to read "Called very early, MMU is off,
+ * device-tree isn't unflattened". The code matches against of_root, a
+ * struct device_node, so the tree appears to be unflattened by the time
+ * this runs - confirm against the caller and drop this note.
+ */
+static int __init lite5200_probe(void)
+{
+ return of_device_compatible_match(of_root, board);
+}
+
+define_machine(lite5200) { /* Lite5200 machine description; only probe and setup_arch are board specific */
+ .name = "lite5200",
+ .probe = lite5200_probe,
+ .setup_arch = lite5200_setup_arch,
+ .discover_phbs = mpc52xx_setup_pci, /* the remaining hooks are the common mpc52xx implementations */
+ .init = mpc52xx_declare_of_platform_devices,
+ .init_IRQ = mpc52xx_init_irq,
+ .get_irq = mpc52xx_get_irq,
+ .restart = mpc52xx_restart,
+};
diff --git a/arch/powerpc/platforms/52xx/lite5200_pm.c b/arch/powerpc/platforms/52xx/lite5200_pm.c
new file mode 100644
index 000000000..4900f5f48
--- /dev/null
+++ b/arch/powerpc/platforms/52xx/lite5200_pm.c
@@ -0,0 +1,249 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/init.h>
+#include <linux/suspend.h>
+#include <linux/of_address.h>
+
+#include <asm/io.h>
+#include <asm/time.h>
+#include <asm/mpc52xx.h>
+#include <asm/switch_to.h>
+
+/* defined in lite5200_sleep.S and only used here */
+extern void lite5200_low_power(void __iomem *sram, void __iomem *mbar);
+
+static struct mpc52xx_cdm __iomem *cdm; /* mbar + 0x200 */
+static struct mpc52xx_intr __iomem *pic; /* mbar + 0x500 */
+static struct mpc52xx_sdma __iomem *bes; /* mbar + 0x1200 */
+static struct mpc52xx_xlb __iomem *xlb; /* mbar + 0x1f00 */
+static struct mpc52xx_gpio __iomem *gps; /* mbar + 0xb00 */
+static struct mpc52xx_gpio_wkup __iomem *gpw; /* mbar + 0xc00 */
+static void __iomem *pci; /* mbar + 0xd00 */
+static void __iomem *sram; /* mbar + 0x8000 */
+static const int sram_size = 0x4000; /* 16 kBytes */
+static void __iomem *mbar; /* 0xC000-byte window mapped in lite5200_pm_prepare(), unmapped in lite5200_pm_enter() */
+
+static suspend_state_t lite5200_pm_target_state; /* written by lite5200_pm_begin(), reset to PM_SUSPEND_ON by lite5200_pm_end() */
+
+static int lite5200_pm_valid(suspend_state_t state)
+{
+	/* Standby and suspend-to-RAM are the only states accepted here. */
+	return state == PM_SUSPEND_STANDBY || state == PM_SUSPEND_MEM;
+}
+
+static int lite5200_pm_begin(suspend_state_t state)
+{
+	if (!lite5200_pm_valid(state))
+		return -EINVAL;
+
+	/* Remember the target so the later callbacks can pick their path. */
+	lite5200_pm_target_state = state;
+	return 0;
+}
+
+/*
+ * lite5200_pm_prepare() - .prepare callback of lite5200_pm_ops.
+ *
+ * PM_SUSPEND_STANDBY is delegated to mpc52xx_pm_prepare(). For
+ * PM_SUSPEND_MEM the first 0xC000 bytes of the MPC5200 internal register
+ * space (address taken from the IMMR node of the device tree) are mapped
+ * and the per-module pointers used by the save/restore code are derived
+ * from that mapping.
+ *
+ * Returns 0 on success, -EINVAL for any other target state, -ENODEV when
+ * no IMMR node is found, the error reported by of_address_to_resource()
+ * when the node has no usable address, or -ENOSYS when the registers
+ * cannot be mapped.
+ */
+static int lite5200_pm_prepare(void)
+{
+	struct device_node *np;
+	static const struct of_device_id immr_ids[] = {
+		{ .compatible = "fsl,mpc5200-immr", },
+		{ .compatible = "fsl,mpc5200b-immr", },
+		{ .type = "soc", .compatible = "mpc5200", }, /* lite5200 */
+		{ .type = "builtin", .compatible = "mpc5200", }, /* efika */
+		{}
+	};
+	struct resource res;
+	int ret;
+
+	/* deep sleep? let mpc52xx code handle that */
+	if (lite5200_pm_target_state == PM_SUSPEND_STANDBY)
+		return mpc52xx_pm_prepare();
+
+	if (lite5200_pm_target_state != PM_SUSPEND_MEM)
+		return -EINVAL;
+
+	/* map registers */
+	np = of_find_matching_node(NULL, immr_ids);
+	if (!np) {
+		printk(KERN_ERR "%s:%i Error finding registers\n", __func__, __LINE__);
+		return -ENODEV;
+	}
+
+	/*
+	 * res is only valid when the lookup succeeds; never hand an
+	 * uninitialised start address to ioremap().
+	 */
+	ret = of_address_to_resource(np, 0, &res);
+	of_node_put(np);
+	if (ret) {
+		printk(KERN_ERR "%s:%i Error finding registers\n", __func__, __LINE__);
+		return ret;
+	}
+
+	mbar = ioremap(res.start, 0xC000);
+	if (!mbar) {
+		printk(KERN_ERR "%s:%i Error mapping registers\n", __func__, __LINE__);
+		return -ENOSYS;
+	}
+
+	cdm = mbar + 0x200;
+	pic = mbar + 0x500;
+	gps = mbar + 0xb00;
+	gpw = mbar + 0xc00;
+	pci = mbar + 0xd00;
+	bes = mbar + 0x1200;
+	xlb = mbar + 0x1f00;
+	sram = mbar + 0x8000;
+
+	return 0;
+}
+
+/*
+ * save and restore registers not bound to any real devices
+ *
+ * Each buffer below is the RAM shadow of the __iomem pointer with the
+ * same name minus the leading "s" (scdm <-> cdm, spic <-> pic, ...).
+ */
+static struct mpc52xx_cdm scdm;
+static struct mpc52xx_intr spic;
+static struct mpc52xx_sdma sbes;
+static struct mpc52xx_xlb sxlb;
+static struct mpc52xx_gpio sgps;
+static struct mpc52xx_gpio_wkup sgpw;
+static char spci[0x200]; /* raw copy of the 0x200 bytes at the pci pointer */
+/*
+ * Copy every register block, plus sram_size bytes of on-chip SRAM, into
+ * the shadow buffers. Must run after lite5200_pm_prepare() has set up the
+ * __iomem pointers.
+ * NOTE(review): saved_sram is not defined in this file; presumably it is
+ * declared by one of the included headers - confirm.
+ */
+static void lite5200_save_regs(void)
+{
+ _memcpy_fromio(&spic, pic, sizeof(*pic));
+ _memcpy_fromio(&sbes, bes, sizeof(*bes));
+ _memcpy_fromio(&scdm, cdm, sizeof(*cdm));
+ _memcpy_fromio(&sxlb, xlb, sizeof(*xlb));
+ _memcpy_fromio(&sgps, gps, sizeof(*gps));
+ _memcpy_fromio(&sgpw, gpw, sizeof(*gpw));
+ _memcpy_fromio(spci, pci, 0x200);
+
+ _memcpy_fromio(saved_sram, sram, sram_size);
+}
+/*
+ * Write the state captured by lite5200_save_regs() back to the hardware.
+ *
+ * SRAM, the PCI block and the two GPIO blocks are restored with bulk
+ * copies. XLB, CDM, BestComm and the interrupt controller are restored
+ * register by register, and in each of those groups the enable/mask
+ * registers are written last. Keep the statement order as it is.
+ */
+static void lite5200_restore_regs(void)
+{
+ int i;
+ _memcpy_toio(sram, saved_sram, sram_size);
+
+ /* PCI Configuration */
+ _memcpy_toio(pci, spci, 0x200);
+
+ /*
+ * GPIOs. Interrupt Master Enable has higher address than other
+ * registers, so just memcpy is ok.
+ */
+ _memcpy_toio(gpw, &sgpw, sizeof(*gpw));
+ _memcpy_toio(gps, &sgps, sizeof(*gps));
+
+
+ /* XLB Arbiter */
+ out_be32(&xlb->snoop_window, sxlb.snoop_window);
+ out_be32(&xlb->master_priority, sxlb.master_priority);
+ out_be32(&xlb->master_pri_enable, sxlb.master_pri_enable);
+
+ /* enable */
+ out_be32(&xlb->int_enable, sxlb.int_enable);
+ out_be32(&xlb->config, sxlb.config);
+
+
+ /* CDM - Clock Distribution Module */
+ out_8(&cdm->ipb_clk_sel, scdm.ipb_clk_sel);
+ out_8(&cdm->pci_clk_sel, scdm.pci_clk_sel);
+
+ out_8(&cdm->ext_48mhz_en, scdm.ext_48mhz_en);
+ out_8(&cdm->fd_enable, scdm.fd_enable);
+ out_be16(&cdm->fd_counters, scdm.fd_counters);
+
+ out_be32(&cdm->clk_enables, scdm.clk_enables);
+
+ out_8(&cdm->osc_disable, scdm.osc_disable);
+
+ out_be16(&cdm->mclken_div_psc1, scdm.mclken_div_psc1);
+ out_be16(&cdm->mclken_div_psc2, scdm.mclken_div_psc2);
+ out_be16(&cdm->mclken_div_psc3, scdm.mclken_div_psc3);
+ out_be16(&cdm->mclken_div_psc6, scdm.mclken_div_psc6);
+
+
+ /* BESTCOMM */
+ out_be32(&bes->taskBar, sbes.taskBar);
+ out_be32(&bes->currentPointer, sbes.currentPointer);
+ out_be32(&bes->endPointer, sbes.endPointer);
+ out_be32(&bes->variablePointer, sbes.variablePointer);
+
+ out_8(&bes->IntVect1, sbes.IntVect1);
+ out_8(&bes->IntVect2, sbes.IntVect2);
+ out_be16(&bes->PtdCntrl, sbes.PtdCntrl);
+
+ for (i=0; i<32; i++) /* 32 ipr[] entries, one byte each */
+ out_8(&bes->ipr[i], sbes.ipr[i]);
+
+ out_be32(&bes->cReqSelect, sbes.cReqSelect);
+ out_be32(&bes->task_size0, sbes.task_size0);
+ out_be32(&bes->task_size1, sbes.task_size1);
+ out_be32(&bes->MDEDebug, sbes.MDEDebug);
+ out_be32(&bes->ADSDebug, sbes.ADSDebug);
+ out_be32(&bes->Value1, sbes.Value1);
+ out_be32(&bes->Value2, sbes.Value2);
+ out_be32(&bes->Control, sbes.Control);
+ out_be32(&bes->Status, sbes.Status);
+ out_be32(&bes->PTDDebug, sbes.PTDDebug);
+
+ /* restore tasks */
+ for (i=0; i<16; i++) /* 16 tcr[] entries, 16 bits each */
+ out_be16(&bes->tcr[i], sbes.tcr[i]);
+
+ /* enable interrupts */
+ out_be32(&bes->IntPend, sbes.IntPend);
+ out_be32(&bes->IntMask, sbes.IntMask);
+
+
+ /* PIC */
+ out_be32(&pic->per_pri1, spic.per_pri1);
+ out_be32(&pic->per_pri2, spic.per_pri2);
+ out_be32(&pic->per_pri3, spic.per_pri3);
+
+ out_be32(&pic->main_pri1, spic.main_pri1);
+ out_be32(&pic->main_pri2, spic.main_pri2);
+
+ out_be32(&pic->enc_status, spic.enc_status);
+
+ /* unmask and enable interrupts */
+ out_be32(&pic->per_mask, spic.per_mask);
+ out_be32(&pic->main_mask, spic.main_mask);
+ out_be32(&pic->ctrl, spic.ctrl);
+}
+/*
+ * .enter callback of lite5200_pm_ops.
+ *
+ * PM_SUSPEND_STANDBY is delegated to mpc52xx_pm_enter(). Otherwise the
+ * register shadows are filled, lite5200_low_power() (lite5200_sleep.S)
+ * puts the board to sleep, and after wakeup the registers are restored
+ * and the mbar mapping created by lite5200_pm_prepare() is released.
+ * Always returns 0 on the non-standby path.
+ */
+static int lite5200_pm_enter(suspend_state_t state)
+{
+ /* deep sleep? let mpc52xx code handle that */
+ if (state == PM_SUSPEND_STANDBY) {
+ return mpc52xx_pm_enter(state);
+ }
+
+ lite5200_save_regs();
+
+ /* effectively save FP regs */
+ enable_kernel_fp();
+
+ lite5200_low_power(sram, mbar); /* execution continues here after wakeup */
+
+ lite5200_restore_regs();
+
+ iounmap(mbar); /* pairs with ioremap() in lite5200_pm_prepare() */
+ return 0;
+}
+
+static void lite5200_pm_finish(void)
+{
+	/* Nothing to do unless the standby (deep sleep) path was taken. */
+	if (lite5200_pm_target_state != PM_SUSPEND_STANDBY)
+		return;
+
+	/* deep sleep? let mpc52xx code handle that */
+	mpc52xx_pm_finish();
+}
+
+static void lite5200_pm_end(void)
+{
+ lite5200_pm_target_state = PM_SUSPEND_ON; /* forget the target recorded by lite5200_pm_begin() */
+}
+
+static const struct platform_suspend_ops lite5200_pm_ops = { /* registered by lite5200_pm_init() */
+ .valid = lite5200_pm_valid,
+ .begin = lite5200_pm_begin, /* records the target state */
+ .prepare = lite5200_pm_prepare, /* maps the registers for PM_SUSPEND_MEM */
+ .enter = lite5200_pm_enter,
+ .finish = lite5200_pm_finish,
+ .end = lite5200_pm_end,
+};
+
+int __init lite5200_pm_init(void)
+{
+ suspend_set_ops(&lite5200_pm_ops); /* install the callbacks defined in this file */
+ return 0; /* no failure path */
+}
diff --git a/arch/powerpc/platforms/52xx/lite5200_sleep.S b/arch/powerpc/platforms/52xx/lite5200_sleep.S
new file mode 100644
index 000000000..0b12647e7
--- /dev/null
+++ b/arch/powerpc/platforms/52xx/lite5200_sleep.S
@@ -0,0 +1,422 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/linkage.h>
+
+#include <asm/reg.h>
+#include <asm/ppc_asm.h>
+#include <asm/processor.h>
+#include <asm/cache.h>
+
+
+#define SDRAM_CTRL 0x104
+#define SC_MODE_EN (1<<31)
+#define SC_CKE (1<<30)
+#define SC_REF_EN (1<<28)
+#define SC_SOFT_PRE (1<<1)
+
+#define GPIOW_GPIOE 0xc00
+#define GPIOW_DDR 0xc08
+#define GPIOW_DVO 0xc0c
+
+#define CDM_CE 0x214
+#define CDM_SDRAM (1<<3)
+
+
+/* helpers... beware: r10 is overwritten; r4 must hold the base address of the save area */
+#define SAVE_SPRN(reg, addr) \
+ mfspr r10, SPRN_##reg; \
+ stw r10, ((addr)*4)(r4);
+
+#define LOAD_SPRN(reg, addr) \
+ lwz r10, ((addr)*4)(r4); \
+ mtspr SPRN_##reg, r10; \
+ sync; \
+ isync;
+
+
+ .data
+registers: /* save area of 0x5c 32-bit slots; save_regs/restore_regs use slot indices up to 0x5b */
+ .space 0x5c*4
+ .text
+
+/* ---------------------------------------------------------------------- */
+/*
+ * low-power mode with help of M68HLC908QT1
+ *
+ * Entry: r3 = SRAM va, r4 = MBAR va (kept in r7/r8 for the whole routine).
+ * Stores the kernel-start-relative address of lite5200_wakeup at
+ * CONFIG_KERNEL_START, saves CPU state into the registers area, flushes
+ * the data cache, copies sram_code..sram_code_end into SRAM, disables
+ * both caches and branches to the SRAM copy. Control comes back through
+ * lite5200_wakeup, which returns to the caller via the LR saved in
+ * slot 0x1c.
+ */
+
+ .globl lite5200_low_power
+lite5200_low_power:
+
+ mr r7, r3 /* save SRAM va */
+ mr r8, r4 /* save MBAR va */
+
+ /* setup wakeup address for u-boot at physical location 0x0 */
+ lis r3, CONFIG_KERNEL_START@h
+ lis r4, lite5200_wakeup@h
+ ori r4, r4, lite5200_wakeup@l
+ sub r4, r4, r3 /* make the address relative to the kernel start */
+ stw r4, 0(r3)
+
+
+ /*
+ * save stuff BDI overwrites
+ * 0xf0 (0xe0->0x100 gets overwritten when BDI connected;
+ * even when CONFIG_BDI_SWITCH is disabled and MMU XLAT commented; heisenbug?))
+ * WARNING: self-refresh doesn't seem to work when BDI2000 is connected,
+ * possibly because BDI sets SDRAM registers before wakeup code does
+ */
+ lis r4, registers@h
+ ori r4, r4, registers@l
+ lwz r10, 0xf0(r3)
+ stw r10, (0x1d*4)(r4)
+
+ /* save registers to r4 [destroys r10] */
+ SAVE_SPRN(LR, 0x1c) /* must happen before the bl below replaces LR */
+ bl save_regs
+
+ /* flush caches [destroys r3, r4] */
+ bl flush_data_cache
+
+
+ /* copy code to sram */
+ mr r4, r7
+ li r3, (sram_code_end - sram_code)/4 /* word count */
+ mtctr r3
+ lis r3, sram_code@h
+ ori r3, r3, sram_code@l
+1:
+ lwz r5, 0(r3)
+ stw r5, 0(r4)
+ addi r3, r3, 4
+ addi r4, r4, 4
+ bdnz 1b
+
+ /* get tb_ticks_per_usec */
+ lis r3, tb_ticks_per_usec@h
+ lwz r11, tb_ticks_per_usec@l(r3) /* r11 is consumed by the udelay copy in SRAM */
+
+ /* disable I and D caches */
+ mfspr r3, SPRN_HID0
+ ori r3, r3, HID0_ICE | HID0_DCE /* set both bits ... */
+ xori r3, r3, HID0_ICE | HID0_DCE /* ... then flip them, i.e. clear both */
+ sync; isync;
+ mtspr SPRN_HID0, r3
+ sync; isync;
+
+ /* jump to sram */
+ mtlr r7
+ blrl
+ /* doesn't return */
+
+
+sram_code: /* everything up to sram_code_end is copied to SRAM by lite5200_low_power and runs from there; r8 = MBAR va, r11 = tb_ticks_per_usec */
+ /* self refresh */
+ lwz r4, SDRAM_CTRL(r8)
+
+ /* send NOP (precharge) */
+ oris r4, r4, SC_MODE_EN@h /* mode_en */
+ stw r4, SDRAM_CTRL(r8)
+ sync
+
+ ori r4, r4, SC_SOFT_PRE /* soft_pre */
+ stw r4, SDRAM_CTRL(r8)
+ sync
+ xori r4, r4, SC_SOFT_PRE
+
+ xoris r4, r4, SC_MODE_EN@h /* !mode_en */
+ stw r4, SDRAM_CTRL(r8)
+ sync
+
+ /* delay (for NOP to finish) */
+ li r12, 1
+ bl udelay
+
+ /*
+ * mode_en must not be set when enabling self-refresh
+ * send AR with CKE low (self-refresh)
+ */
+ oris r4, r4, (SC_REF_EN | SC_CKE)@h
+ xoris r4, r4, (SC_CKE)@h /* ref_en !cke */
+ stw r4, SDRAM_CTRL(r8)
+ sync
+
+ /* delay (after !CKE there should be two cycles) */
+ li r12, 1
+ bl udelay
+
+ /* disable clock */
+ lwz r4, CDM_CE(r8)
+ ori r4, r4, CDM_SDRAM /* set, then flip: clears the CDM_SDRAM bit */
+ xori r4, r4, CDM_SDRAM
+ stw r4, CDM_CE(r8)
+ sync
+
+ /* delay a bit */
+ li r12, 1
+ bl udelay
+
+
+ /* turn off with QT chip */
+ li r4, 0x02
+ stb r4, GPIOW_GPIOE(r8) /* enable gpio_wkup1 */
+ sync
+
+ stb r4, GPIOW_DVO(r8) /* "output" high */
+ sync
+ stb r4, GPIOW_DDR(r8) /* output */
+ sync
+ stb r4, GPIOW_DVO(r8) /* output high */
+ sync
+
+ /* 10uS delay */
+ li r12, 10
+ bl udelay
+
+ /* turn off */
+ li r4, 0
+ stb r4, GPIOW_DVO(r8) /* output low */
+ sync
+
+ /* wait until we're offline */
+ 1:
+ b 1b
+
+
+ /* local udelay in sram is needed */
+SYM_FUNC_START_LOCAL(udelay)
+ /* r11 - tb_ticks_per_usec, r12 - usecs, overwrites r13 */
+ mullw r12, r12, r11 /* usecs -> timebase ticks */
+ mftb r13 /* start */
+ add r12, r13, r12 /* end */
+ 1:
+ mftb r13 /* current */
+ cmp cr0, r13, r12
+ blt 1b /* spin until current >= end */
+ blr
+SYM_FUNC_END(udelay)
+
+sram_code_end:
+
+
+
+/*
+ * uboot jumps here on resume
+ *
+ * Restores the state saved by save_regs, turns address translation back
+ * on, re-enables the caches and returns through the LR stored in slot
+ * 0x1c of the registers area.
+ */
+lite5200_wakeup:
+ bl restore_regs
+
+
+ /* HIDs, MSR */
+ LOAD_SPRN(HID1, 0x19)
+ LOAD_SPRN(HID2, 0x1a)
+
+
+ /* address translation is tricky (see turn_on_mmu) */
+ mfmsr r10
+ ori r10, r10, MSR_DR | MSR_IR
+
+
+ mtspr SPRN_SRR1, r10
+ lis r10, mmu_on@h
+ ori r10, r10, mmu_on@l
+ mtspr SPRN_SRR0, r10
+ sync
+ rfi /* continues at mmu_on with MSR_DR | MSR_IR set */
+mmu_on:
+ /* kernel offset (r4 is still set from restore_regs) */
+ addis r4, r4, CONFIG_KERNEL_START@h
+
+
+ /* restore MSR */
+ lwz r10, (4*0x1b)(r4)
+ mtmsr r10
+ sync; isync;
+
+ /* invalidate caches */
+ mfspr r10, SPRN_HID0
+ ori r5, r10, HID0_ICFI | HID0_DCI
+ mtspr SPRN_HID0, r5 /* invalidate caches */
+ sync; isync;
+ mtspr SPRN_HID0, r10
+ sync; isync;
+
+ /* enable caches */
+ lwz r10, (4*0x18)(r4)
+ mtspr SPRN_HID0, r10 /* restore (enable caches, DPM) */
+ /* ^ this has to be after address translation set in MSR */
+ sync
+ isync
+
+
+ /* restore 0xf0 (BDI2000) */
+ lis r3, CONFIG_KERNEL_START@h
+ lwz r10, (0x1d*4)(r4)
+ stw r10, 0xf0(r3)
+
+ LOAD_SPRN(LR, 0x1c)
+
+
+ blr
+_ASM_NOKPROBE_SYMBOL(lite5200_wakeup)
+
+
+/* ---------------------------------------------------------------------- */
+/* boring code: helpers */
+
+/*
+ * save registers
+ *
+ * All helpers below store to word slot "addr" of the area r4 points to
+ * and use r10 as scratch.
+ */
+#define SAVE_BAT(n, addr) \
+ SAVE_SPRN(DBAT##n##L, addr); \
+ SAVE_SPRN(DBAT##n##U, addr+1); \
+ SAVE_SPRN(IBAT##n##L, addr+2); \
+ SAVE_SPRN(IBAT##n##U, addr+3);
+
+#define SAVE_SR(n, addr) \
+ mfsr r10, n; \
+ stw r10, ((addr)*4)(r4);
+
+#define SAVE_4SR(n, addr) \
+ SAVE_SR(n, addr); \
+ SAVE_SR(n+1, addr+1); \
+ SAVE_SR(n+2, addr+2); \
+ SAVE_SR(n+3, addr+3);
+
+SYM_FUNC_START_LOCAL(save_regs)
+ stw r0, 0(r4)
+ stw r1, 0x4(r4)
+ stw r2, 0x8(r4)
+ stmw r11, 0xc(r4) /* 0xc -> 0x5f, (0x18*4-1) */
+
+ SAVE_SPRN(HID0, 0x18)
+ SAVE_SPRN(HID1, 0x19)
+ SAVE_SPRN(HID2, 0x1a)
+ mfmsr r10
+ stw r10, (4*0x1b)(r4)
+ /*SAVE_SPRN(LR, 0x1c) have to save it before the call */
+ /* 0x1d reserved by 0xf0 */
+ SAVE_SPRN(RPA, 0x1e)
+ SAVE_SPRN(SDR1, 0x1f)
+
+ /* save MMU regs */
+ SAVE_BAT(0, 0x20)
+ SAVE_BAT(1, 0x24)
+ SAVE_BAT(2, 0x28)
+ SAVE_BAT(3, 0x2c)
+ SAVE_BAT(4, 0x30)
+ SAVE_BAT(5, 0x34)
+ SAVE_BAT(6, 0x38)
+ SAVE_BAT(7, 0x3c)
+
+ SAVE_4SR(0, 0x40)
+ SAVE_4SR(4, 0x44)
+ SAVE_4SR(8, 0x48)
+ SAVE_4SR(12, 0x4c)
+
+ SAVE_SPRN(SPRG0, 0x50)
+ SAVE_SPRN(SPRG1, 0x51)
+ SAVE_SPRN(SPRG2, 0x52)
+ SAVE_SPRN(SPRG3, 0x53)
+ SAVE_SPRN(SPRG4, 0x54)
+ SAVE_SPRN(SPRG5, 0x55)
+ SAVE_SPRN(SPRG6, 0x56)
+ SAVE_SPRN(SPRG7, 0x57)
+
+ SAVE_SPRN(IABR, 0x58)
+ SAVE_SPRN(DABR, 0x59)
+ SAVE_SPRN(TBRL, 0x5a)
+ SAVE_SPRN(TBRU, 0x5b) /* last slot used; the area holds 0x5c slots */
+
+ blr
+SYM_FUNC_END(save_regs)
+
+
+/*
+ * restore registers
+ *
+ * Mirror images of the SAVE_* helpers: load word slot "addr" from the
+ * area r4 points to, using r10 as scratch.
+ */
+#define LOAD_BAT(n, addr) \
+ LOAD_SPRN(DBAT##n##L, addr); \
+ LOAD_SPRN(DBAT##n##U, addr+1); \
+ LOAD_SPRN(IBAT##n##L, addr+2); \
+ LOAD_SPRN(IBAT##n##U, addr+3);
+
+#define LOAD_SR(n, addr) \
+ lwz r10, ((addr)*4)(r4); \
+ mtsr n, r10;
+
+#define LOAD_4SR(n, addr) \
+ LOAD_SR(n, addr); \
+ LOAD_SR(n+1, addr+1); \
+ LOAD_SR(n+2, addr+2); \
+ LOAD_SR(n+3, addr+3);
+
+SYM_FUNC_START_LOCAL(restore_regs)
+ lis r4, registers@h
+ ori r4, r4, registers@l
+
+ /* MMU is not up yet */
+ subis r4, r4, CONFIG_KERNEL_START@h /* r4 stays set for the caller, which adds the offset back once translation is on */
+
+ lwz r0, 0(r4)
+ lwz r1, 0x4(r4)
+ lwz r2, 0x8(r4)
+ lmw r11, 0xc(r4)
+
+ /*
+ * these are a bit tricky
+ *
+ * 0x18 - HID0
+ * 0x19 - HID1
+ * 0x1a - HID2
+ * 0x1b - MSR
+ * 0x1c - LR
+ * 0x1d - reserved by 0xf0 (BDI2000)
+ *
+ * They are not restored here; lite5200_wakeup handles them.
+ */
+ LOAD_SPRN(RPA, 0x1e);
+ LOAD_SPRN(SDR1, 0x1f);
+
+ /* restore MMU regs */
+ LOAD_BAT(0, 0x20)
+ LOAD_BAT(1, 0x24)
+ LOAD_BAT(2, 0x28)
+ LOAD_BAT(3, 0x2c)
+ LOAD_BAT(4, 0x30)
+ LOAD_BAT(5, 0x34)
+ LOAD_BAT(6, 0x38)
+ LOAD_BAT(7, 0x3c)
+
+ LOAD_4SR(0, 0x40)
+ LOAD_4SR(4, 0x44)
+ LOAD_4SR(8, 0x48)
+ LOAD_4SR(12, 0x4c)
+
+ /* rest of regs */
+ LOAD_SPRN(SPRG0, 0x50);
+ LOAD_SPRN(SPRG1, 0x51);
+ LOAD_SPRN(SPRG2, 0x52);
+ LOAD_SPRN(SPRG3, 0x53);
+ LOAD_SPRN(SPRG4, 0x54);
+ LOAD_SPRN(SPRG5, 0x55);
+ LOAD_SPRN(SPRG6, 0x56);
+ LOAD_SPRN(SPRG7, 0x57);
+
+ LOAD_SPRN(IABR, 0x58);
+ LOAD_SPRN(DABR, 0x59);
+ LOAD_SPRN(TBWL, 0x5a); /* these two have separate R/W regs */
+ LOAD_SPRN(TBWU, 0x5b);
+
+ blr
+_ASM_NOKPROBE_SYMBOL(restore_regs)
+SYM_FUNC_END(restore_regs)
+
+
+
+/* cache flushing code. copied from arch/ppc/boot/util.S */
+#define NUM_CACHE_LINES (128*8)
+
+/*
+ * Flush data cache
+ * Do this by just reading lots of stuff into the cache.
+ *
+ * Loads one word from each of NUM_CACHE_LINES consecutive lines of
+ * L1_CACHE_BYTES, starting at CONFIG_KERNEL_START.
+ * Overwrites r3, r4 and CTR.
+ */
+SYM_FUNC_START_LOCAL(flush_data_cache)
+ lis r3,CONFIG_KERNEL_START@h
+ ori r3,r3,CONFIG_KERNEL_START@l
+ li r4,NUM_CACHE_LINES
+ mtctr r4
+1:
+ lwz r4,0(r3)
+ addi r3,r3,L1_CACHE_BYTES /* Next line, please */
+ bdnz 1b
+ blr
+SYM_FUNC_END(flush_data_cache)
diff --git a/arch/powerpc/platforms/52xx/media5200.c b/arch/powerpc/platforms/52xx/media5200.c
new file mode 100644
index 000000000..19626cd42
--- /dev/null
+++ b/arch/powerpc/platforms/52xx/media5200.c
@@ -0,0 +1,239 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Support for 'media5200-platform' compatible boards.
+ *
+ * Copyright (C) 2008 Secret Lab Technologies Ltd.
+ *
+ * Description:
+ * This code implements support for the Freescale Media5200 platform
+ * (built around the MPC5200 SoC).
+ *
+ * Notable characteristic of the Media5200 is the presence of an FPGA
+ * that has all external IRQ lines routed through it. This file implements
+ * a cascaded interrupt controller driver which attaches itself to the
+ * Virtual IRQ subsystem after the primary mpc5200 interrupt controller
+ * is initialized.
+ */
+
+#undef DEBUG
+
+#include <linux/irq.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <asm/time.h>
+#include <asm/machdep.h>
+#include <asm/mpc52xx.h>
+
+static const struct of_device_id mpc5200_gpio_ids[] __initconst = { /* GPIO node lookup for media5200_setup_arch() */
+ { .compatible = "fsl,mpc5200-gpio", },
+ { .compatible = "mpc5200-gpio", },
+ {}
+};
+
+/* FPGA register set */
+#define MEDIA5200_IRQ_ENABLE (0x40c)
+#define MEDIA5200_IRQ_STATUS (0x410)
+#define MEDIA5200_NUM_IRQS (6)
+#define MEDIA5200_IRQ_SHIFT (32 - MEDIA5200_NUM_IRQS) /* = 26: hwirq n is bit (26 + n) of the ENABLE/STATUS words */
+
+struct media5200_irq {
+ void __iomem *regs; /* FPGA register window, mapped in media5200_init_irq() */
+ spinlock_t lock; /* serialises read-modify-write of MEDIA5200_IRQ_ENABLE */
+ struct irq_domain *irqhost; /* linear domain with MEDIA5200_NUM_IRQS entries */
+};
+struct media5200_irq media5200_irq; /* single instance; the file handles one FPGA only */
+
+/*
+ * irq_chip .irq_unmask callback: set the enable bit of one FPGA interrupt.
+ * @d: irq_data of the interrupt; its hwirq selects the bit.
+ *
+ * The enable register is updated read-modify-write under
+ * media5200_irq.lock with local interrupts disabled.
+ */
+static void media5200_irq_unmask(struct irq_data *d)
+{
+	unsigned long flags;
+	u32 val;
+
+	spin_lock_irqsave(&media5200_irq.lock, flags);
+	val = in_be32(media5200_irq.regs + MEDIA5200_IRQ_ENABLE);
+	/* 1U, not 1: hwirq 5 selects bit 31, which a signed int cannot hold */
+	val |= 1U << (MEDIA5200_IRQ_SHIFT + irqd_to_hwirq(d));
+	out_be32(media5200_irq.regs + MEDIA5200_IRQ_ENABLE, val);
+	spin_unlock_irqrestore(&media5200_irq.lock, flags);
+}
+
+/*
+ * irq_chip .irq_mask / .irq_mask_ack callback: clear the enable bit of one
+ * FPGA interrupt.
+ * @d: irq_data of the interrupt; its hwirq selects the bit.
+ *
+ * The enable register is updated read-modify-write under
+ * media5200_irq.lock with local interrupts disabled.
+ */
+static void media5200_irq_mask(struct irq_data *d)
+{
+	unsigned long flags;
+	u32 val;
+
+	spin_lock_irqsave(&media5200_irq.lock, flags);
+	val = in_be32(media5200_irq.regs + MEDIA5200_IRQ_ENABLE);
+	/* 1U, not 1: hwirq 5 selects bit 31, which a signed int cannot hold */
+	val &= ~(1U << (MEDIA5200_IRQ_SHIFT + irqd_to_hwirq(d)));
+	out_be32(media5200_irq.regs + MEDIA5200_IRQ_ENABLE, val);
+	spin_unlock_irqrestore(&media5200_irq.lock, flags);
+}
+
+static struct irq_chip media5200_irq_chip = { /* attached to every FPGA interrupt in media5200_irq_map() */
+ .name = "Media5200 FPGA",
+ .irq_unmask = media5200_irq_unmask,
+ .irq_mask = media5200_irq_mask,
+ .irq_mask_ack = media5200_irq_mask, /* same handler as .irq_mask: there is no separate ack step */
+};
+
+/*
+ * Chained handler for the interrupt line the FPGA is cascaded on.
+ * @desc: descriptor of the parent (cascade) interrupt.
+ *
+ * Masks the parent interrupt, dispatches the lowest-numbered FPGA
+ * interrupt that is both pending and enabled (at most one per
+ * invocation), then acks the parent and unmasks it again unless it has
+ * been disabled in the meantime.
+ */
+static void media5200_irq_cascade(struct irq_desc *desc)
+{
+	struct irq_chip *chip = irq_desc_get_chip(desc);
+	int val;
+	u32 status, enable;
+
+	/* Mask off the cascaded IRQ */
+	raw_spin_lock(&desc->lock);
+	chip->irq_mask(&desc->irq_data);
+	raw_spin_unlock(&desc->lock);
+
+	/*
+	 * Ask the FPGA for IRQ status. If 'val' is 0, then no enabled irqs
+	 * are pending. 'ffs()' is 1 based.
+	 * The ENABLE register is still read before STATUS; only the local
+	 * names were corrected to match the register each one holds.
+	 */
+	enable = in_be32(media5200_irq.regs + MEDIA5200_IRQ_ENABLE);
+	status = in_be32(media5200_irq.regs + MEDIA5200_IRQ_STATUS);
+	val = ffs((status & enable) >> MEDIA5200_IRQ_SHIFT);
+	if (val) {
+		generic_handle_domain_irq(media5200_irq.irqhost, val - 1);
+		/* pr_debug("%s: s=%.8x e=%.8x hwirq=%i\n",
+		 *	    __func__, status, enable, val - 1);
+		 */
+	}
+
+	/* Processing done; can reenable the cascade now */
+	raw_spin_lock(&desc->lock);
+	chip->irq_ack(&desc->irq_data);
+	if (!irqd_irq_disabled(&desc->irq_data))
+		chip->irq_unmask(&desc->irq_data);
+	raw_spin_unlock(&desc->lock);
+}
+/*
+ * irq_domain .map callback: attach the FPGA irq_chip to a newly mapped
+ * virq, use handle_level_irq as its flow handler and flag it IRQ_LEVEL.
+ * Always returns 0.
+ */
+static int media5200_irq_map(struct irq_domain *h, unsigned int virq,
+ irq_hw_number_t hw)
+{
+ pr_debug("%s: h=%p, virq=%i, hwirq=%i\n", __func__, h, virq, (int)hw);
+ irq_set_chip_data(virq, &media5200_irq);
+ irq_set_chip_and_handler(virq, &media5200_irq_chip, handle_level_irq);
+ irq_set_status_flags(virq, IRQ_LEVEL);
+ return 0;
+}
+
+/*
+ * irq_domain .xlate callback: translate a device tree interrupt specifier.
+ * @intspec:   specifier cells; cell 0 is logged as "bank", cell 1 is the
+ *             hardware interrupt number
+ * @intsize:   number of cells, must be 2
+ * @out_hwirq: receives intspec[1]
+ * @out_flags: always set to IRQ_TYPE_NONE
+ *
+ * Returns 0 on success or -EINVAL when the specifier does not have
+ * exactly two cells (a proper errno instead of a bare -1).
+ */
+static int media5200_irq_xlate(struct irq_domain *h, struct device_node *ct,
+				 const u32 *intspec, unsigned int intsize,
+				 irq_hw_number_t *out_hwirq,
+				 unsigned int *out_flags)
+{
+	if (intsize != 2)
+		return -EINVAL;
+
+	pr_debug("%s: bank=%i, number=%i\n", __func__, intspec[0], intspec[1]);
+	*out_hwirq = intspec[1];
+	*out_flags = IRQ_TYPE_NONE;
+	return 0;
+}
+
+static const struct irq_domain_ops media5200_irq_ops = { /* passed to irq_domain_add_linear() in media5200_init_irq() */
+ .map = media5200_irq_map,
+ .xlate = media5200_irq_xlate,
+};
+
+/*
+ * Setup Media5200 IRQ mapping
+ *
+ * Initialises the regular MPC5200 interrupt controller, then looks up the
+ * "fsl,media5200-fpga" node, maps its registers, disables all FPGA
+ * interrupts, creates a linear irq domain for them and installs
+ * media5200_irq_cascade() as chained handler on the FPGA's parent
+ * interrupt.
+ *
+ * On any failure an error is logged and the FPGA interrupts stay
+ * unavailable; a register mapping that was already created is released
+ * again instead of being leaked.
+ */
+static void __init media5200_init_irq(void)
+{
+	struct device_node *fpga_np;
+	int cascade_virq;
+
+	/* First setup the regular MPC5200 interrupt controller */
+	mpc52xx_init_irq();
+
+	/* Now find the FPGA IRQ */
+	fpga_np = of_find_compatible_node(NULL, NULL, "fsl,media5200-fpga");
+	if (!fpga_np)
+		goto out;
+	pr_debug("%s: found fpga node: %pOF\n", __func__, fpga_np);
+
+	media5200_irq.regs = of_iomap(fpga_np, 0);
+	if (!media5200_irq.regs)
+		goto out;
+	pr_debug("%s: mapped to %p\n", __func__, media5200_irq.regs);
+
+	cascade_virq = irq_of_parse_and_map(fpga_np, 0);
+	if (!cascade_virq)
+		goto out_unmap;
+	pr_debug("%s: cascaded on virq=%i\n", __func__, cascade_virq);
+
+	/* Disable all FPGA IRQs */
+	out_be32(media5200_irq.regs + MEDIA5200_IRQ_ENABLE, 0);
+
+	spin_lock_init(&media5200_irq.lock);
+
+	media5200_irq.irqhost = irq_domain_add_linear(fpga_np,
+			MEDIA5200_NUM_IRQS, &media5200_irq_ops, &media5200_irq);
+	if (!media5200_irq.irqhost)
+		goto out_unmap;
+	pr_debug("%s: allocated irqhost\n", __func__);
+
+	of_node_put(fpga_np);
+
+	irq_set_handler_data(cascade_virq, &media5200_irq);
+	irq_set_chained_handler(cascade_virq, media5200_irq_cascade);
+
+	return;
+
+ out_unmap:
+	/* no domain was registered, so nothing can use the mapping any more */
+	iounmap(media5200_irq.regs);
+	media5200_irq.regs = NULL;
+ out:
+	pr_err("Could not find Media5200 FPGA; PCI interrupts will not work\n");
+	of_node_put(fpga_np);
+}
+
+/*
+ * Setup the architecture
+ *
+ * Reports progress (if a progress callback is installed), maps the common
+ * MPC52xx devices, configures the XLB arbiter and then rewrites the
+ * two-bit ATA chip-select field (mask 0x03000000) of the GPIO port_config
+ * register to 0x01000000. If the GPIO registers cannot be mapped an error
+ * is logged and port_config is left untouched.
+ */
+static void __init media5200_setup_arch(void)
+{
+
+ struct device_node *np;
+ struct mpc52xx_gpio __iomem *gpio;
+ u32 port_config;
+
+ if (ppc_md.progress)
+ ppc_md.progress("media5200_setup_arch()", 0);
+
+ /* Map important registers from the internal memory map */
+ mpc52xx_map_common_devices();
+
+ /* Some mpc5200 & mpc5200b related configuration */
+ mpc5200_setup_xlb_arbiter();
+
+ np = of_find_matching_node(NULL, mpc5200_gpio_ids);
+ gpio = of_iomap(np, 0);
+ of_node_put(np); /* the node is only needed to create the mapping */
+ if (!gpio) {
+ printk(KERN_ERR "%s() failed. expect abnormal behavior\n",
+ __func__);
+ return;
+ }
+
+ /* Set port config */
+ port_config = in_be32(&gpio->port_config);
+
+ port_config &= ~0x03000000; /* ATA CS is on csb_4/5 */
+ port_config |= 0x01000000;
+
+ out_be32(&gpio->port_config, port_config);
+
+ /* Unmap zone */
+ iounmap(gpio);
+
+}
+
+define_machine(media5200_platform) { /* matched through .compatible; this description has no .probe hook */
+ .name = "media5200-platform",
+ .compatible = "fsl,media5200",
+ .setup_arch = media5200_setup_arch,
+ .discover_phbs = mpc52xx_setup_pci,
+ .init = mpc52xx_declare_of_platform_devices,
+ .init_IRQ = media5200_init_irq, /* board specific: adds the FPGA cascade on top of mpc52xx_init_irq() */
+ .get_irq = mpc52xx_get_irq,
+ .restart = mpc52xx_restart,
+};
diff --git a/arch/powerpc/platforms/52xx/mpc5200_simple.c b/arch/powerpc/platforms/52xx/mpc5200_simple.c
new file mode 100644
index 000000000..f1e85e86f
--- /dev/null
+++ b/arch/powerpc/platforms/52xx/mpc5200_simple.c
@@ -0,0 +1,79 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Support for 'mpc5200-simple-platform' compatible boards.
+ *
+ * Written by Marian Balakowicz <m8@semihalf.com>
+ * Copyright (C) 2007 Semihalf
+ *
+ * Description:
+ * This code implements support for a simple MPC52xx based boards which
+ * do not need a custom platform specific setup. Such boards are
+ * supported assuming the following:
+ *
+ * - GPIO pins are configured by the firmware,
+ * - CDM configuration (clocking) is setup correctly by firmware,
+ * - if the 'fsl,has-wdt' property is present in one of the
+ * gpt nodes, then it is safe to use such gpt to reset the board,
+ * - PCI is supported if enabled in the kernel configuration
+ * and if there is a PCI bus node defined in the device tree.
+ *
+ * Boards that are compatible with this generic platform support
+ * are listed in a 'board' table.
+ */
+
+#undef DEBUG
+#include <linux/of.h>
+#include <asm/time.h>
+#include <asm/machdep.h>
+#include <asm/mpc52xx.h>
+
+/*
+ * Setup the architecture
+ *
+ * Reports progress (if a progress callback is installed), maps the common
+ * MPC52xx devices and configures the XLB arbiter. Nothing board specific
+ * is done here.
+ */
+static void __init mpc5200_simple_setup_arch(void)
+{
+ if (ppc_md.progress)
+ ppc_md.progress("mpc5200_simple_setup_arch()", 0);
+
+ /* Map important registers from the internal memory map */
+ mpc52xx_map_common_devices();
+
+ /* Some mpc5200 & mpc5200b related configuration */
+ mpc5200_setup_xlb_arbiter();
+}
+
+/*
+ * list of the supported boards
+ *
+ * Root-node compatible strings handled by this generic platform,
+ * NULL-terminated. The table is never modified, so both the strings and
+ * the array are const and it lives in __initconst, the same form the
+ * lite5200 board table uses.
+ */
+static const char * const board[] __initconst = {
+	"anonymous,a3m071",
+	"anonymous,a4m072",
+	"anon,charon",
+	"ifm,o2d",
+	"intercontrol,digsy-mtc",
+	"manroland,mucmc52",
+	"manroland,uc101",
+	"phytec,pcm030",
+	"phytec,pcm032",
+	"promess,motionpro",
+	"schindler,cm5200",
+	"tqc,tqm5200",
+	NULL
+};
+
+/*
+ * Machine probe hook: returns the result of matching the root node
+ * against the board[] compatible list.
+ *
+ * NOTE(review): this comment used to read "Called very early, MMU is off,
+ * device-tree isn't unflattened". The code matches against of_root, a
+ * struct device_node, so the tree appears to be unflattened by the time
+ * this runs - confirm against the caller and drop this note.
+ */
+static int __init mpc5200_simple_probe(void)
+{
+ return of_device_compatible_match(of_root, board);
+}
+
+define_machine(mpc5200_simple_platform) { /* generic description for the boards listed in board[] */
+ .name = "mpc5200-simple-platform",
+ .probe = mpc5200_simple_probe,
+ .setup_arch = mpc5200_simple_setup_arch,
+ .discover_phbs = mpc52xx_setup_pci, /* the remaining hooks are the common mpc52xx implementations */
+ .init = mpc52xx_declare_of_platform_devices,
+ .init_IRQ = mpc52xx_init_irq,
+ .get_irq = mpc52xx_get_irq,
+ .restart = mpc52xx_restart,
+};
diff --git a/arch/powerpc/platforms/52xx/mpc52xx_common.c b/arch/powerpc/platforms/52xx/mpc52xx_common.c
new file mode 100644
index 000000000..b4938e344
--- /dev/null
+++ b/arch/powerpc/platforms/52xx/mpc52xx_common.c
@@ -0,0 +1,306 @@
+/*
+ *
+ * Utility functions for the Freescale MPC52xx.
+ *
+ * Copyright (C) 2006 Sylvain Munaut <tnt@246tNt.com>
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2. This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ *
+ */
+
+#undef DEBUG
+
+#include <linux/gpio.h>
+#include <linux/kernel.h>
+#include <linux/spinlock.h>
+#include <linux/of_address.h>
+#include <linux/of_platform.h>
+#include <linux/of_gpio.h>
+#include <linux/export.h>
+#include <asm/io.h>
+#include <asm/mpc52xx.h>
+
+/*
+ * MPC5200 device tree match tables
+ *
+ * mpc52xx_xlb_ids: XLB arbiter node, looked up by
+ * mpc5200_setup_xlb_arbiter().
+ */
+static const struct of_device_id mpc52xx_xlb_ids[] __initconst = {
+ { .compatible = "fsl,mpc5200-xlb", },
+ { .compatible = "mpc5200-xlb", },
+ {}
+};
+static const struct of_device_id mpc52xx_bus_ids[] __initconst = {
+ { .compatible = "fsl,mpc5200-immr", },
+ { .compatible = "fsl,mpc5200b-immr", },
+ { .compatible = "simple-bus", },
+
+ /* deprecated matches; shouldn't be used in new device trees */
+ { .compatible = "fsl,lpb", },
+ { .type = "builtin", .compatible = "mpc5200", }, /* efika */
+ { .type = "soc", .compatible = "mpc5200", }, /* lite5200 */
+ {}
+};
+
+/*
+ * mpc52xx_wdt is mapped in mpc52xx_map_common_devices() and used in
+ * mpc52xx_restart(). Permanent mapping is required because
+ * mpc52xx_restart() can be called from interrupt context while node
+ * mapping (which calls ioremap()) cannot be used at such point.
+ *
+ * mpc52xx_cdm is mapped by the same function and is accessed, under
+ * mpc52xx_lock, by mpc52xx_set_psc_clkdiv().
+ */
+static DEFINE_SPINLOCK(mpc52xx_lock);
+static struct mpc52xx_gpt __iomem *mpc52xx_wdt;
+static struct mpc52xx_cdm __iomem *mpc52xx_cdm;
+
+/*
+ * Configure the XLB arbiter settings to match what Linux expects.
+ *
+ * Looks up the XLB node via mpc52xx_xlb_ids, maps its first register range,
+ * programs the master_pri_enable and master_priority registers and, on a
+ * plain MPC5200 (as identified by the SVR), sets the PLDIS bit in the config
+ * register. The mapping is released again before returning. If the
+ * registers cannot be mapped, an error is logged and nothing is programmed.
+ */
+void __init
+mpc5200_setup_xlb_arbiter(void)
+{
+	struct device_node *np;
+	struct mpc52xx_xlb __iomem *xlb;
+
+	np = of_find_matching_node(NULL, mpc52xx_xlb_ids);
+	xlb = of_iomap(np, 0);
+	of_node_put(np);
+	if (!xlb) {
+		/* Report the function that actually failed */
+		printk(KERN_ERR __FILE__ ": "
+			"Error mapping XLB in %s(). "
+			"Expect some abnormal behavior\n", __func__);
+		return;
+	}
+
+	/* Configure the XLB Arbiter priorities */
+	out_be32(&xlb->master_pri_enable, 0xff);
+	out_be32(&xlb->master_priority, 0x11111111);
+
+	/*
+	 * Disable XLB pipelining
+	 * (cfr errata 292. We could do this only just before ATA PIO
+	 * transaction and re-enable it afterwards ...)
+	 * Not needed on MPC5200B.
+	 */
+	if ((mfspr(SPRN_SVR) & MPC5200_SVR_MASK) == MPC5200_SVR)
+		out_be32(&xlb->config, in_be32(&xlb->config) | MPC52xx_XLB_CFG_PLDIS);
+
+	iounmap(xlb);
+}
+
+/*
+ * simple_gpio and wkup_gpio are mapped in mpc52xx_map_common_devices and
+ * used in mpc5200_psc_ac97_gpio_reset(), which performs its register
+ * sequence while holding gpio_lock.
+ */
+static DEFINE_SPINLOCK(gpio_lock);
+struct mpc52xx_gpio __iomem *simple_gpio;
+struct mpc52xx_gpio_wkup __iomem *wkup_gpio;
+
+/**
+ * mpc52xx_declare_of_platform_devices - populate platform devices from the DT
+ *
+ * Registers the internal devices and the children of the localplus bus
+ * (every bus node matching mpc52xx_bus_ids) on the of_platform bus.
+ * A failure is only logged.
+ */
+void __init mpc52xx_declare_of_platform_devices(void)
+{
+	int rc;
+
+	/* Walk the tree from the root and create the 'platform' devices. */
+	rc = of_platform_populate(NULL, mpc52xx_bus_ids, NULL, NULL);
+	if (rc == 0)
+		return;
+
+	pr_err(__FILE__ ": Error while populating devices from DT\n");
+}
+
+/*
+ * match tables used by mpc52xx_map_common_devices()
+ *
+ * The gpt table is scanned for a node carrying a has-wdt property; for the
+ * cdm, gpio_simple and gpio_wkup tables the first matching node is mapped.
+ */
+static const struct of_device_id mpc52xx_gpt_ids[] __initconst = {
+ { .compatible = "fsl,mpc5200-gpt", },
+ { .compatible = "mpc5200-gpt", }, /* old */
+ {}
+};
+static const struct of_device_id mpc52xx_cdm_ids[] __initconst = {
+ { .compatible = "fsl,mpc5200-cdm", },
+ { .compatible = "mpc5200-cdm", }, /* old */
+ {}
+};
+static const struct of_device_id mpc52xx_gpio_simple[] __initconst = {
+ { .compatible = "fsl,mpc5200-gpio", },
+ {}
+};
+static const struct of_device_id mpc52xx_gpio_wkup[] __initconst = {
+ { .compatible = "fsl,mpc5200-gpio-wkup", },
+ {}
+};
+
+
+/**
+ * mpc52xx_map_common_devices - iomap devices required by common code
+ *
+ * Maps the watchdog-capable gpt (mpc52xx_wdt), the CDM (mpc52xx_cdm) and
+ * the simple and wakeup GPIO blocks (simple_gpio, wkup_gpio). The
+ * of_iomap() results are stored without being checked here; the functions
+ * that use these pointers test them for NULL first.
+ */
+void __init
+mpc52xx_map_common_devices(void)
+{
+ struct device_node *np;
+
+ /* mpc52xx_wdt is mapped here and used in mpc52xx_restart,
+ * possibly from an interrupt context. wdt is only implemented
+ * on gpt0, so check the has-wdt property before mapping.
+ */
+ for_each_matching_node(np, mpc52xx_gpt_ids) {
+ if (of_property_read_bool(np, "fsl,has-wdt") ||
+ of_property_read_bool(np, "has-wdt")) {
+ mpc52xx_wdt = of_iomap(np, 0);
+ of_node_put(np);
+ break;
+ }
+ }
+
+ /* Clock Distribution Module, used by PSC clock setting function */
+ np = of_find_matching_node(NULL, mpc52xx_cdm_ids);
+ mpc52xx_cdm = of_iomap(np, 0);
+ of_node_put(np);
+
+ /* simple_gpio registers */
+ np = of_find_matching_node(NULL, mpc52xx_gpio_simple);
+ simple_gpio = of_iomap(np, 0);
+ of_node_put(np);
+
+ /* wkup_gpio registers */
+ np = of_find_matching_node(NULL, mpc52xx_gpio_wkup);
+ wkup_gpio = of_iomap(np, 0);
+ of_node_put(np);
+}
+
+/**
+ * mpc52xx_set_psc_clkdiv - program the CDM clock divider of a PSC port
+ *
+ * @psc_id: id of psc port; must be 1,2,3 or 6
+ * @clkdiv: clock divider value to put into CDM PSC register.
+ *
+ * Return: 0 on success, -ENODEV if the CDM is not mapped or @psc_id is not
+ * one of the supported ports.
+ */
+int mpc52xx_set_psc_clkdiv(int psc_id, int clkdiv)
+{
+	u16 __iomem *div_reg;
+	u32 enable_bit;
+	u32 divider;
+	u32 enables;
+	unsigned long irqflags;
+
+	if (!mpc52xx_cdm)
+		return -ENODEV;
+
+	/* Pick the divider register and the clk_enables bit of the port */
+	switch (psc_id) {
+	case 1:
+		div_reg = &mpc52xx_cdm->mclken_div_psc1;
+		enable_bit = 0x20;
+		break;
+	case 2:
+		div_reg = &mpc52xx_cdm->mclken_div_psc2;
+		enable_bit = 0x40;
+		break;
+	case 3:
+		div_reg = &mpc52xx_cdm->mclken_div_psc3;
+		enable_bit = 0x80;
+		break;
+	case 6:
+		div_reg = &mpc52xx_cdm->mclken_div_psc6;
+		enable_bit = 0x10;
+		break;
+	default:
+		return -ENODEV;
+	}
+
+	/* 0x8000 ORed with the low 9 bits of the requested divider */
+	divider = 0x8000 | (clkdiv & 0x1FF);
+
+	/* Set the rate and enable the clock */
+	spin_lock_irqsave(&mpc52xx_lock, irqflags);
+	out_be16(div_reg, divider);
+	enables = in_be32(&mpc52xx_cdm->clk_enables);
+	out_be32(&mpc52xx_cdm->clk_enables, enables | enable_bit);
+	spin_unlock_irqrestore(&mpc52xx_lock, irqflags);
+
+	return 0;
+}
+EXPORT_SYMBOL(mpc52xx_set_psc_clkdiv);
+
+/**
+ * mpc52xx_restart - ppc_md->restart hook for mpc5200 using the watchdog timer
+ * @cmd: not used by this implementation
+ *
+ * Disables local interrupts, programs the gpt mapped as mpc52xx_wdt by
+ * mpc52xx_map_common_devices() (when one was found) and then spins
+ * forever. If no watchdog is mapped only an error is logged before
+ * spinning. Never returns.
+ */
+void __noreturn mpc52xx_restart(char *cmd)
+{
+ local_irq_disable();
+
+ /* Turn on the watchdog and wait for it to expire.
+ * It effectively does a reset.
+ * The final mode value 0x9004 equals the WDT_EN (0x8000),
+ * COUNTER_ENABLE (0x1000) and MS_GPIO (0x04) mode bits as they are
+ * named in mpc52xx_gpt.c. */
+ if (mpc52xx_wdt) {
+ out_be32(&mpc52xx_wdt->mode, 0x00000000);
+ out_be32(&mpc52xx_wdt->count, 0x000000ff);
+ out_be32(&mpc52xx_wdt->mode, 0x00009004);
+ } else
+ printk(KERN_ERR __FILE__ ": "
+ "mpc52xx_restart: Can't access wdt. "
+ "Restart impossible, system halted.\n");
+
+ while (1);
+}
+
+#define PSC1_RESET 0x1
+#define PSC1_SYNC 0x4
+#define PSC1_SDATA_OUT 0x1
+#define PSC2_RESET 0x2
+#define PSC2_SYNC (0x4<<4)
+#define PSC2_SDATA_OUT (0x1<<4)
+#define MPC52xx_GPIO_PSC1_MASK 0x7
+#define MPC52xx_GPIO_PSC2_MASK (0x7<<4)
+
+/**
+ * mpc5200_psc_ac97_gpio_reset - Use gpio pins to reset the ac97 bus
+ *
+ * @psc_number: zero-based psc index to reset: 0 selects the PSC1 pins,
+ * 1 selects the PSC2 pins (only psc 1 and 2 support ac97)
+ *
+ * The whole pin sequence runs under gpio_lock with interrupts disabled.
+ *
+ * Return: 0 on success, -ENODEV if the GPIO register blocks are not mapped
+ * or @psc_number is neither 0 nor 1.
+ */
+int mpc5200_psc_ac97_gpio_reset(int psc_number)
+{
+ unsigned long flags;
+ u32 gpio;
+ u32 mux;
+ int out;
+ int reset;
+ int sync;
+
+ if ((!simple_gpio) || (!wkup_gpio))
+ return -ENODEV;
+
+ switch (psc_number) {
+ case 0:
+ reset = PSC1_RESET; /* AC97_1_RES */
+ sync = PSC1_SYNC; /* AC97_1_SYNC */
+ out = PSC1_SDATA_OUT; /* AC97_1_SDATA_OUT */
+ gpio = MPC52xx_GPIO_PSC1_MASK;
+ break;
+ case 1:
+ reset = PSC2_RESET; /* AC97_2_RES */
+ sync = PSC2_SYNC; /* AC97_2_SYNC */
+ out = PSC2_SDATA_OUT; /* AC97_2_SDATA_OUT */
+ gpio = MPC52xx_GPIO_PSC2_MASK;
+ break;
+ default:
+ pr_err(__FILE__ ": Unable to determine PSC, no ac97 "
+ "cold-reset will be performed\n");
+ return -ENODEV;
+ }
+
+ spin_lock_irqsave(&gpio_lock, flags);
+
+ /* Reconfigure pin-muxing to gpio; the old value is restored below */
+ mux = in_be32(&simple_gpio->port_config);
+ out_be32(&simple_gpio->port_config, mux & (~gpio));
+
+ /* enable gpio pins for output */
+ setbits8(&wkup_gpio->wkup_gpioe, reset);
+ setbits32(&simple_gpio->simple_gpioe, sync | out);
+
+ setbits8(&wkup_gpio->wkup_ddr, reset);
+ setbits32(&simple_gpio->simple_ddr, sync | out);
+
+ /* Assert cold reset */
+ clrbits32(&simple_gpio->simple_dvo, sync | out);
+ clrbits8(&wkup_gpio->wkup_dvo, reset);
+
+ /* wait for 1 us */
+ udelay(1);
+
+ /* Deassert reset */
+ setbits8(&wkup_gpio->wkup_dvo, reset);
+
+ /* wait at least 200ns */
+ /* 7 ~= (200ns * timebase) / ns2sec */
+ __delay(7);
+
+ /* Restore pin-muxing */
+ out_be32(&simple_gpio->port_config, mux);
+
+ spin_unlock_irqrestore(&gpio_lock, flags);
+
+ return 0;
+}
+EXPORT_SYMBOL(mpc5200_psc_ac97_gpio_reset);
diff --git a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
new file mode 100644
index 000000000..581059527
--- /dev/null
+++ b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
@@ -0,0 +1,780 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * MPC5200 General Purpose Timer device driver
+ *
+ * Copyright (c) 2009 Secret Lab Technologies Ltd.
+ * Copyright (c) 2008 Sascha Hauer <s.hauer@pengutronix.de>, Pengutronix
+ *
+ * This file is a driver for the General Purpose Timer (gpt) devices
+ * found on the MPC5200 SoC. Each timer has an IO pin which can be used
+ * for GPIO or can be used to raise interrupts. The timer function can
+ * be used independently from the IO pin, or it can be used to control
+ * output signals or measure input signals.
+ *
+ * This driver supports the GPIO and IRQ controller functions of the GPT
+ * device. Timer functions are not yet supported.
+ *
+ * The timer gpt0 can be used as watchdog (wdt). If the wdt mode is used,
+ * this prevents the use of any gpt0 gpt function (i.e. they will fail with
+ * -EBUSY). Thus, the safety wdt function always has precedence over the gpt
+ * function. If the kernel has been compiled with CONFIG_WATCHDOG_NOWAYOUT,
+ * this means that gpt0 is locked in wdt mode until the next reboot - this
+ * may be a requirement in safety applications.
+ *
+ * To use the GPIO function, the following two properties must be added
+ * to the device tree node for the gpt device (typically in the .dts file
+ * for the board):
+ * gpio-controller;
+ * #gpio-cells = < 2 >;
+ * This driver will register the GPIO pin if it finds the gpio-controller
+ * property in the device tree.
+ *
+ * To use the IRQ controller function, the following two properties must
+ * be added to the device tree node for the gpt device:
+ * interrupt-controller;
+ * #interrupt-cells = < 1 >;
+ * The IRQ controller binding only uses one cell to specify the interrupt,
+ * and the IRQ flags are encoded in the cell. A cell is not used to encode
+ * the IRQ number because the GPT only has a single IRQ source. For flags,
+ * a value of '1' means rising edge sensitive and '2' means falling edge.
+ *
+ * The GPIO and the IRQ controller functions can be used at the same time,
+ * but in this use case the IO line will only work as an input. Trying to
+ * use it as a GPIO output will not work.
+ *
+ * When using the GPIO line as an output, it can either be driven as normal
+ * IO, or it can be an Open Collector (OC) output. At the moment it is the
+ * responsibility of either the bootloader or the platform setup code to set
+ * the output mode. This driver does not change the output mode setting.
+ */
+
+#include <linux/irq.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/of_gpio.h>
+#include <linux/platform_device.h>
+#include <linux/kernel.h>
+#include <linux/property.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/watchdog.h>
+#include <linux/miscdevice.h>
+#include <linux/uaccess.h>
+#include <linux/module.h>
+#include <asm/div64.h>
+#include <asm/mpc52xx.h>
+
+MODULE_DESCRIPTION("Freescale MPC52xx gpt driver");
+MODULE_AUTHOR("Sascha Hauer, Grant Likely, Albrecht Dreß");
+MODULE_LICENSE("GPL");
+
+/**
+ * struct mpc52xx_gpt_priv - Private data structure for MPC52xx GPT driver
+ * @list: entry in mpc52xx_gpt_list, the list of all GPT devices
+ * @dev: pointer to device structure
+ * @regs: virtual address of GPT registers
+ * @lock: spinlock to coordinate between different functions.
+ * @irqhost: Pointer to irq_domain instance; used when IRQ mode is supported
+ * @ipb_freq: bus frequency as returned by mpc5xxx_get_bus_frequency(); used
+ * to convert between periods in ns and timer clock counts
+ * @wdt_mode: only relevant for gpt0: bit 0 (MPC52xx_GPT_CAN_WDT) indicates
+ * if the gpt may be used as wdt, bit 1 (MPC52xx_GPT_IS_WDT) indicates
+ * if the timer is actively used as wdt which blocks gpt functions
+ * @gc: gpio_chip instance structure; used when GPIO is enabled (member only
+ * exists when CONFIG_GPIOLIB is defined)
+ */
+struct mpc52xx_gpt_priv {
+ struct list_head list; /* List of all GPT devices */
+ struct device *dev;
+ struct mpc52xx_gpt __iomem *regs;
+ raw_spinlock_t lock;
+ struct irq_domain *irqhost;
+ u32 ipb_freq;
+ u8 wdt_mode;
+
+#if defined(CONFIG_GPIOLIB)
+ struct gpio_chip gc;
+#endif
+};
+
+LIST_HEAD(mpc52xx_gpt_list);
+DEFINE_MUTEX(mpc52xx_gpt_list_mutex);
+
+#define MPC52xx_GPT_MODE_MS_MASK (0x07)
+#define MPC52xx_GPT_MODE_MS_IC (0x01)
+#define MPC52xx_GPT_MODE_MS_OC (0x02)
+#define MPC52xx_GPT_MODE_MS_PWM (0x03)
+#define MPC52xx_GPT_MODE_MS_GPIO (0x04)
+
+#define MPC52xx_GPT_MODE_GPIO_MASK (0x30)
+#define MPC52xx_GPT_MODE_GPIO_OUT_LOW (0x20)
+#define MPC52xx_GPT_MODE_GPIO_OUT_HIGH (0x30)
+
+#define MPC52xx_GPT_MODE_COUNTER_ENABLE (0x1000)
+#define MPC52xx_GPT_MODE_CONTINUOUS (0x0400)
+#define MPC52xx_GPT_MODE_OPEN_DRAIN (0x0200)
+#define MPC52xx_GPT_MODE_IRQ_EN (0x0100)
+#define MPC52xx_GPT_MODE_WDT_EN (0x8000)
+
+#define MPC52xx_GPT_MODE_ICT_MASK (0x030000)
+#define MPC52xx_GPT_MODE_ICT_RISING (0x010000)
+#define MPC52xx_GPT_MODE_ICT_FALLING (0x020000)
+#define MPC52xx_GPT_MODE_ICT_TOGGLE (0x030000)
+
+#define MPC52xx_GPT_MODE_WDT_PING (0xa5)
+
+#define MPC52xx_GPT_STATUS_IRQMASK (0x000f)
+
+#define MPC52xx_GPT_CAN_WDT (1 << 0)
+#define MPC52xx_GPT_IS_WDT (1 << 1)
+
+
+/* ---------------------------------------------------------------------
+ * Cascaded interrupt controller hooks
+ */
+
+/* irq_chip hook: set the IRQ_EN bit of the GPT mode register */
+static void mpc52xx_gpt_irq_unmask(struct irq_data *d)
+{
+	unsigned long irqflags;
+	struct mpc52xx_gpt_priv *priv = irq_data_get_irq_chip_data(d);
+
+	raw_spin_lock_irqsave(&priv->lock, irqflags);
+	setbits32(&priv->regs->mode, MPC52xx_GPT_MODE_IRQ_EN);
+	raw_spin_unlock_irqrestore(&priv->lock, irqflags);
+}
+
+/* irq_chip hook: clear the IRQ_EN bit of the GPT mode register */
+static void mpc52xx_gpt_irq_mask(struct irq_data *d)
+{
+	unsigned long irqflags;
+	struct mpc52xx_gpt_priv *priv = irq_data_get_irq_chip_data(d);
+
+	raw_spin_lock_irqsave(&priv->lock, irqflags);
+	clrbits32(&priv->regs->mode, MPC52xx_GPT_MODE_IRQ_EN);
+	raw_spin_unlock_irqrestore(&priv->lock, irqflags);
+}
+
+/* irq_chip hook: write the IRQ status mask to the GPT status register */
+static void mpc52xx_gpt_irq_ack(struct irq_data *d)
+{
+	struct mpc52xx_gpt_priv *priv = irq_data_get_irq_chip_data(d);
+	struct mpc52xx_gpt __iomem *regs = priv->regs;
+
+	out_be32(&regs->status, MPC52xx_GPT_STATUS_IRQMASK);
+}
+
+/*
+ * irq_chip hook: program the input-capture trigger field of the mode
+ * register from the rising/falling bits of @flow_type. Always returns 0.
+ */
+static int mpc52xx_gpt_irq_set_type(struct irq_data *d, unsigned int flow_type)
+{
+	struct mpc52xx_gpt_priv *priv = irq_data_get_irq_chip_data(d);
+	unsigned long irqflags;
+	u32 ict = 0;
+	u32 mode;
+
+	dev_dbg(priv->dev, "%s: virq=%i type=%x\n", __func__, d->irq, flow_type);
+
+	/* Translate the requested edges into ICT bits up front */
+	if (flow_type & IRQF_TRIGGER_RISING)
+		ict |= MPC52xx_GPT_MODE_ICT_RISING;
+	if (flow_type & IRQF_TRIGGER_FALLING)
+		ict |= MPC52xx_GPT_MODE_ICT_FALLING;
+
+	raw_spin_lock_irqsave(&priv->lock, irqflags);
+	mode = in_be32(&priv->regs->mode);
+	mode &= ~MPC52xx_GPT_MODE_ICT_MASK;
+	mode |= ict;
+	out_be32(&priv->regs->mode, mode);
+	raw_spin_unlock_irqrestore(&priv->lock, irqflags);
+
+	return 0;
+}
+
+/* irq_chip installed on the GPT's virq by mpc52xx_gpt_irq_map() */
+static struct irq_chip mpc52xx_gpt_irq_chip = {
+ .name = "MPC52xx GPT",
+ .irq_unmask = mpc52xx_gpt_irq_unmask,
+ .irq_mask = mpc52xx_gpt_irq_mask,
+ .irq_ack = mpc52xx_gpt_irq_ack,
+ .irq_set_type = mpc52xx_gpt_irq_set_type,
+};
+
+/*
+ * Chained handler of the parent interrupt: forwards to hwirq 0 of the GPT
+ * irq domain when any IRQ status bit is set.
+ */
+static void mpc52xx_gpt_irq_cascade(struct irq_desc *desc)
+{
+	struct mpc52xx_gpt_priv *priv = irq_desc_get_handler_data(desc);
+	u32 pending = in_be32(&priv->regs->status);
+
+	if (!(pending & MPC52xx_GPT_STATUS_IRQMASK))
+		return;
+
+	generic_handle_domain_irq(priv->irqhost, 0);
+}
+
+/*
+ * irq_domain .map hook: attach the GPT private data and the GPT irq_chip
+ * (with the edge flow handler) to @virq. Always returns 0.
+ */
+static int mpc52xx_gpt_irq_map(struct irq_domain *h, unsigned int virq,
+			       irq_hw_number_t hw)
+{
+	struct mpc52xx_gpt_priv *priv = h->host_data;
+
+	dev_dbg(priv->dev, "%s: h=%p, virq=%i\n", __func__, h, virq);
+
+	irq_set_chip_data(virq, priv);
+	irq_set_chip_and_handler(virq, &mpc52xx_gpt_irq_chip, handle_edge_irq);
+
+	return 0;
+}
+
+/*
+ * irq_domain .xlate hook: translate a device tree interrupt specifier.
+ *
+ * The first cell carries the trigger flags; values 0..3 are accepted and
+ * handed back through @out_flags. The hardware irq number is always 0.
+ * Returns 0 on success, -EINVAL for an empty or out-of-range specifier.
+ */
+static int mpc52xx_gpt_irq_xlate(struct irq_domain *h, struct device_node *ct,
+				 const u32 *intspec, unsigned int intsize,
+				 irq_hw_number_t *out_hwirq,
+				 unsigned int *out_flags)
+{
+	struct mpc52xx_gpt_priv *gpt = h->host_data;
+
+	/* Validate before touching intspec[0]: the specifier may be empty */
+	if ((intsize < 1) || (intspec[0] > 3)) {
+		dev_err(gpt->dev, "bad irq specifier in %pOF\n", ct);
+		return -EINVAL;
+	}
+
+	dev_dbg(gpt->dev, "%s: flags=%u\n", __func__, intspec[0]);
+
+	*out_hwirq = 0; /* The GPT only has 1 IRQ line */
+	*out_flags = intspec[0];
+
+	return 0;
+}
+
+/* Domain callbacks passed to irq_domain_add_linear() in mpc52xx_gpt_irq_setup() */
+static const struct irq_domain_ops mpc52xx_gpt_irq_ops = {
+ .map = mpc52xx_gpt_irq_map,
+ .xlate = mpc52xx_gpt_irq_xlate,
+};
+
+/*
+ * Set up the cascaded interrupt controller function of a GPT.
+ *
+ * Does nothing when @node has no parent interrupt to map. Otherwise a
+ * one-entry linear irq domain is created, the cascade handler is chained
+ * onto the parent interrupt, and a GPT whose mode-select field is still
+ * zero is switched to Input Capture mode.
+ */
+static void
+mpc52xx_gpt_irq_setup(struct mpc52xx_gpt_priv *gpt, struct device_node *node)
+{
+	unsigned long irqflags;
+	u32 cur_mode;
+	int parent_virq = irq_of_parse_and_map(node, 0);
+
+	if (!parent_virq)
+		return;
+
+	gpt->irqhost = irq_domain_add_linear(node, 1, &mpc52xx_gpt_irq_ops, gpt);
+	if (!gpt->irqhost) {
+		dev_err(gpt->dev, "irq_domain_add_linear() failed\n");
+		return;
+	}
+
+	irq_set_handler_data(parent_virq, gpt);
+	irq_set_chained_handler(parent_virq, mpc52xx_gpt_irq_cascade);
+
+	/* Only a disabled GPT is moved to Input Capture mode; a non-zero
+	 * mode-select value means the pin could already be in use. */
+	raw_spin_lock_irqsave(&gpt->lock, irqflags);
+	cur_mode = in_be32(&gpt->regs->mode);
+	if (!(cur_mode & MPC52xx_GPT_MODE_MS_MASK))
+		out_be32(&gpt->regs->mode, cur_mode | MPC52xx_GPT_MODE_MS_IC);
+	raw_spin_unlock_irqrestore(&gpt->lock, irqflags);
+
+	dev_dbg(gpt->dev, "%s() complete. virq=%i\n", __func__, parent_virq);
+}
+
+
+/* ---------------------------------------------------------------------
+ * GPIOLIB hooks
+ */
+#if defined(CONFIG_GPIOLIB)
+/* gpio_chip .get hook: return bit 8 of the GPT status register (0 or 1) */
+static int mpc52xx_gpt_gpio_get(struct gpio_chip *gc, unsigned int gpio)
+{
+	struct mpc52xx_gpt_priv *priv = gpiochip_get_data(gc);
+	u32 status = in_be32(&priv->regs->status);
+
+	return (status >> 8) & 1;
+}
+
+/*
+ * gpio_chip .set hook: replace the GPIO field of the mode register with
+ * the output-high or output-low encoding, depending on @v.
+ */
+static void
+mpc52xx_gpt_gpio_set(struct gpio_chip *gc, unsigned int gpio, int v)
+{
+	struct mpc52xx_gpt_priv *priv = gpiochip_get_data(gc);
+	unsigned long irqflags;
+	u32 level;
+
+	dev_dbg(priv->dev, "%s: gpio:%d v:%d\n", __func__, gpio, v);
+
+	if (v)
+		level = MPC52xx_GPT_MODE_GPIO_OUT_HIGH;
+	else
+		level = MPC52xx_GPT_MODE_GPIO_OUT_LOW;
+
+	raw_spin_lock_irqsave(&priv->lock, irqflags);
+	clrsetbits_be32(&priv->regs->mode, MPC52xx_GPT_MODE_GPIO_MASK, level);
+	raw_spin_unlock_irqrestore(&priv->lock, irqflags);
+}
+
+/*
+ * gpio_chip .direction_input hook: clear the GPIO field of the mode
+ * register. Always returns 0.
+ */
+static int mpc52xx_gpt_gpio_dir_in(struct gpio_chip *gc, unsigned int gpio)
+{
+	unsigned long irqflags;
+	struct mpc52xx_gpt_priv *priv = gpiochip_get_data(gc);
+
+	dev_dbg(priv->dev, "%s: gpio:%d\n", __func__, gpio);
+
+	raw_spin_lock_irqsave(&priv->lock, irqflags);
+	clrbits32(&priv->regs->mode, MPC52xx_GPT_MODE_GPIO_MASK);
+	raw_spin_unlock_irqrestore(&priv->lock, irqflags);
+
+	return 0;
+}
+
+/*
+ * gpio_chip .direction_output hook: selecting an output level in the mode
+ * register is all that is done, so this defers to mpc52xx_gpt_gpio_set().
+ * Always returns 0.
+ */
+static int
+mpc52xx_gpt_gpio_dir_out(struct gpio_chip *gc, unsigned int gpio, int val)
+{
+	int rc = 0;
+
+	mpc52xx_gpt_gpio_set(gc, gpio, val);
+
+	return rc;
+}
+
+/*
+ * Register the GPT pin as a one-line gpio_chip.
+ *
+ * Nothing is done unless the device has a "gpio-controller" property.
+ * The external pin is switched to GPIO mode before the chip is
+ * registered. Failures are logged only; the probe continues without
+ * GPIO support.
+ */
+static void mpc52xx_gpt_gpio_setup(struct mpc52xx_gpt_priv *gpt)
+{
+	int rc;
+
+	/* Only setup GPIO if the device claims the GPT is a GPIO controller */
+	if (!device_property_present(gpt->dev, "gpio-controller"))
+		return;
+
+	gpt->gc.label = kasprintf(GFP_KERNEL, "%pfw", dev_fwnode(gpt->dev));
+	if (!gpt->gc.label) {
+		dev_err(gpt->dev, "out of memory\n");
+		return;
+	}
+
+	gpt->gc.ngpio = 1;
+	gpt->gc.direction_input = mpc52xx_gpt_gpio_dir_in;
+	gpt->gc.direction_output = mpc52xx_gpt_gpio_dir_out;
+	gpt->gc.get = mpc52xx_gpt_gpio_get;
+	gpt->gc.set = mpc52xx_gpt_gpio_set;
+	gpt->gc.base = -1;
+	gpt->gc.parent = gpt->dev;
+
+	/* Setup external pin in GPIO mode */
+	clrsetbits_be32(&gpt->regs->mode, MPC52xx_GPT_MODE_MS_MASK,
+			MPC52xx_GPT_MODE_MS_GPIO);
+
+	rc = gpiochip_add_data(&gpt->gc, gpt);
+	if (rc) {
+		dev_err(gpt->dev, "gpiochip_add_data() failed; rc=%i\n", rc);
+		/* The chip was not registered: release the label and do not
+		 * report completion. */
+		kfree(gpt->gc.label);
+		gpt->gc.label = NULL;
+		return;
+	}
+
+	dev_dbg(gpt->dev, "%s() complete.\n", __func__);
+}
+#else /* defined(CONFIG_GPIOLIB) */
+static void mpc52xx_gpt_gpio_setup(struct mpc52xx_gpt_priv *gpt) { }
+#endif /* defined(CONFIG_GPIOLIB) */
+
+/***********************************************************************
+ * Timer API
+ */
+
+/**
+ * mpc52xx_gpt_from_irq - Return the GPT device associated with an IRQ number
+ * @irq: irq of timer.
+ *
+ * Walks mpc52xx_gpt_list under mpc52xx_gpt_list_mutex and compares @irq
+ * with the virq mapped for hwirq 0 of each GPT that has an irq domain.
+ *
+ * Return: the matching GPT, or NULL if no registered GPT owns @irq.
+ */
+struct mpc52xx_gpt_priv *mpc52xx_gpt_from_irq(int irq)
+{
+	struct mpc52xx_gpt_priv *found = NULL;
+	struct mpc52xx_gpt_priv *cur;
+
+	mutex_lock(&mpc52xx_gpt_list_mutex);
+	list_for_each_entry(cur, &mpc52xx_gpt_list, list) {
+		/* GPTs without an irq domain cannot own an irq */
+		if (!cur->irqhost)
+			continue;
+		if (irq == irq_linear_revmap(cur->irqhost, 0)) {
+			found = cur;
+			break;
+		}
+	}
+	mutex_unlock(&mpc52xx_gpt_list_mutex);
+
+	return found;
+}
+EXPORT_SYMBOL(mpc52xx_gpt_from_irq);
+
+/*
+ * Program and start a GPT, either as a plain timer or as the watchdog.
+ *
+ * @gpt: gpt private data
+ * @period: requested period in ns
+ * @continuous: non-zero selects continuous mode (ignored when @as_wdt is set)
+ * @as_wdt: non-zero starts the gpt as watchdog and marks it MPC52xx_GPT_IS_WDT
+ *
+ * Returns 0 on success, -EINVAL if @period does not fit the hardware
+ * counter, or -EBUSY when a gpt that is in use as watchdog is requested
+ * as a plain timer.
+ */
+static int mpc52xx_gpt_do_start(struct mpc52xx_gpt_priv *gpt, u64 period,
+				int continuous, int as_wdt)
+{
+	u32 clear, set;
+	u64 clocks;
+	u64 max_period;
+	u32 prescale;
+	unsigned long flags;
+
+	clear = MPC52xx_GPT_MODE_MS_MASK | MPC52xx_GPT_MODE_CONTINUOUS;
+	set = MPC52xx_GPT_MODE_MS_GPIO | MPC52xx_GPT_MODE_COUNTER_ENABLE;
+	if (as_wdt) {
+		clear |= MPC52xx_GPT_MODE_IRQ_EN;
+		set |= MPC52xx_GPT_MODE_WDT_EN;
+	} else if (continuous)
+		set |= MPC52xx_GPT_MODE_CONTINUOUS;
+
+	/* Reject periods that cannot be represented before multiplying:
+	 * 'period * ipb_freq' would otherwise be able to overflow 64 bits
+	 * and wrap to a value that passes the 32 bit range check below.
+	 * Any period above (2^32 * 10^9) / ipb_freq yields more than 32
+	 * bits worth of clocks, and any period at or below it keeps the
+	 * product within 64 bits. */
+	if (gpt->ipb_freq) {
+		max_period = 1000000000ULL << 32;
+		do_div(max_period, gpt->ipb_freq);
+		if (period > max_period)
+			return -EINVAL;
+	}
+
+	/* Determine the number of clocks in the requested period. 64 bit
+	 * arithmetic is done here to preserve the precision until the value
+	 * is scaled back down into the u32 range. Period is in 'ns', bus
+	 * frequency is in Hz. */
+	clocks = period * (u64)gpt->ipb_freq;
+	do_div(clocks, 1000000000); /* Scale it down to ns range */
+
+	/* This device cannot handle a clock count greater than 32 bits */
+	if (clocks > 0xffffffff)
+		return -EINVAL;
+
+	/* Calculate the prescaler and count values from the clocks value.
+	 * 'clocks' is the number of clock ticks in the period. The timer
+	 * has 16 bit precision and a 16 bit prescaler. Prescaler is
+	 * calculated by integer dividing the clocks by 0x10000 (shifting
+	 * down 16 bits) to obtain the smallest possible divisor for clocks
+	 * to get a 16 bit count value.
+	 *
+	 * Note: the prescale register is '1' based, not '0' based. ie. a
+	 * value of '1' means divide the clock by one. 0xffff divides the
+	 * clock by 0xffff. '0x0000' does not divide by zero, but wraps
+	 * around and divides by 0x10000. That is why prescale must be
+	 * a u32 variable, not a u16, for this calculation. */
+	prescale = (clocks >> 16) + 1;
+	do_div(clocks, prescale);
+	if (clocks > 0xffff) {
+		pr_err("calculation error; prescale:%x clocks:%llx\n",
+		       prescale, clocks);
+		return -EINVAL;
+	}
+
+	/* Set and enable the timer, reject an attempt to use a wdt as gpt */
+	raw_spin_lock_irqsave(&gpt->lock, flags);
+	if (as_wdt)
+		gpt->wdt_mode |= MPC52xx_GPT_IS_WDT;
+	else if ((gpt->wdt_mode & MPC52xx_GPT_IS_WDT) != 0) {
+		raw_spin_unlock_irqrestore(&gpt->lock, flags);
+		return -EBUSY;
+	}
+	out_be32(&gpt->regs->count, prescale << 16 | clocks);
+	clrsetbits_be32(&gpt->regs->mode, clear, set);
+	raw_spin_unlock_irqrestore(&gpt->lock, flags);
+
+	return 0;
+}
+
+/**
+ * mpc52xx_gpt_start_timer - Set and enable the GPT timer
+ * @gpt: Pointer to gpt private data structure
+ * @period: period of timer in ns; max. ~130s @ 33MHz IPB clock
+ * @continuous: set to 1 to make timer continuous free running
+ *
+ * An interrupt will be generated every time the timer fires
+ *
+ * Thin wrapper around mpc52xx_gpt_do_start() with watchdog mode disabled.
+ *
+ * Return: 0 on success, -EINVAL if @period cannot be programmed, -EBUSY if
+ * the gpt is currently used as watchdog.
+ */
+int mpc52xx_gpt_start_timer(struct mpc52xx_gpt_priv *gpt, u64 period,
+ int continuous)
+{
+ return mpc52xx_gpt_do_start(gpt, period, continuous, 0);
+}
+EXPORT_SYMBOL(mpc52xx_gpt_start_timer);
+
+/**
+ * mpc52xx_gpt_stop_timer - Stop a gpt
+ * @gpt: Pointer to gpt private data structure
+ *
+ * Clears the counter-enable bit of the mode register unless the gpt is
+ * in use as watchdog.
+ *
+ * Return: 0 on success, -EBUSY if attempting to stop a wdt
+ */
+int mpc52xx_gpt_stop_timer(struct mpc52xx_gpt_priv *gpt)
+{
+	unsigned long irqflags;
+	int rc = 0;
+
+	raw_spin_lock_irqsave(&gpt->lock, irqflags);
+	if (gpt->wdt_mode & MPC52xx_GPT_IS_WDT) {
+		/* a timer used as watchdog (gpt 0 only) must keep running */
+		rc = -EBUSY;
+	} else {
+		clrbits32(&gpt->regs->mode, MPC52xx_GPT_MODE_COUNTER_ENABLE);
+	}
+	raw_spin_unlock_irqrestore(&gpt->lock, irqflags);
+
+	return rc;
+}
+EXPORT_SYMBOL(mpc52xx_gpt_stop_timer);
+
+/**
+ * mpc52xx_gpt_timer_period - Read the timer period
+ * @gpt: Pointer to gpt private data structure
+ *
+ * Reads the count register and converts its prescaler (upper 16 bits) and
+ * count (lower 16 bits) fields back into a period using the bus frequency.
+ *
+ * Return: the timer period in ns
+ */
+u64 mpc52xx_gpt_timer_period(struct mpc52xx_gpt_priv *gpt)
+{
+	unsigned long irqflags;
+	u64 raw;
+	u64 divider;
+	u64 ns;
+
+	raw_spin_lock_irqsave(&gpt->lock, irqflags);
+	raw = in_be32(&gpt->regs->count);
+	raw_spin_unlock_irqrestore(&gpt->lock, irqflags);
+
+	/* a prescaler field of 0 stands for a division by 0x10000 */
+	divider = raw >> 16;
+	if (!divider)
+		divider = 0x10000;
+
+	ns = (raw & 0xffff) * divider * 1000000000ULL;
+	do_div(ns, gpt->ipb_freq);
+
+	return ns;
+}
+EXPORT_SYMBOL(mpc52xx_gpt_timer_period);
+
+#if defined(CONFIG_MPC5200_WDT)
+/***********************************************************************
+ * Watchdog API for gpt0
+ */
+
+#define WDT_IDENTITY "mpc52xx watchdog on GPT0"
+
+/* wdt_is_active stores whether or not the /dev/watchdog device is opened */
+static unsigned long wdt_is_active;
+
+/* wdt-capable gpt */
+static struct mpc52xx_gpt_priv *mpc52xx_gpt_wdt;
+
+/* low-level wdt functions */
+
+/*
+ * Ping the watchdog: write the ping value as a single byte to the first
+ * byte of the mode register, under the gpt lock.
+ */
+static inline void mpc52xx_gpt_wdt_ping(struct mpc52xx_gpt_priv *gpt_wdt)
+{
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&gpt_wdt->lock, flags);
+	/* keep the __iomem annotation when narrowing to a byte access */
+	out_8((u8 __iomem *) &gpt_wdt->regs->mode, MPC52xx_GPT_MODE_WDT_PING);
+	raw_spin_unlock_irqrestore(&gpt_wdt->lock, flags);
+}
+
+/* wdt misc device api */
+
+/*
+ * write() handler: any write pings the watchdog; the written data itself
+ * is not inspected. Returns @len so that the caller sees all of its data
+ * as consumed instead of a zero-length write.
+ */
+static ssize_t mpc52xx_wdt_write(struct file *file, const char __user *data,
+				 size_t len, loff_t *ppos)
+{
+	struct mpc52xx_gpt_priv *gpt_wdt = file->private_data;
+
+	mpc52xx_gpt_wdt_ping(gpt_wdt);
+	return len;
+}
+
+/* Capabilities copied to user space by the WDIOC_GETSUPPORT ioctl */
+static const struct watchdog_info mpc5200_wdt_info = {
+ .options = WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING,
+ .identity = WDT_IDENTITY,
+};
+
+/*
+ * ioctl handler of the watchdog misc device.
+ *
+ * Supports WDIOC_GETSUPPORT, WDIOC_GETSTATUS, WDIOC_GETBOOTSTATUS (both
+ * always report 0), WDIOC_KEEPALIVE, WDIOC_SETTIMEOUT and
+ * WDIOC_GETTIMEOUT. Timeouts are exchanged with user space in seconds.
+ *
+ * Returns 0 on success, -EFAULT or the get_user()/put_user() error for
+ * bad user pointers, -EINVAL for a timeout that is negative or cannot be
+ * programmed, and -ENOTTY for unknown commands.
+ */
+static long mpc52xx_wdt_ioctl(struct file *file, unsigned int cmd,
+			      unsigned long arg)
+{
+	struct mpc52xx_gpt_priv *gpt_wdt = file->private_data;
+	int __user *data = (int __user *)arg;
+	int timeout;
+	u64 real_timeout;
+	int ret = 0;
+
+	switch (cmd) {
+	case WDIOC_GETSUPPORT:
+		ret = copy_to_user(data, &mpc5200_wdt_info,
+				   sizeof(mpc5200_wdt_info));
+		if (ret)
+			ret = -EFAULT;
+		break;
+
+	case WDIOC_GETSTATUS:
+	case WDIOC_GETBOOTSTATUS:
+		ret = put_user(0, data);
+		break;
+
+	case WDIOC_KEEPALIVE:
+		mpc52xx_gpt_wdt_ping(gpt_wdt);
+		break;
+
+	case WDIOC_SETTIMEOUT:
+		ret = get_user(timeout, data);
+		if (ret)
+			break;
+		/* a negative value would be sign-extended by the u64 cast
+		 * below and turn into a bogus, huge period */
+		if (timeout < 0) {
+			ret = -EINVAL;
+			break;
+		}
+		real_timeout = (u64) timeout * 1000000000ULL;
+		ret = mpc52xx_gpt_do_start(gpt_wdt, real_timeout, 0, 1);
+		if (ret)
+			break;
+		/* fall through and return the timeout */
+		fallthrough;
+
+	case WDIOC_GETTIMEOUT:
+		/* we need to round here as to avoid e.g. the following
+		 * situation:
+		 * - timeout requested is 1 second;
+		 * - real timeout @33MHz is 999997090ns
+		 * - the int divide by 10^9 will return 0.
+		 */
+		real_timeout =
+			mpc52xx_gpt_timer_period(gpt_wdt) + 500000000ULL;
+		do_div(real_timeout, 1000000000ULL);
+		timeout = (int) real_timeout;
+		ret = put_user(timeout, data);
+		break;
+
+	default:
+		ret = -ENOTTY;
+	}
+	return ret;
+}
+
+/*
+ * open() handler of /dev/watchdog.
+ *
+ * Fails with -ENODEV when no wdt-capable gpt was registered and with
+ * -EBUSY when the device is already open. Otherwise the watchdog is
+ * started with a 30 second timeout; if that fails, the "open" marker is
+ * dropped again and the error is returned.
+ */
+static int mpc52xx_wdt_open(struct inode *inode, struct file *file)
+{
+	int rc;
+
+	/* sanity check */
+	if (!mpc52xx_gpt_wdt)
+		return -ENODEV;
+
+	/* /dev/watchdog can only be opened once */
+	if (test_and_set_bit(0, &wdt_is_active))
+		return -EBUSY;
+
+	/* Set and activate the watchdog with 30 seconds timeout */
+	rc = mpc52xx_gpt_do_start(mpc52xx_gpt_wdt, 30ULL * 1000000000ULL,
+				  0, 1);
+	if (!rc) {
+		file->private_data = mpc52xx_gpt_wdt;
+		return stream_open(inode, file);
+	}
+
+	clear_bit(0, &wdt_is_active);
+	return rc;
+}
+
+static int mpc52xx_wdt_release(struct inode *inode, struct file *file)
+{
+ /* note: releasing the wdt in NOWAYOUT-mode does not stop it.
+ * Without CONFIG_WATCHDOG_NOWAYOUT the counter-enable and wdt-enable
+ * bits are cleared and the MPC52xx_GPT_IS_WDT flag is dropped, so
+ * that mpc52xx_gpt_do_start() accepts the gpt as a plain timer
+ * again. In both cases wdt_is_active is cleared so that the device
+ * can be opened again. Always returns 0. */
+#if !defined(CONFIG_WATCHDOG_NOWAYOUT)
+ struct mpc52xx_gpt_priv *gpt_wdt = file->private_data;
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&gpt_wdt->lock, flags);
+ clrbits32(&gpt_wdt->regs->mode,
+ MPC52xx_GPT_MODE_COUNTER_ENABLE | MPC52xx_GPT_MODE_WDT_EN);
+ gpt_wdt->wdt_mode &= ~MPC52xx_GPT_IS_WDT;
+ raw_spin_unlock_irqrestore(&gpt_wdt->lock, flags);
+#endif
+ clear_bit(0, &wdt_is_active);
+ return 0;
+}
+
+
+/* File operations of the watchdog misc device (mpc52xx_wdt_miscdev) */
+static const struct file_operations mpc52xx_wdt_fops = {
+ .owner = THIS_MODULE,
+ .llseek = no_llseek,
+ .write = mpc52xx_wdt_write,
+ .unlocked_ioctl = mpc52xx_wdt_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
+ .open = mpc52xx_wdt_open,
+ .release = mpc52xx_wdt_release,
+};
+
+/* The "watchdog" misc device; registered by mpc52xx_gpt_wdt_init() */
+static struct miscdevice mpc52xx_wdt_miscdev = {
+ .minor = WATCHDOG_MINOR,
+ .name = "watchdog",
+ .fops = &mpc52xx_wdt_fops,
+};
+
+/*
+ * Initcall: register the watchdog misc device and log the outcome.
+ * Returns the misc_register() result.
+ */
+static int mpc52xx_gpt_wdt_init(void)
+{
+	/* try to register the watchdog misc device */
+	int err = misc_register(&mpc52xx_wdt_miscdev);
+
+	if (!err) {
+		pr_info("%s: watchdog device registered\n", WDT_IDENTITY);
+		return err;
+	}
+
+	pr_err("%s: cannot register watchdog device\n", WDT_IDENTITY);
+	return err;
+}
+
+/*
+ * Remember @gpt as the wdt-capable gpt and, when @period points to a
+ * non-zero timeout in seconds, start it as watchdog right away. A failure
+ * to start is only logged. Always returns 0.
+ */
+static int mpc52xx_gpt_wdt_setup(struct mpc52xx_gpt_priv *gpt,
+				 const u32 *period)
+{
+	u64 period_ns;
+
+	/* remember the gpt for the wdt operation */
+	mpc52xx_gpt_wdt = gpt;
+
+	/* configure the wdt only if the device tree contained a timeout */
+	if (period && *period != 0) {
+		period_ns = (u64) *period * 1000000000ULL;
+		if (mpc52xx_gpt_do_start(gpt, period_ns, 0, 1) == 0)
+			dev_info(gpt->dev, "watchdog set to %us timeout\n",
+				 *period);
+		else
+			dev_warn(gpt->dev, "starting as wdt failed\n");
+	}
+
+	return 0;
+}
+
+#else
+
+/* No-op replacements used when CONFIG_MPC5200_WDT is not defined */
+static int mpc52xx_gpt_wdt_init(void)
+{
+ return 0;
+}
+
+static inline int mpc52xx_gpt_wdt_setup(struct mpc52xx_gpt_priv *gpt,
+ const u32 *period)
+{
+ return 0;
+}
+
+#endif /* CONFIG_MPC5200_WDT */
+
+/* ---------------------------------------------------------------------
+ * of_platform bus binding code
+ *
+ * mpc52xx_gpt_probe() allocates the private data, maps the registers,
+ * sets up the optional GPIO and IRQ controller functions, adds the device
+ * to mpc52xx_gpt_list and, when the node carries a has-wdt property,
+ * hands the gpt to the watchdog code.
+ * Returns 0 on success or -ENOMEM if the allocation or the register
+ * mapping fails.
+ */
+static int mpc52xx_gpt_probe(struct platform_device *ofdev)
+{
+ struct mpc52xx_gpt_priv *gpt;
+
+ gpt = devm_kzalloc(&ofdev->dev, sizeof *gpt, GFP_KERNEL);
+ if (!gpt)
+ return -ENOMEM;
+
+ raw_spin_lock_init(&gpt->lock);
+ gpt->dev = &ofdev->dev;
+ gpt->ipb_freq = mpc5xxx_get_bus_frequency(&ofdev->dev);
+ gpt->regs = of_iomap(ofdev->dev.of_node, 0);
+ if (!gpt->regs)
+ return -ENOMEM;
+
+ dev_set_drvdata(&ofdev->dev, gpt);
+
+ mpc52xx_gpt_gpio_setup(gpt);
+ mpc52xx_gpt_irq_setup(gpt, ofdev->dev.of_node);
+
+ mutex_lock(&mpc52xx_gpt_list_mutex);
+ list_add(&gpt->list, &mpc52xx_gpt_list);
+ mutex_unlock(&mpc52xx_gpt_list_mutex);
+
+ /* check if this device could be a watchdog */
+ if (of_property_read_bool(ofdev->dev.of_node, "fsl,has-wdt") ||
+ of_property_read_bool(ofdev->dev.of_node, "has-wdt")) {
+ const u32 *on_boot_wdt;
+
+ gpt->wdt_mode = MPC52xx_GPT_CAN_WDT;
+ on_boot_wdt = of_get_property(ofdev->dev.of_node,
+ "fsl,wdt-on-boot", NULL);
+ /* the mere presence of fsl,wdt-on-boot marks the gpt as
+ * watchdog; its value is the timeout handed to
+ * mpc52xx_gpt_wdt_setup() */
+ if (on_boot_wdt) {
+ dev_info(gpt->dev, "used as watchdog\n");
+ gpt->wdt_mode |= MPC52xx_GPT_IS_WDT;
+ } else
+ dev_info(gpt->dev, "can function as watchdog\n");
+ mpc52xx_gpt_wdt_setup(gpt, on_boot_wdt);
+ }
+
+ return 0;
+}
+
+static const struct of_device_id mpc52xx_gpt_match[] = {
+ { .compatible = "fsl,mpc5200-gpt", },
+
+ /* Deprecated compatible values; don't use for new dts files */
+ { .compatible = "fsl,mpc5200-gpt-gpio", },
+ { .compatible = "mpc5200-gpt", },
+ {}
+};
+
+/* Platform driver registered by mpc52xx_gpt_init(); it has no remove hook */
+static struct platform_driver mpc52xx_gpt_driver = {
+ .driver = {
+ .name = "mpc52xx-gpt",
+ .suppress_bind_attrs = true,
+ .of_match_table = mpc52xx_gpt_match,
+ },
+ .probe = mpc52xx_gpt_probe,
+};
+
+static int __init mpc52xx_gpt_init(void)
+{
+ return platform_driver_register(&mpc52xx_gpt_driver);
+}
+
+/* Make sure GPIOs and IRQs get set up before anyone tries to use them:
+ * the driver is registered at subsys_initcall level, while the watchdog
+ * misc device is registered at device_initcall level. */
+subsys_initcall(mpc52xx_gpt_init);
+device_initcall(mpc52xx_gpt_wdt_init);
diff --git a/arch/powerpc/platforms/52xx/mpc52xx_pci.c b/arch/powerpc/platforms/52xx/mpc52xx_pci.c
new file mode 100644
index 000000000..0ca4401ba
--- /dev/null
+++ b/arch/powerpc/platforms/52xx/mpc52xx_pci.c
@@ -0,0 +1,419 @@
+/*
+ * PCI code for the Freescale MPC52xx embedded CPU.
+ *
+ * Copyright (C) 2006 Secret Lab Technologies Ltd.
+ * Grant Likely <grant.likely@secretlab.ca>
+ * Copyright (C) 2004 Sylvain Munaut <tnt@246tNt.com>
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2. This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+#undef DEBUG
+
+#include <linux/pci.h>
+#include <linux/of_address.h>
+#include <asm/mpc52xx.h>
+#include <asm/delay.h>
+#include <asm/machdep.h>
+#include <linux/kernel.h>
+
+
+/* ======================================================================== */
+/* Structures mapping & Defines for PCI Unit */
+/* ======================================================================== */
+
+#define MPC52xx_PCI_GSCR_BM 0x40000000
+#define MPC52xx_PCI_GSCR_PE 0x20000000
+#define MPC52xx_PCI_GSCR_SE 0x10000000
+#define MPC52xx_PCI_GSCR_XLB2PCI_MASK 0x07000000
+#define MPC52xx_PCI_GSCR_XLB2PCI_SHIFT 24
+#define MPC52xx_PCI_GSCR_IPG2PCI_MASK 0x00070000
+#define MPC52xx_PCI_GSCR_IPG2PCI_SHIFT 16
+#define MPC52xx_PCI_GSCR_BME 0x00004000
+#define MPC52xx_PCI_GSCR_PEE 0x00002000
+#define MPC52xx_PCI_GSCR_SEE 0x00001000
+#define MPC52xx_PCI_GSCR_PR 0x00000001
+
+
+#define MPC52xx_PCI_IWBTAR_TRANSLATION(proc_ad,pci_ad,size) \
+ ( ( (proc_ad) & 0xff000000 ) | \
+ ( (((size) - 1) >> 8) & 0x00ff0000 ) | \
+ ( ((pci_ad) >> 16) & 0x0000ff00 ) )
+
+#define MPC52xx_PCI_IWCR_PACK(win0,win1,win2) (((win0) << 24) | \
+ ((win1) << 16) | \
+ ((win2) << 8))
+
+#define MPC52xx_PCI_IWCR_DISABLE 0x0
+#define MPC52xx_PCI_IWCR_ENABLE 0x1
+#define MPC52xx_PCI_IWCR_READ 0x0
+#define MPC52xx_PCI_IWCR_READ_LINE 0x2
+#define MPC52xx_PCI_IWCR_READ_MULTI 0x4
+#define MPC52xx_PCI_IWCR_MEM 0x0
+#define MPC52xx_PCI_IWCR_IO 0x8
+
+#define MPC52xx_PCI_TCR_P 0x01000000
+#define MPC52xx_PCI_TCR_LD 0x00010000
+#define MPC52xx_PCI_TCR_WCT8 0x00000008
+
+#define MPC52xx_PCI_TBATR_DISABLE 0x0
+#define MPC52xx_PCI_TBATR_ENABLE 0x1
+
+struct mpc52xx_pci {
+ u32 idr; /* PCI + 0x00 */
+ u32 scr; /* PCI + 0x04 */
+ u32 ccrir; /* PCI + 0x08 */
+ u32 cr1; /* PCI + 0x0C */
+ u32 bar0; /* PCI + 0x10 */
+ u32 bar1; /* PCI + 0x14 */
+ u8 reserved1[16]; /* PCI + 0x18 */
+ u32 ccpr; /* PCI + 0x28 */
+ u32 sid; /* PCI + 0x2C */
+ u32 erbar; /* PCI + 0x30 */
+ u32 cpr; /* PCI + 0x34 */
+ u8 reserved2[4]; /* PCI + 0x38 */
+ u32 cr2; /* PCI + 0x3C */
+ u8 reserved3[32]; /* PCI + 0x40 */
+ u32 gscr; /* PCI + 0x60 */
+ u32 tbatr0; /* PCI + 0x64 */
+ u32 tbatr1; /* PCI + 0x68 */
+ u32 tcr; /* PCI + 0x6C */
+ u32 iw0btar; /* PCI + 0x70 */
+ u32 iw1btar; /* PCI + 0x74 */
+ u32 iw2btar; /* PCI + 0x78 */
+ u8 reserved4[4]; /* PCI + 0x7C */
+ u32 iwcr; /* PCI + 0x80 */
+ u32 icr; /* PCI + 0x84 */
+ u32 isr; /* PCI + 0x88 */
+ u32 arb; /* PCI + 0x8C */
+ u8 reserved5[104]; /* PCI + 0x90 */
+ u32 car; /* PCI + 0xF8 */
+ u8 reserved6[4]; /* PCI + 0xFC */
+};
+
+/* MPC5200 device tree match tables */
+const struct of_device_id mpc52xx_pci_ids[] __initconst = {
+ { .type = "pci", .compatible = "fsl,mpc5200-pci", },
+ { .type = "pci", .compatible = "mpc5200-pci", },
+ {}
+};
+
+/* ======================================================================== */
+/* PCI configuration access */
+/* ======================================================================== */
+
+static int
+mpc52xx_pci_read_config(struct pci_bus *bus, unsigned int devfn,
+ int offset, int len, u32 *val)
+{
+ struct pci_controller *hose = pci_bus_to_host(bus);
+ u32 value;
+
+ if (ppc_md.pci_exclude_device)
+ if (ppc_md.pci_exclude_device(hose, bus->number, devfn))
+ return PCIBIOS_DEVICE_NOT_FOUND;
+
+ out_be32(hose->cfg_addr,
+ (1 << 31) |
+ (bus->number << 16) |
+ (devfn << 8) |
+ (offset & 0xfc));
+ mb();
+
+#if defined(CONFIG_PPC_MPC5200_BUGFIX)
+ if (bus->number) {
+ /* workaround for the bug 435 of the MPC5200 (L25R);
+ * Don't do 32 bits config access during type-1 cycles */
+ switch (len) {
+ case 1:
+ value = in_8(((u8 __iomem *)hose->cfg_data) +
+ (offset & 3));
+ break;
+ case 2:
+ value = in_le16(((u16 __iomem *)hose->cfg_data) +
+ ((offset>>1) & 1));
+ break;
+
+ default:
+ value = in_le16((u16 __iomem *)hose->cfg_data) |
+ (in_le16(((u16 __iomem *)hose->cfg_data) + 1) << 16);
+ break;
+ }
+ }
+ else
+#endif
+ {
+ value = in_le32(hose->cfg_data);
+
+ if (len != 4) {
+ value >>= ((offset & 0x3) << 3);
+ value &= 0xffffffff >> (32 - (len << 3));
+ }
+ }
+
+ *val = value;
+
+ out_be32(hose->cfg_addr, 0);
+ mb();
+
+ return PCIBIOS_SUCCESSFUL;
+}
+
+static int
+mpc52xx_pci_write_config(struct pci_bus *bus, unsigned int devfn,
+ int offset, int len, u32 val)
+{
+ struct pci_controller *hose = pci_bus_to_host(bus);
+ u32 value, mask;
+
+ if (ppc_md.pci_exclude_device)
+ if (ppc_md.pci_exclude_device(hose, bus->number, devfn))
+ return PCIBIOS_DEVICE_NOT_FOUND;
+
+ out_be32(hose->cfg_addr,
+ (1 << 31) |
+ (bus->number << 16) |
+ (devfn << 8) |
+ (offset & 0xfc));
+ mb();
+
+#if defined(CONFIG_PPC_MPC5200_BUGFIX)
+ if (bus->number) {
+ /* workaround for the bug 435 of the MPC5200 (L25R);
+ * Don't do 32 bits config access during type-1 cycles */
+ switch (len) {
+ case 1:
+ out_8(((u8 __iomem *)hose->cfg_data) +
+ (offset & 3), val);
+ break;
+ case 2:
+ out_le16(((u16 __iomem *)hose->cfg_data) +
+ ((offset>>1) & 1), val);
+ break;
+
+ default:
+ out_le16((u16 __iomem *)hose->cfg_data,
+ (u16)val);
+ out_le16(((u16 __iomem *)hose->cfg_data) + 1,
+ (u16)(val>>16));
+ break;
+ }
+ }
+ else
+#endif
+ {
+ if (len != 4) {
+ value = in_le32(hose->cfg_data);
+
+ offset = (offset & 0x3) << 3;
+ mask = (0xffffffff >> (32 - (len << 3)));
+ mask <<= offset;
+
+ value &= ~mask;
+ val = value | ((val << offset) & mask);
+ }
+
+ out_le32(hose->cfg_data, val);
+ }
+ mb();
+
+ out_be32(hose->cfg_addr, 0);
+ mb();
+
+ return PCIBIOS_SUCCESSFUL;
+}
+
+static struct pci_ops mpc52xx_pci_ops = {
+ .read = mpc52xx_pci_read_config,
+ .write = mpc52xx_pci_write_config
+};
+
+
+/* ======================================================================== */
+/* PCI setup */
+/* ======================================================================== */
+
+static void __init
+mpc52xx_pci_setup(struct pci_controller *hose,
+ struct mpc52xx_pci __iomem *pci_regs, phys_addr_t pci_phys)
+{
+ struct resource *res;
+ u32 tmp;
+ int iwcr0 = 0, iwcr1 = 0, iwcr2 = 0;
+
+ pr_debug("%s(hose=%p, pci_regs=%p)\n", __func__, hose, pci_regs);
+
+ /* pci_process_bridge_OF_ranges() found all our addresses for us;
+ * now store them in the right places */
+ hose->cfg_addr = &pci_regs->car;
+ hose->cfg_data = hose->io_base_virt;
+
+ /* Control regs */
+ tmp = in_be32(&pci_regs->scr);
+ tmp |= PCI_COMMAND_MASTER | PCI_COMMAND_MEMORY;
+ out_be32(&pci_regs->scr, tmp);
+
+ /* Memory windows */
+ res = &hose->mem_resources[0];
+ if (res->flags) {
+ pr_debug("mem_resource[0] = %pr\n", res);
+ out_be32(&pci_regs->iw0btar,
+ MPC52xx_PCI_IWBTAR_TRANSLATION(res->start, res->start,
+ resource_size(res)));
+ iwcr0 = MPC52xx_PCI_IWCR_ENABLE | MPC52xx_PCI_IWCR_MEM;
+ if (res->flags & IORESOURCE_PREFETCH)
+ iwcr0 |= MPC52xx_PCI_IWCR_READ_MULTI;
+ else
+ iwcr0 |= MPC52xx_PCI_IWCR_READ;
+ }
+
+ res = &hose->mem_resources[1];
+ if (res->flags) {
+ pr_debug("mem_resource[1] = %pr\n", res);
+ out_be32(&pci_regs->iw1btar,
+ MPC52xx_PCI_IWBTAR_TRANSLATION(res->start, res->start,
+ resource_size(res)));
+ iwcr1 = MPC52xx_PCI_IWCR_ENABLE | MPC52xx_PCI_IWCR_MEM;
+ if (res->flags & IORESOURCE_PREFETCH)
+ iwcr1 |= MPC52xx_PCI_IWCR_READ_MULTI;
+ else
+ iwcr1 |= MPC52xx_PCI_IWCR_READ;
+ }
+
+ /* IO resources */
+ res = &hose->io_resource;
+ if (!res) {
+ printk(KERN_ERR "%s: Didn't find IO resources\n", __FILE__);
+ return;
+ }
+ pr_debug(".io_resource = %pr .io_base_phys=0x%pa\n",
+ res, &hose->io_base_phys);
+ out_be32(&pci_regs->iw2btar,
+ MPC52xx_PCI_IWBTAR_TRANSLATION(hose->io_base_phys,
+ res->start,
+ resource_size(res)));
+ iwcr2 = MPC52xx_PCI_IWCR_ENABLE | MPC52xx_PCI_IWCR_IO;
+
+ /* Set all the IWCR fields at once; they're in the same reg */
+ out_be32(&pci_regs->iwcr, MPC52xx_PCI_IWCR_PACK(iwcr0, iwcr1, iwcr2));
+
+ /* Map IMMR onto PCI bus */
+ pci_phys &= 0xfffc0000; /* bar0 has only 14 significant bits */
+ out_be32(&pci_regs->tbatr0, MPC52xx_PCI_TBATR_ENABLE | pci_phys);
+ out_be32(&pci_regs->bar0, PCI_BASE_ADDRESS_MEM_PREFETCH | pci_phys);
+
+ /* Map memory onto PCI bus */
+ out_be32(&pci_regs->tbatr1, MPC52xx_PCI_TBATR_ENABLE);
+ out_be32(&pci_regs->bar1, PCI_BASE_ADDRESS_MEM_PREFETCH);
+
+ out_be32(&pci_regs->tcr, MPC52xx_PCI_TCR_LD | MPC52xx_PCI_TCR_WCT8);
+
+ tmp = in_be32(&pci_regs->gscr);
+#if 0
+ /* Reset the external bus ( internal PCI controller is NOT reset ) */
+ /* Not necessary and can be a bad thing if for example the bootloader
+ is displaying a splash screen or ... Just left here for
+ documentation purpose if anyone need it */
+ out_be32(&pci_regs->gscr, tmp | MPC52xx_PCI_GSCR_PR);
+ udelay(50);
+#endif
+
+ /* Make sure the PCI bridge is out of reset */
+ out_be32(&pci_regs->gscr, tmp & ~MPC52xx_PCI_GSCR_PR);
+}
+
+static void
+mpc52xx_pci_fixup_resources(struct pci_dev *dev)
+{
+	struct resource *res;
+
+	pr_debug("%s() %.4x:%.4x\n", __func__, dev->vendor, dev->device);
+
+	/* We don't rely on the boot loader for PCI setup: rebase every
+	   valid resource of the device to start at 0 and mark it unset */
+	pci_dev_for_each_resource(dev, res) {
+		if (res->end > res->start) {	/* Only valid resources */
+			res->end -= res->start;
+			res->start = 0;
+			res->flags |= IORESOURCE_UNSET;
+		}
+	}
+
+	/* The PCI Host bridge of MPC52xx has a prefetch memory resource
+	   fixed to 1Gb. Doesn't fit in the resource system so we remove it */
+	if ( (dev->vendor == PCI_VENDOR_ID_MOTOROLA) &&
+	     (   dev->device == PCI_DEVICE_ID_MOTOROLA_MPC5200
+	      || dev->device == PCI_DEVICE_ID_MOTOROLA_MPC5200B) ) {
+		res = &dev->resource[1];	/* reuse 'res'; no shadowing local */
+		res->start = res->end = res->flags = 0;
+	}
+}
+
+/* Add the MPC52xx PCI host bridge described by @node; returns 0 or -errno */
+int __init
+mpc52xx_add_bridge(struct device_node *node)
+{
+	int len;
+	struct mpc52xx_pci __iomem *pci_regs;
+	struct pci_controller *hose;
+	const int *bus_range;
+	struct resource rsrc;
+
+	pr_debug("Adding MPC52xx PCI host bridge %pOF\n", node);
+	pci_add_flags(PCI_REASSIGN_ALL_BUS);
+
+	if (of_address_to_resource(node, 0, &rsrc) != 0) {
+		printk(KERN_ERR "Can't get %pOF resources\n", node);
+		return -EINVAL;
+	}
+
+	bus_range = of_get_property(node, "bus-range", &len);
+	if (bus_range == NULL || len < 2 * sizeof(int)) {
+		printk(KERN_WARNING "Can't get %pOF bus-range, assume bus 0\n",
+		       node);
+		bus_range = NULL;
+	}
+
+	/* There are some PCI quirks on the 52xx, register the hook to
+	 * fix them. */
+	ppc_md.pcibios_fixup_resources = mpc52xx_pci_fixup_resources;
+
+	/* Map the registers before allocating the controller, so that a
+	 * failed mapping cannot leak an already allocated controller */
+	pci_regs = ioremap(rsrc.start, resource_size(&rsrc));
+	if (!pci_regs)
+		return -ENOMEM;
+
+	/* Alloc and initialize the pci controller; the device tree values
+	 * it needs are extracted by pci_process_bridge_OF_ranges() below */
+	hose = pcibios_alloc_controller(node);
+	if (!hose) {
+		iounmap(pci_regs);
+		return -ENOMEM;
+	}
+	hose->first_busno = bus_range ? bus_range[0] : 0;
+	hose->last_busno = bus_range ? bus_range[1] : 0xff;
+	hose->ops = &mpc52xx_pci_ops;
+
+	pci_process_bridge_OF_ranges(hose, node, 1);
+
+	/* Finish setting up PCI using values obtained by
+	 * pci_process_bridge_OF_ranges */
+	mpc52xx_pci_setup(hose, pci_regs, rsrc.start);
+
+	return 0;
+}
+
+void __init mpc52xx_setup_pci(void)
+{
+ struct device_node *pci;
+
+ pci = of_find_matching_node(NULL, mpc52xx_pci_ids);
+ if (!pci)
+ return;
+
+ mpc52xx_add_bridge(pci);
+ of_node_put(pci);
+}
diff --git a/arch/powerpc/platforms/52xx/mpc52xx_pic.c b/arch/powerpc/platforms/52xx/mpc52xx_pic.c
new file mode 100644
index 000000000..1e0a5e964
--- /dev/null
+++ b/arch/powerpc/platforms/52xx/mpc52xx_pic.c
@@ -0,0 +1,519 @@
+/*
+ *
+ * Programmable Interrupt Controller functions for the Freescale MPC52xx.
+ *
+ * Copyright (C) 2008 Secret Lab Technologies Ltd.
+ * Copyright (C) 2006 bplan GmbH
+ * Copyright (C) 2004 Sylvain Munaut <tnt@246tNt.com>
+ * Copyright (C) 2003 Montavista Software, Inc
+ *
+ * Based on the code from the 2.4 kernel by
+ * Dale Farnsworth <dfarnsworth@mvista.com> and Kent Borg.
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2. This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ *
+ */
+
+/*
+ * This is the device driver for the MPC5200 interrupt controller.
+ *
+ * hardware overview
+ * -----------------
+ * The MPC5200 interrupt controller groups all the interrupt sources into
+ * three groups called 'critical', 'main', and 'peripheral'. The critical
+ * group has 3 irqs, External IRQ0, slice timer 0 irq, and wake from deep
+ * sleep. Main group include the other 3 external IRQs, slice timer 1, RTC,
+ * gpios, and the general purpose timers. Peripheral group contains the
+ * remaining irq sources from all of the on-chip peripherals (PSCs, Ethernet,
+ * USB, DMA, etc).
+ *
+ * virqs
+ * -----
+ * The Linux IRQ subsystem requires that each irq source be assigned a
+ * system wide unique IRQ number starting at 1 (0 means no irq). Since
+ * systems can have multiple interrupt controllers, the virtual IRQ (virq)
+ * infrastructure lets each interrupt controller define a local set
+ * of IRQ numbers and the virq infrastructure maps those numbers into
+ * a unique range of the global IRQ# space.
+ *
+ * To define a range of virq numbers for this controller, this driver first
+ * assigns a number to each of the irq groups (called the level 1 or L1
+ * value). Within each group individual irq sources are also assigned a
+ * number, as defined by the MPC5200 user guide, and refers to it as the
+ * level 2 or L2 value. The virq number is determined by shifting up the
+ * L1 value by MPC52xx_IRQ_L1_OFFSET and ORing it with the L2 value.
+ *
+ * For example, the TMR0 interrupt is irq 9 in the main group. The
+ * virq for TMR0 is calculated by ((1 << MPC52xx_IRQ_L1_OFFSET) | 9).
+ *
+ * The observant reader will also notice that this driver defines a 4th
+ * interrupt group called 'bestcomm'. The bestcomm group isn't physically
+ * part of the MPC5200 interrupt controller, but it is used here to assign
+ * a separate virq number for each bestcomm task (since any of the 16
+ * bestcomm tasks can cause the bestcomm interrupt to be raised). When a
+ * bestcomm interrupt occurs (peripheral group, irq 0) this driver determines
+ * which task needs servicing and returns the irq number for that task. This
+ * allows drivers which use bestcomm to define their own interrupt handlers.
+ *
+ * irq_chip structures
+ * -------------------
+ * For actually manipulating IRQs (masking, enabling, clearing, etc) this
+ * driver defines four separate 'irq_chip' structures, one for the main
+ * group, one for the peripherals group, one for the bestcomm group and one
+ * for external interrupts. The irq_chip structures provide the hooks needed
+ * to manipulate each IRQ source, and since each group has a separate set
+ * of registers for controlling the irq, it makes sense to divide up the
+ * hooks along those lines.
+ *
+ * You'll notice that there is not an irq_chip for the critical group and
+ * you'll also notice that there is an irq_chip defined for external
+ * interrupts even though there is no external interrupt group. The reason
+ * for this is that the four external interrupts are all managed with the same
+ * register even though one of the external IRQs is in the critical group and
+ * the other three are in the main group. For this reason it makes sense for
+ * the 4 external irqs to be managed using a separate set of hooks. The
+ * reason there is no crit irq_chip is that of the 3 irqs in the critical
+ * group, only external interrupt is actually supported at this time by this
+ * driver and since external interrupt is the only one used, it can just
+ * be directed to make use of the external irq irq_chip.
+ *
+ * device tree bindings
+ * --------------------
+ * The device tree bindings for this controller reflect the two level
+ * organization of irqs in the device. #interrupt-cells = <3> where the
+ * first cell is the group number [0..3], the second cell is the irq
+ * number in the group, and the third cell is the sense type (level/edge).
+ * For reference, the following is a list of the interrupt property values
+ * associated with external interrupt sources on the MPC5200 (just because
+ * it is non-obvious to determine what the interrupts property should be
+ * when reading the mpc5200 manual and it is a frequently asked question).
+ *
+ * External interrupts:
+ * <0 0 n> external irq0, n is sense (n=0: level high,
+ * <1 1 n> external irq1, n is sense n=1: edge rising,
+ * <1 2 n> external irq2, n is sense n=2: edge falling,
+ * <1 3 n> external irq3, n is sense n=3: level low)
+ */
+#undef DEBUG
+
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <asm/io.h>
+#include <asm/mpc52xx.h>
+
+/* HW IRQ mapping */
+#define MPC52xx_IRQ_L1_CRIT (0)
+#define MPC52xx_IRQ_L1_MAIN (1)
+#define MPC52xx_IRQ_L1_PERP (2)
+#define MPC52xx_IRQ_L1_SDMA (3)
+
+#define MPC52xx_IRQ_L1_OFFSET (6)
+#define MPC52xx_IRQ_L1_MASK (0x00c0)
+#define MPC52xx_IRQ_L2_MASK (0x003f)
+
+#define MPC52xx_IRQ_HIGHTESTHWIRQ (0xd0)
+
+
+/* MPC5200 device tree match tables */
+static const struct of_device_id mpc52xx_pic_ids[] __initconst = {
+ { .compatible = "fsl,mpc5200-pic", },
+ { .compatible = "mpc5200-pic", },
+ {}
+};
+static const struct of_device_id mpc52xx_sdma_ids[] __initconst = {
+ { .compatible = "fsl,mpc5200-bestcomm", },
+ { .compatible = "mpc5200-bestcomm", },
+ {}
+};
+
+static struct mpc52xx_intr __iomem *intr;
+static struct mpc52xx_sdma __iomem *sdma;
+static struct irq_domain *mpc52xx_irqhost = NULL;
+
+static unsigned char mpc52xx_map_senses[4] = {
+ IRQ_TYPE_LEVEL_HIGH,
+ IRQ_TYPE_EDGE_RISING,
+ IRQ_TYPE_EDGE_FALLING,
+ IRQ_TYPE_LEVEL_LOW,
+};
+
+/* Utility functions */
+static inline void io_be_setbit(u32 __iomem *addr, int bitno)
+{
+	out_be32(addr, in_be32(addr) | (1U << bitno)); /* 1U: bitno may be 31 */
+}
+
+static inline void io_be_clrbit(u32 __iomem *addr, int bitno)
+{
+	out_be32(addr, in_be32(addr) & ~(1U << bitno)); /* 1U: bitno may be 31 */
+}
+
+/*
+ * IRQ[0-3] interrupt irq_chip
+ */
+static void mpc52xx_extirq_mask(struct irq_data *d)
+{
+ int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK;
+ io_be_clrbit(&intr->ctrl, 11 - l2irq);
+}
+
+static void mpc52xx_extirq_unmask(struct irq_data *d)
+{
+ int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK;
+ io_be_setbit(&intr->ctrl, 11 - l2irq);
+}
+
+static void mpc52xx_extirq_ack(struct irq_data *d)
+{
+ int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK;
+ io_be_setbit(&intr->ctrl, 27-l2irq);
+}
+
+static int mpc52xx_extirq_set_type(struct irq_data *d, unsigned int flow_type)
+{
+ u32 ctrl_reg, type;
+ int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK;
+ void *handler = handle_level_irq;
+
+ pr_debug("%s: irq=%x. l2=%d flow_type=%d\n", __func__,
+ (int) irqd_to_hwirq(d), l2irq, flow_type);
+
+ switch (flow_type) {
+ case IRQF_TRIGGER_HIGH: type = 0; break;
+ case IRQF_TRIGGER_RISING: type = 1; handler = handle_edge_irq; break;
+ case IRQF_TRIGGER_FALLING: type = 2; handler = handle_edge_irq; break;
+ case IRQF_TRIGGER_LOW: type = 3; break;
+ default:
+ type = 0;
+ }
+
+ ctrl_reg = in_be32(&intr->ctrl);
+ ctrl_reg &= ~(0x3 << (22 - (l2irq * 2)));
+ ctrl_reg |= (type << (22 - (l2irq * 2)));
+ out_be32(&intr->ctrl, ctrl_reg);
+
+ irq_set_handler_locked(d, handler);
+
+ return 0;
+}
+
+static struct irq_chip mpc52xx_extirq_irqchip = {
+ .name = "MPC52xx External",
+ .irq_mask = mpc52xx_extirq_mask,
+ .irq_unmask = mpc52xx_extirq_unmask,
+ .irq_ack = mpc52xx_extirq_ack,
+ .irq_set_type = mpc52xx_extirq_set_type,
+};
+
+/*
+ * Main interrupt irq_chip
+ */
+static int mpc52xx_null_set_type(struct irq_data *d, unsigned int flow_type)
+{
+ return 0; /* Do nothing so that the sense mask will get updated */
+}
+
+static void mpc52xx_main_mask(struct irq_data *d)
+{
+ int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK;
+ io_be_setbit(&intr->main_mask, 16 - l2irq);
+}
+
+static void mpc52xx_main_unmask(struct irq_data *d)
+{
+ int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK;
+ io_be_clrbit(&intr->main_mask, 16 - l2irq);
+}
+
+static struct irq_chip mpc52xx_main_irqchip = {
+ .name = "MPC52xx Main",
+ .irq_mask = mpc52xx_main_mask,
+ .irq_mask_ack = mpc52xx_main_mask,
+ .irq_unmask = mpc52xx_main_unmask,
+ .irq_set_type = mpc52xx_null_set_type,
+};
+
+/*
+ * Peripherals interrupt irq_chip
+ */
+static void mpc52xx_periph_mask(struct irq_data *d)
+{
+ int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK;
+ io_be_setbit(&intr->per_mask, 31 - l2irq);
+}
+
+static void mpc52xx_periph_unmask(struct irq_data *d)
+{
+ int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK;
+ io_be_clrbit(&intr->per_mask, 31 - l2irq);
+}
+
+static struct irq_chip mpc52xx_periph_irqchip = {
+ .name = "MPC52xx Peripherals",
+ .irq_mask = mpc52xx_periph_mask,
+ .irq_mask_ack = mpc52xx_periph_mask,
+ .irq_unmask = mpc52xx_periph_unmask,
+ .irq_set_type = mpc52xx_null_set_type,
+};
+
+/*
+ * SDMA interrupt irq_chip
+ */
+static void mpc52xx_sdma_mask(struct irq_data *d)
+{
+ int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK;
+ io_be_setbit(&sdma->IntMask, l2irq);
+}
+
+static void mpc52xx_sdma_unmask(struct irq_data *d)
+{
+ int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK;
+ io_be_clrbit(&sdma->IntMask, l2irq);
+}
+
+static void mpc52xx_sdma_ack(struct irq_data *d)
+{
+ int l2irq = irqd_to_hwirq(d) & MPC52xx_IRQ_L2_MASK;
+ out_be32(&sdma->IntPend, 1 << l2irq);
+}
+
+static struct irq_chip mpc52xx_sdma_irqchip = {
+ .name = "MPC52xx SDMA",
+ .irq_mask = mpc52xx_sdma_mask,
+ .irq_unmask = mpc52xx_sdma_unmask,
+ .irq_ack = mpc52xx_sdma_ack,
+ .irq_set_type = mpc52xx_null_set_type,
+};
+
+/**
+ * mpc52xx_is_extirq - Returns true if hwirq number is for an external IRQ
+ */
+static int mpc52xx_is_extirq(int l1, int l2)
+{
+ return ((l1 == 0) && (l2 == 0)) ||
+ ((l1 == 1) && (l2 >= 1) && (l2 <= 3));
+}
+
+/**
+ * mpc52xx_irqhost_xlate - translate virq# from device tree interrupts property
+ */
+static int mpc52xx_irqhost_xlate(struct irq_domain *h, struct device_node *ct,
+ const u32 *intspec, unsigned int intsize,
+ irq_hw_number_t *out_hwirq,
+ unsigned int *out_flags)
+{
+ int intrvect_l1;
+ int intrvect_l2;
+ int intrvect_type;
+ int intrvect_linux;
+
+ if (intsize != 3)
+ return -1;
+
+ intrvect_l1 = (int)intspec[0];
+ intrvect_l2 = (int)intspec[1];
+ intrvect_type = (int)intspec[2] & 0x3;
+
+ intrvect_linux = (intrvect_l1 << MPC52xx_IRQ_L1_OFFSET) &
+ MPC52xx_IRQ_L1_MASK;
+ intrvect_linux |= intrvect_l2 & MPC52xx_IRQ_L2_MASK;
+
+ *out_hwirq = intrvect_linux;
+ *out_flags = IRQ_TYPE_LEVEL_LOW;
+ if (mpc52xx_is_extirq(intrvect_l1, intrvect_l2))
+ *out_flags = mpc52xx_map_senses[intrvect_type];
+
+ pr_debug("return %x, l1=%d, l2=%d\n", intrvect_linux, intrvect_l1,
+ intrvect_l2);
+ return 0;
+}
+
+/**
+ * mpc52xx_irqhost_map - Hook to map from virq to an irq_chip structure
+ */
+static int mpc52xx_irqhost_map(struct irq_domain *h, unsigned int virq,
+ irq_hw_number_t irq)
+{
+ int l1irq;
+ int l2irq;
+ struct irq_chip *irqchip;
+ void *hndlr;
+ int type;
+ u32 reg;
+
+ l1irq = (irq & MPC52xx_IRQ_L1_MASK) >> MPC52xx_IRQ_L1_OFFSET;
+ l2irq = irq & MPC52xx_IRQ_L2_MASK;
+
+ /*
+ * External IRQs are handled differently by the hardware so they are
+ * handled by a dedicated irq_chip structure.
+ */
+ if (mpc52xx_is_extirq(l1irq, l2irq)) {
+ reg = in_be32(&intr->ctrl);
+ type = mpc52xx_map_senses[(reg >> (22 - l2irq * 2)) & 0x3];
+ if ((type == IRQ_TYPE_EDGE_FALLING) ||
+ (type == IRQ_TYPE_EDGE_RISING))
+ hndlr = handle_edge_irq;
+ else
+ hndlr = handle_level_irq;
+
+ irq_set_chip_and_handler(virq, &mpc52xx_extirq_irqchip, hndlr);
+ pr_debug("%s: External IRQ%i virq=%x, hw=%x. type=%x\n",
+ __func__, l2irq, virq, (int)irq, type);
+ return 0;
+ }
+
+ /* It is an internal SOC irq. Choose the correct irq_chip */
+ switch (l1irq) {
+ case MPC52xx_IRQ_L1_MAIN: irqchip = &mpc52xx_main_irqchip; break;
+ case MPC52xx_IRQ_L1_PERP: irqchip = &mpc52xx_periph_irqchip; break;
+ case MPC52xx_IRQ_L1_SDMA: irqchip = &mpc52xx_sdma_irqchip; break;
+ case MPC52xx_IRQ_L1_CRIT:
+ pr_warn("%s: Critical IRQ #%d is unsupported! Nopping it.\n",
+ __func__, l2irq);
+ irq_set_chip(virq, &no_irq_chip);
+ return 0;
+ }
+
+ irq_set_chip_and_handler(virq, irqchip, handle_level_irq);
+ pr_debug("%s: virq=%x, l1=%i, l2=%i\n", __func__, virq, l1irq, l2irq);
+
+ return 0;
+}
+
+static const struct irq_domain_ops mpc52xx_irqhost_ops = {
+ .xlate = mpc52xx_irqhost_xlate,
+ .map = mpc52xx_irqhost_map,
+};
+
+/**
+ * mpc52xx_init_irq - Initialize and register with the virq subsystem
+ *
+ * Hook for setting up IRQs on an mpc5200 system. A pointer to this function
+ * is to be put into the machine definition structure.
+ *
+ * This function searches the device tree for an MPC5200 interrupt controller,
+ * initializes it, and registers it with the virq subsystem.
+ */
+void __init mpc52xx_init_irq(void)
+{
+ u32 intr_ctrl;
+ struct device_node *picnode;
+ struct device_node *np;
+
+ /* Remap the necessary zones */
+ picnode = of_find_matching_node(NULL, mpc52xx_pic_ids);
+ intr = of_iomap(picnode, 0);
+ if (!intr)
+ panic(__FILE__ ": find_and_map failed on 'mpc5200-pic'. "
+ "Check node !");
+
+ np = of_find_matching_node(NULL, mpc52xx_sdma_ids);
+ sdma = of_iomap(np, 0);
+ of_node_put(np);
+ if (!sdma)
+ panic(__FILE__ ": find_and_map failed on 'mpc5200-bestcomm'. "
+ "Check node !");
+
+ pr_debug("MPC5200 IRQ controller mapped to 0x%p\n", intr);
+
+ /* Disable all interrupt sources. */
+ out_be32(&sdma->IntPend, 0xffffffff); /* 1 means clear pending */
+ out_be32(&sdma->IntMask, 0xffffffff); /* 1 means disabled */
+ out_be32(&intr->per_mask, 0x7ffffc00); /* 1 means disabled */
+ out_be32(&intr->main_mask, 0x00010fff); /* 1 means disabled */
+ intr_ctrl = in_be32(&intr->ctrl);
+ intr_ctrl &= 0x00ff0000; /* Keeps IRQ[0-3] config */
+ intr_ctrl |= 0x0f000000 | /* clear IRQ 0-3 */
+ 0x00001000 | /* MEE master external enable */
+ 0x00000000 | /* 0 means disable IRQ 0-3 */
+ 0x00000001; /* CEb route critical normally */
+ out_be32(&intr->ctrl, intr_ctrl);
+
+ /* Zero a bunch of the priority settings. */
+ out_be32(&intr->per_pri1, 0);
+ out_be32(&intr->per_pri2, 0);
+ out_be32(&intr->per_pri3, 0);
+ out_be32(&intr->main_pri1, 0);
+ out_be32(&intr->main_pri2, 0);
+
+ /*
+ * As last step, add an irq host to translate the real
+ * hw irq information provided by the ofw to linux virq
+ */
+ mpc52xx_irqhost = irq_domain_add_linear(picnode,
+ MPC52xx_IRQ_HIGHTESTHWIRQ,
+ &mpc52xx_irqhost_ops, NULL);
+
+ if (!mpc52xx_irqhost)
+ panic(__FILE__ ": Cannot allocate the IRQ host\n");
+
+ irq_set_default_host(mpc52xx_irqhost);
+
+ pr_info("MPC52xx PIC is up and running!\n");
+}
+
+/**
+ * mpc52xx_get_irq - Get pending interrupt number hook function
+ *
+ * Called by the interrupt handler to determine what IRQ handler needs to be
+ * executed.
+ *
+ * Status of pending interrupts is determined by reading the encoded status
+ * register. The encoded status register has three fields; one for each of the
+ * types of interrupts defined by the controller - 'critical', 'main' and
+ * 'peripheral'. This function reads the status register and returns the IRQ
+ * number associated with the highest priority pending interrupt. 'Critical'
+ * interrupts have the highest priority, followed by 'main' interrupts, and
+ * then 'peripheral'.
+ *
+ * The mpc5200 interrupt controller can be configured to boost the priority
+ * of individual 'peripheral' interrupts. If this is the case then a special
+ * value will appear in either the crit or main fields indicating a high
+ * or medium priority peripheral irq has occurred.
+ *
+ * This function checks each of the 3 irq request fields and returns the
+ * first pending interrupt that it finds.
+ *
+ * This function also identifies a 4th type of interrupt; 'bestcomm'. Each
+ * bestcomm DMA task can raise the bestcomm peripheral interrupt. When this
+ * occurs a task-specific IRQ# is decoded so that each task can have its
+ * own IRQ handler.
+ */
+unsigned int mpc52xx_get_irq(void)
+{
+ u32 status;
+ int irq;
+
+ status = in_be32(&intr->enc_status);
+ if (status & 0x00000400) { /* critical */
+ irq = (status >> 8) & 0x3;
+ if (irq == 2) /* high priority peripheral */
+ goto peripheral;
+ irq |= (MPC52xx_IRQ_L1_CRIT << MPC52xx_IRQ_L1_OFFSET);
+ } else if (status & 0x00200000) { /* main */
+ irq = (status >> 16) & 0x1f;
+ if (irq == 4) /* low priority peripheral */
+ goto peripheral;
+ irq |= (MPC52xx_IRQ_L1_MAIN << MPC52xx_IRQ_L1_OFFSET);
+ } else if (status & 0x20000000) { /* peripheral */
+ peripheral:
+ irq = (status >> 24) & 0x1f;
+ if (irq == 0) { /* bestcomm */
+ status = in_be32(&sdma->IntPend);
+ irq = ffs(status) - 1;
+ irq |= (MPC52xx_IRQ_L1_SDMA << MPC52xx_IRQ_L1_OFFSET);
+ } else {
+ irq |= (MPC52xx_IRQ_L1_PERP << MPC52xx_IRQ_L1_OFFSET);
+ }
+ } else {
+ return 0;
+ }
+
+ return irq_linear_revmap(mpc52xx_irqhost, irq);
+}
diff --git a/arch/powerpc/platforms/52xx/mpc52xx_pm.c b/arch/powerpc/platforms/52xx/mpc52xx_pm.c
new file mode 100644
index 000000000..f0c31ae15
--- /dev/null
+++ b/arch/powerpc/platforms/52xx/mpc52xx_pm.c
@@ -0,0 +1,206 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/init.h>
+#include <linux/suspend.h>
+#include <linux/io.h>
+#include <linux/of_address.h>
+
+#include <asm/time.h>
+#include <asm/cacheflush.h>
+#include <asm/mpc52xx.h>
+
+/* these are defined in mpc52xx_sleep.S, and only used here */
+extern void mpc52xx_deep_sleep(void __iomem *sram, void __iomem *sdram_regs,
+ struct mpc52xx_cdm __iomem *, struct mpc52xx_intr __iomem*);
+extern void mpc52xx_ds_sram(void);
+extern const long mpc52xx_ds_sram_size;
+extern void mpc52xx_ds_cached(void);
+extern const long mpc52xx_ds_cached_size;
+
+static void __iomem *mbar;
+static void __iomem *sdram;
+static struct mpc52xx_cdm __iomem *cdm;
+static struct mpc52xx_intr __iomem *intr;
+static struct mpc52xx_gpio_wkup __iomem *gpiow;
+static void __iomem *sram;
+static int sram_size;
+
+struct mpc52xx_suspend mpc52xx_suspend;
+
+/* Standby is the only suspend state accepted: 1 for it, 0 for anything else. */
+static int mpc52xx_pm_valid(suspend_state_t state)
+{
+	return state == PM_SUSPEND_STANDBY;
+}
+
+/*
+ * Set up one wakeup GPIO pin as a level-triggered deep-sleep wake source.
+ *
+ * @pin:   pin index. The enable, direction and interrupt-enable registers
+ *         are written with 8-bit accesses and wkup_itype holds two bits per
+ *         pin in 16 bits, so only 0..7 can be represented.
+ * @level: non-zero programs itype code 1, zero programs itype code 2
+ *         (low/high level creates the wakeup interrupt).
+ *
+ * Returns 0 on success, -EINVAL when @pin does not fit the registers.
+ */
+int mpc52xx_set_wakeup_gpio(u8 pin, u8 level)
+{
+	u16 tmp;
+	u8 mask;
+
+	/* a larger index would shift past the 8-bit registers written below */
+	if (pin > 7)
+		return -EINVAL;
+	mask = 1 << pin;
+
+	/* enable gpio */
+	out_8(&gpiow->wkup_gpioe, in_8(&gpiow->wkup_gpioe) | mask);
+	/* set as input */
+	out_8(&gpiow->wkup_ddr, in_8(&gpiow->wkup_ddr) & ~mask);
+	/* enable deep sleep interrupt */
+	out_8(&gpiow->wkup_inten, in_8(&gpiow->wkup_inten) | mask);
+	/* low/high level creates wakeup interrupt */
+	tmp = in_be16(&gpiow->wkup_itype);
+	tmp &= ~(0x3 << (pin * 2));
+	tmp |= (!level + 1) << (pin * 2);
+	out_be16(&gpiow->wkup_itype, tmp);
+	/* master enable */
+	out_8(&gpiow->wkup_maste, 1);
+
+	return 0;
+}
+
+/*
+ * Map the IMMR register window, derive the per-block register pointers used
+ * by the rest of this file, then run the board's suspend-prepare hook.
+ *
+ * Returns 0 on success and -ENOSYS if the IMMR address cannot be obtained,
+ * the mapping fails, or no board hook is installed.
+ */
+int mpc52xx_pm_prepare(void)
+{
+ struct device_node *np;
+ static const struct of_device_id immr_ids[] = {
+ { .compatible = "fsl,mpc5200-immr", },
+ { .compatible = "fsl,mpc5200b-immr", },
+ { .type = "soc", .compatible = "mpc5200", }, /* lite5200 */
+ { .type = "builtin", .compatible = "mpc5200", }, /* efika */
+ {}
+ };
+ struct resource res;
+
+ /* map the whole register space */
+ /* NOTE(review): np is not NULL-checked; this relies on of_address_to_resource() rejecting a NULL node - confirm */
+ np = of_find_matching_node(NULL, immr_ids);
+
+ if (of_address_to_resource(np, 0, &res)) {
+ pr_err("mpc52xx_pm_prepare(): could not get IMMR address\n");
+ of_node_put(np);
+ return -ENOSYS;
+ }
+
+ mbar = ioremap(res.start, 0xc000); /* we should map whole region including SRAM */
+
+ /* the node reference is dropped whether or not the mapping succeeded */
+ of_node_put(np);
+ if (!mbar) {
+ pr_err("mpc52xx_pm_prepare(): could not map registers\n");
+ return -ENOSYS;
+ }
+ /* these offsets are from mpc5200 users manual */
+ sdram = mbar + 0x100;
+ cdm = mbar + 0x200;
+ intr = mbar + 0x500;
+ gpiow = mbar + 0xc00;
+ sram = mbar + 0x8000; /* Those will be handled by the */
+ sram_size = 0x4000; /* bestcomm driver soon */
+
+ /* call board suspend code, if applicable */
+ if (mpc52xx_suspend.board_suspend_prepare)
+ mpc52xx_suspend.board_suspend_prepare(mbar);
+ else {
+ printk(KERN_ALERT "%s: %i don't know how to wake up the board\n",
+ __func__, __LINE__);
+ goto out_unmap;
+ }
+
+ return 0;
+
+ out_unmap:
+ /* NOTE(review): mbar and the pointers derived from it keep their values after this unmap */
+ iounmap(mbar);
+ return -ENOSYS;
+}
+
+
+char saved_sram[0x4000];
+
+/*
+ * Enter deep sleep and return after wakeup.
+ *
+ * Sequence: mask the PIC main interrupts, park the decrementer, save SRAM and
+ * load the low-level sleep routine into it, gate clocks, select HID0 sleep
+ * mode, temporarily replace the code at KERNEL_START + 0x500 with
+ * mpc52xx_ds_cached, call mpc52xx_deep_sleep(), then undo every step in
+ * reverse. @state is not examined here.
+ *
+ * Returns 0, or -ENOMEM if mpc52xx_ds_cached does not fit the on-stack
+ * save buffer.
+ */
+int mpc52xx_pm_enter(suspend_state_t state)
+{
+ u32 clk_enables;
+ u32 msr, hid0;
+ u32 intr_main_mask;
+ void __iomem * irq_0x500 = (void __iomem *)CONFIG_KERNEL_START + 0x500;
+ unsigned long irq_0x500_stop = (unsigned long)irq_0x500 + mpc52xx_ds_cached_size;
+ char saved_0x500[0x600-0x500];
+
+ /* the replacement handler must fit in the 0x100 bytes saved from 0x500 */
+ if (WARN_ON(mpc52xx_ds_cached_size > sizeof(saved_0x500)))
+ return -ENOMEM;
+
+ /* disable all interrupts in PIC */
+ intr_main_mask = in_be32(&intr->main_mask);
+ out_be32(&intr->main_mask, intr_main_mask | 0x1ffff);
+
+ /* don't let DEC expire any time soon */
+ mtspr(SPRN_DEC, 0x7fffffff);
+
+ /* save SRAM */
+ /* NOTE(review): sram is an __iomem pointer accessed with plain memcpy - confirm this is intended */
+ memcpy(saved_sram, sram, sram_size);
+
+ /* copy low level suspend code to sram */
+ /* NOTE(review): mpc52xx_ds_sram_size is not checked against sram_size here */
+ memcpy(sram, mpc52xx_ds_sram, mpc52xx_ds_sram_size);
+
+ out_8(&cdm->ccs_sleep_enable, 1);
+ out_8(&cdm->osc_sleep_enable, 1);
+ out_8(&cdm->ccs_qreq_test, 1);
+
+ /* disable all but SDRAM and bestcomm (SRAM) clocks */
+ clk_enables = in_be32(&cdm->clk_enables);
+ out_be32(&cdm->clk_enables, clk_enables & 0x00088000);
+
+ /* disable power management */
+ msr = mfmsr();
+ mtmsr(msr & ~MSR_POW);
+
+ /* enable sleep mode, disable others */
+ hid0 = mfspr(SPRN_HID0);
+ mtspr(SPRN_HID0, (hid0 & ~(HID0_DOZE | HID0_NAP | HID0_DPM)) | HID0_SLEEP);
+
+ /* save original, copy our irq handler, flush from dcache and invalidate icache */
+ memcpy(saved_0x500, irq_0x500, mpc52xx_ds_cached_size);
+ memcpy(irq_0x500, mpc52xx_ds_cached, mpc52xx_ds_cached_size);
+ flush_icache_range((unsigned long)irq_0x500, irq_0x500_stop);
+
+ /* call low-level sleep code */
+ mpc52xx_deep_sleep(sram, sdram, cdm, intr);
+
+ /* restore original irq handler */
+ memcpy(irq_0x500, saved_0x500, mpc52xx_ds_cached_size);
+ flush_icache_range((unsigned long)irq_0x500, irq_0x500_stop);
+
+ /* restore old power mode */
+ /* POW is kept clear while HID0 is rewritten; the saved MSR goes back last */
+ mtmsr(msr & ~MSR_POW);
+ mtspr(SPRN_HID0, hid0);
+ mtmsr(msr);
+
+ out_be32(&cdm->clk_enables, clk_enables);
+ out_8(&cdm->ccs_sleep_enable, 0);
+ out_8(&cdm->osc_sleep_enable, 0);
+
+ /* restore SRAM */
+ memcpy(sram, saved_sram, sram_size);
+
+ /* reenable interrupts in PIC */
+ out_be32(&intr->main_mask, intr_main_mask);
+
+ return 0;
+}
+
+/*
+ * Undo mpc52xx_pm_prepare(): run the optional board resume hook while the
+ * registers are still mapped, then drop the mapping.
+ */
+void mpc52xx_pm_finish(void)
+{
+ /* call board resume code */
+ if (mpc52xx_suspend.board_resume_finish)
+ mpc52xx_suspend.board_resume_finish(mbar);
+
+ /* the hook receives mbar, so the unmap has to come after it */
+ iounmap(mbar);
+}
+
+static const struct platform_suspend_ops mpc52xx_pm_ops = {
+ .valid = mpc52xx_pm_valid,
+ .prepare = mpc52xx_pm_prepare,
+ .enter = mpc52xx_pm_enter,
+ .finish = mpc52xx_pm_finish,
+};
+
+/* Install mpc52xx_pm_ops as the platform suspend operations; always returns 0. */
+int __init mpc52xx_pm_init(void)
+{
+ suspend_set_ops(&mpc52xx_pm_ops);
+ return 0;
+}
diff --git a/arch/powerpc/platforms/52xx/mpc52xx_sleep.S b/arch/powerpc/platforms/52xx/mpc52xx_sleep.S
new file mode 100644
index 000000000..a66eb311b
--- /dev/null
+++ b/arch/powerpc/platforms/52xx/mpc52xx_sleep.S
@@ -0,0 +1,155 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <asm/reg.h>
+#include <asm/ppc_asm.h>
+#include <asm/processor.h>
+
+
+.text
+
+_GLOBAL(mpc52xx_deep_sleep)
+mpc52xx_deep_sleep: /* args r3-r6: SRAM, SDRAM regs, CDM regs, INTR regs */
+
+ /* enable interrupts */
+ mfmsr r7
+ ori r7, r7, 0x8000 /* EE */
+ mtmsr r7
+ sync; isync;
+
+ li r10, 0 /* flag that irq handler sets */
+
+ /* enable tmr7 (or any other) interrupt */
+ lwz r8, 0x14(r6) /* intr->main_mask */
+ ori r8, r8, 0x1
+ xori r8, r8, 0x1
+ stw r8, 0x14(r6)
+ sync
+
+ /* emulate tmr7 interrupt */
+ li r8, 0x1
+ stw r8, 0x40(r6) /* intr->main_emulate */
+ sync
+
+ /* wait for it to happen */
+1:
+ cmpi cr0, r10, 1
+ bne cr0, 1b
+
+ /* lock icache */
+ mfspr r10, SPRN_HID0
+ ori r10, r10, 0x2000
+ sync; isync;
+ mtspr SPRN_HID0, r10
+ sync; isync;
+
+
+ mflr r9 /* save LR */
+
+ /* jump to sram */
+ mtlr r3
+ blrl
+
+ mtlr r9 /* restore LR */
+
+ /* unlock icache */
+ mfspr r10, SPRN_HID0
+ ori r10, r10, 0x2000
+ xori r10, r10, 0x2000
+ sync; isync;
+ mtspr SPRN_HID0, r10
+ sync; isync;
+
+
+ /* return to C code */
+ blr
+
+
+_GLOBAL(mpc52xx_ds_sram)
+mpc52xx_ds_sram:
+ /* put SDRAM into self-refresh */
+ lwz r8, 0x4(r4) /* sdram->ctrl */
+
+ oris r8, r8, 0x8000 /* mode_en */
+ stw r8, 0x4(r4)
+ sync
+
+ ori r8, r8, 0x0002 /* soft_pre */
+ stw r8, 0x4(r4)
+ sync
+ xori r8, r8, 0x0002
+
+ xoris r8, r8, 0x8000 /* !mode_en */
+ stw r8, 0x4(r4)
+ sync
+
+ oris r8, r8, 0x5000
+ xoris r8, r8, 0x4000 /* ref_en !cke */
+ stw r8, 0x4(r4)
+ sync
+
+ /* disable SDRAM clock */
+ lwz r8, 0x14(r5) /* cdm->clkenable */
+ ori r8, r8, 0x0008
+ xori r8, r8, 0x0008
+ stw r8, 0x14(r5)
+ sync
+
+
+ /* put mpc5200 to sleep */
+ mfmsr r10
+ oris r10, r10, 0x0004 /* POW = 1 */
+ sync; isync;
+ mtmsr r10
+ sync; isync;
+
+
+ /* enable clock */
+ lwz r8, 0x14(r5)
+ ori r8, r8, 0x0008
+ stw r8, 0x14(r5)
+ sync
+
+ /* get ram out of self-refresh */
+ lwz r8, 0x4(r4)
+ oris r8, r8, 0x5000 /* cke ref_en */
+ stw r8, 0x4(r4)
+ sync
+
+ blr
+_GLOBAL(mpc52xx_ds_sram_size)
+mpc52xx_ds_sram_size:
+ .long $-mpc52xx_ds_sram
+
+
+/* ### interrupt handler for wakeup from deep-sleep ### */
+/*
+ * mpc52xx_pm_enter() copies this routine over KERNEL_START + 0x500 for the
+ * duration of the sleep. It clears the emulated interrupt, acknowledges the
+ * wakeup in enc_status, and sets r10 to 1, the flag mpc52xx_deep_sleep
+ * spins on. r7 and r8 are scratch and are preserved through SPRG0/SPRG1.
+ * mpc52xx_ds_cached_size, emitted right after the code, is its byte length.
+ */
+_GLOBAL(mpc52xx_ds_cached)
+mpc52xx_ds_cached:
+ mtspr SPRN_SPRG0, r7
+ mtspr SPRN_SPRG1, r8
+
+ /* disable emulated interrupt */
+ mfspr r7, 311 /* MBAR */
+ addi r7, r7, 0x540 /* intr->main_emul */
+ li r8, 0
+ stw r8, 0(r7)
+ sync
+ dcbf 0, r7
+
+ /* acknowledge wakeup, so CCS releases power down */
+ mfspr r7, 311 /* MBAR */
+ addi r7, r7, 0x524 /* intr->enc_status */
+ lwz r8, 0(r7)
+ ori r8, r8, 0x0400
+ stw r8, 0(r7)
+ sync
+ dcbf 0, r7
+
+ /* flag - we handled the interrupt */
+ li r10, 1
+
+ mfspr r8, SPRN_SPRG1
+ mfspr r7, SPRN_SPRG0
+
+ rfi
+_GLOBAL(mpc52xx_ds_cached_size)
+mpc52xx_ds_cached_size:
+ .long $-mpc52xx_ds_cached
diff --git a/arch/powerpc/platforms/82xx/Kconfig b/arch/powerpc/platforms/82xx/Kconfig
new file mode 100644
index 000000000..1824536cf
--- /dev/null
+++ b/arch/powerpc/platforms/82xx/Kconfig
@@ -0,0 +1,28 @@
+# SPDX-License-Identifier: GPL-2.0
+menuconfig PPC_82xx
+ bool "82xx-based boards (PQ II)"
+ depends on PPC_BOOK3S_32
+ select FSL_SOC
+
+if PPC_82xx
+
+config EP8248E
+ bool "Embedded Planet EP8248E (a.k.a. CWH-PPC-8248N-VE)"
+ select CPM2
+ select PPC_INDIRECT_PCI if PCI
+ select PHYLIB if NETDEVICES
+ select MDIO_BITBANG if PHYLIB
+ help
+ This enables support for the Embedded Planet EP8248E board.
+
+ This board is also resold by Freescale as the QUICCStart
+ MPC8248 Evaluation System and/or the CWH-PPC-8248N-VE.
+
+config MGCOGE
+ bool "Keymile MGCOGE"
+ select CPM2
+ select PPC_INDIRECT_PCI if PCI
+ help
+ This enables support for the Keymile MGCOGE board.
+
+endif
diff --git a/arch/powerpc/platforms/82xx/Makefile b/arch/powerpc/platforms/82xx/Makefile
new file mode 100644
index 000000000..4fa43a5cd
--- /dev/null
+++ b/arch/powerpc/platforms/82xx/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for the PowerPC 82xx linux kernel.
+#
+obj-$(CONFIG_CPM2) += pq2.o
+obj-$(CONFIG_EP8248E) += ep8248e.o
+obj-$(CONFIG_MGCOGE) += km82xx.o
diff --git a/arch/powerpc/platforms/82xx/ep8248e.c b/arch/powerpc/platforms/82xx/ep8248e.c
new file mode 100644
index 000000000..3dc65ce1f
--- /dev/null
+++ b/arch/powerpc/platforms/82xx/ep8248e.c
@@ -0,0 +1,307 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Embedded Planet EP8248E support
+ *
+ * Copyright 2007 Freescale Semiconductor, Inc.
+ * Author: Scott Wood <scottwood@freescale.com>
+ */
+
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/fsl_devices.h>
+#include <linux/mdio-bitbang.h>
+#include <linux/of_mdio.h>
+#include <linux/slab.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+
+#include <asm/io.h>
+#include <asm/cpm2.h>
+#include <asm/udbg.h>
+#include <asm/machdep.h>
+#include <asm/time.h>
+
+#include <sysdev/fsl_soc.h>
+#include <sysdev/cpm2_pic.h>
+
+#include "pq2.h"
+
+static u8 __iomem *ep8248e_bcsr;
+static struct device_node *ep8248e_bcsr_node;
+
+#define BCSR7_SCC2_ENABLE 0x10
+
+#define BCSR8_PHY1_ENABLE 0x80
+#define BCSR8_PHY1_POWER 0x40
+#define BCSR8_PHY2_ENABLE 0x20
+#define BCSR8_PHY2_POWER 0x10
+#define BCSR8_MDIO_READ 0x04
+#define BCSR8_MDIO_CLOCK 0x02
+#define BCSR8_MDIO_DATA 0x01
+
+#define BCSR9_USB_ENABLE 0x80
+#define BCSR9_USB_POWER 0x40
+#define BCSR9_USB_HOST 0x20
+#define BCSR9_USB_FULL_SPEED_TARGET 0x10
+
+/* Look up the "fsl,pq2-pic" node and pass it to cpm2_pic_init(). */
+static void __init ep8248e_pic_init(void)
+{
+	struct device_node *pic_node;
+
+	pic_node = of_find_compatible_node(NULL, NULL, "fsl,pq2-pic");
+	if (pic_node) {
+		cpm2_pic_init(pic_node);
+		of_node_put(pic_node);
+		return;
+	}
+
+	printk(KERN_ERR "PIC init: can not find cpm-pic node\n");
+}
+
+/* Drive the bit-banged MDIO clock line (BCSR8_MDIO_CLOCK in BCSR byte 8). */
+static void ep8248e_set_mdc(struct mdiobb_ctrl *ctrl, int level)
+{
+	u8 __iomem *bcsr8 = &ep8248e_bcsr[8];
+
+	if (!level)
+		clrbits8(bcsr8, BCSR8_MDIO_CLOCK);
+	else
+		setbits8(bcsr8, BCSR8_MDIO_CLOCK);
+
+	/* Read back to flush the write. */
+	in_8(bcsr8);
+}
+
+/* Select the MDIO data direction: BCSR8_MDIO_READ is cleared for output, set for input. */
+static void ep8248e_set_mdio_dir(struct mdiobb_ctrl *ctrl, int output)
+{
+	u8 __iomem *bcsr8 = &ep8248e_bcsr[8];
+
+	if (!output)
+		setbits8(bcsr8, BCSR8_MDIO_READ);
+	else
+		clrbits8(bcsr8, BCSR8_MDIO_READ);
+
+	/* Read back to flush the write. */
+	in_8(bcsr8);
+}
+
+/* Drive the bit-banged MDIO data line (BCSR8_MDIO_DATA in BCSR byte 8). */
+static void ep8248e_set_mdio_data(struct mdiobb_ctrl *ctrl, int data)
+{
+	u8 __iomem *bcsr8 = &ep8248e_bcsr[8];
+
+	if (!data)
+		clrbits8(bcsr8, BCSR8_MDIO_DATA);
+	else
+		setbits8(bcsr8, BCSR8_MDIO_DATA);
+
+	/* Read back to flush the write. */
+	in_8(bcsr8);
+}
+
+/* Sample the MDIO data line: non-zero when BCSR8_MDIO_DATA reads as set. */
+static int ep8248e_get_mdio_data(struct mdiobb_ctrl *ctrl)
+{
+	u8 bcsr8 = in_8(&ep8248e_bcsr[8]);
+
+	return bcsr8 & BCSR8_MDIO_DATA;
+}
+
+static const struct mdiobb_ops ep8248e_mdio_ops = {
+ .set_mdc = ep8248e_set_mdc,
+ .set_mdio_dir = ep8248e_set_mdio_dir,
+ .set_mdio_data = ep8248e_set_mdio_data,
+ .get_mdio_data = ep8248e_get_mdio_data,
+ .owner = THIS_MODULE,
+};
+
+static struct mdiobb_ctrl ep8248e_mdio_ctrl = {
+ .ops = &ep8248e_mdio_ops,
+};
+
+/*
+ * Probe the bit-banged MDIO bus controlled through the BCSR.
+ *
+ * Only a device whose device-tree parent is the BCSR node recorded by
+ * ep8248e_setup_arch() is accepted; anything else gets -ENODEV. The bus id
+ * is the start address of the device's first address resource, in hex.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int ep8248e_mdio_probe(struct platform_device *ofdev)
+{
+	struct mii_bus *bus;
+	struct resource res;
+	struct device_node *node;
+	int ret;
+
+	/* only the pointer value is compared, so the reference can go first */
+	node = of_get_parent(ofdev->dev.of_node);
+	of_node_put(node);
+	if (node != ep8248e_bcsr_node)
+		return -ENODEV;
+
+	ret = of_address_to_resource(ofdev->dev.of_node, 0, &res);
+	if (ret)
+		return ret;
+
+	bus = alloc_mdio_bitbang(&ep8248e_mdio_ctrl);
+	if (!bus)
+		return -ENOMEM;
+
+	bus->name = "ep8248e-mdio-bitbang";
+	bus->parent = &ofdev->dev;
+	/*
+	 * res.start is a resource_size_t, whose width depends on the kernel
+	 * configuration; widen it explicitly so the argument always matches
+	 * the conversion. The resulting text is unchanged for 32-bit values.
+	 */
+	snprintf(bus->id, MII_BUS_ID_SIZE, "%llx",
+		 (unsigned long long)res.start);
+
+	ret = of_mdiobus_register(bus, ofdev->dev.of_node);
+	if (ret)
+		goto err_free_bus;
+
+	return 0;
+err_free_bus:
+	free_mdio_bitbang(bus);
+	return ret;
+}
+
+static const struct of_device_id ep8248e_mdio_match[] = {
+ {
+ .compatible = "fsl,ep8248e-mdio-bitbang",
+ },
+ {},
+};
+
+static struct platform_driver ep8248e_mdio_driver = {
+ .driver = {
+ .name = "ep8248e-mdio-bitbang",
+ .of_match_table = ep8248e_mdio_match,
+ .suppress_bind_attrs = true,
+ },
+ .probe = ep8248e_mdio_probe,
+};
+
+struct cpm_pin {
+ int port, pin, flags;
+};
+
+static __initdata struct cpm_pin ep8248e_pins[] = {
+ /* SMC1 */
+ {2, 4, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+ {2, 5, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
+
+ /* SCC1 */
+ {2, 14, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+ {2, 15, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+ {3, 29, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
+ {3, 30, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY},
+ {3, 31, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+
+ /* FCC1 */
+ {0, 14, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+ {0, 15, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+ {0, 16, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+ {0, 17, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+ {0, 18, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
+ {0, 19, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
+ {0, 20, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
+ {0, 21, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
+ {0, 26, CPM_PIN_INPUT | CPM_PIN_SECONDARY},
+ {0, 27, CPM_PIN_INPUT | CPM_PIN_SECONDARY},
+ {0, 28, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY},
+ {0, 29, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY},
+ {0, 30, CPM_PIN_INPUT | CPM_PIN_SECONDARY},
+ {0, 31, CPM_PIN_INPUT | CPM_PIN_SECONDARY},
+ {2, 21, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+ {2, 22, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+
+ /* FCC2 */
+ {1, 18, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+ {1, 19, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+ {1, 20, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+ {1, 21, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+ {1, 22, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
+ {1, 23, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
+ {1, 24, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
+ {1, 25, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
+ {1, 26, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+ {1, 27, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+ {1, 28, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+ {1, 29, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY},
+ {1, 30, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+ {1, 31, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
+ {2, 18, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+ {2, 19, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+
+ /* I2C */
+ {4, 14, CPM_PIN_INPUT | CPM_PIN_SECONDARY},
+ {4, 15, CPM_PIN_INPUT | CPM_PIN_SECONDARY},
+
+ /* USB */
+ {2, 10, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+ {2, 11, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+ {2, 20, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
+ {2, 24, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+ {3, 23, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
+ {3, 24, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
+ {3, 25, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+};
+
+/* Apply every entry of ep8248e_pins, then route the CPM clocks for SMC1, SCC1, SCC3 (USB), FCC1 and FCC2. */
+static void __init init_ioports(void)
+{
+	const struct cpm_pin *pin = ep8248e_pins;
+	const struct cpm_pin *end = ep8248e_pins + ARRAY_SIZE(ep8248e_pins);
+
+	for (; pin < end; pin++)
+		cpm2_set_pin(pin->port, pin->pin, pin->flags);
+
+	cpm2_smc_clk_setup(CPM_CLK_SMC1, CPM_BRG7);
+
+	cpm2_clk_setup(CPM_CLK_SCC1, CPM_BRG1, CPM_CLK_RX);
+	cpm2_clk_setup(CPM_CLK_SCC1, CPM_BRG1, CPM_CLK_TX);
+
+	/* USB */
+	cpm2_clk_setup(CPM_CLK_SCC3, CPM_CLK8, CPM_CLK_TX);
+
+	cpm2_clk_setup(CPM_CLK_FCC1, CPM_CLK11, CPM_CLK_RX);
+	cpm2_clk_setup(CPM_CLK_FCC1, CPM_CLK10, CPM_CLK_TX);
+
+	cpm2_clk_setup(CPM_CLK_FCC2, CPM_CLK13, CPM_CLK_RX);
+	cpm2_clk_setup(CPM_CLK_FCC2, CPM_CLK14, CPM_CLK_TX);
+}
+
+/*
+ * Board setup: reset the CPM2, clear BCR[PLDP] (erratum SIU18), locate and
+ * map the BCSR, enable SCC2 and both PHYs through it, then configure the
+ * I/O ports. If the BCSR node is missing or cannot be mapped, the function
+ * returns early and neither the port setup nor the final progress message
+ * is reached.
+ */
+static void __init ep8248e_setup_arch(void)
+{
+ if (ppc_md.progress)
+ ppc_md.progress("ep8248e_setup_arch()", 0);
+
+ cpm2_reset();
+
+ /* When this is set, snooping CPM DMA from RAM causes
+ * machine checks. See erratum SIU18.
+ */
+ clrbits32(&cpm2_immr->im_siu_conf.siu_82xx.sc_bcr, MPC82XX_BCR_PLDP);
+
+ /* the node reference is kept: ep8248e_mdio_probe() compares against it */
+ ep8248e_bcsr_node =
+ of_find_compatible_node(NULL, NULL, "fsl,ep8248e-bcsr");
+ if (!ep8248e_bcsr_node) {
+ printk(KERN_ERR "No bcsr in device tree\n");
+ return;
+ }
+
+ ep8248e_bcsr = of_iomap(ep8248e_bcsr_node, 0);
+ if (!ep8248e_bcsr) {
+ printk(KERN_ERR "Cannot map BCSR registers\n");
+ /* without a mapping the node is released and forgotten */
+ of_node_put(ep8248e_bcsr_node);
+ ep8248e_bcsr_node = NULL;
+ return;
+ }
+
+ setbits8(&ep8248e_bcsr[7], BCSR7_SCC2_ENABLE);
+ setbits8(&ep8248e_bcsr[8], BCSR8_PHY1_ENABLE | BCSR8_PHY1_POWER |
+ BCSR8_PHY2_ENABLE | BCSR8_PHY2_POWER);
+
+ init_ioports();
+
+ if (ppc_md.progress)
+ ppc_md.progress("ep8248e_setup_arch(), finish", 0);
+}
+
+static const struct of_device_id of_bus_ids[] __initconst = {
+ { .compatible = "simple-bus", },
+ { .compatible = "fsl,ep8248e-bcsr", },
+ {},
+};
+
+/*
+ * Device initcall: probe the buses listed in of_bus_ids and, when
+ * MDIO_BITBANG support is built in, register the BCSR MDIO driver.
+ *
+ * Returns 0, or the error from platform_driver_register() so that a failed
+ * driver registration is no longer silently discarded.
+ */
+static int __init declare_of_platform_devices(void)
+{
+	of_platform_bus_probe(NULL, of_bus_ids, NULL);
+
+	if (!IS_ENABLED(CONFIG_MDIO_BITBANG))
+		return 0;
+
+	return platform_driver_register(&ep8248e_mdio_driver);
+}
+machine_device_initcall(ep8248e, declare_of_platform_devices);
+
+define_machine(ep8248e)
+{
+ .name = "Embedded Planet EP8248E",
+ .compatible = "fsl,ep8248e",
+ .setup_arch = ep8248e_setup_arch,
+ .init_IRQ = ep8248e_pic_init,
+ .get_irq = cpm2_get_irq,
+ .restart = pq2_restart,
+ .progress = udbg_progress,
+};
diff --git a/arch/powerpc/platforms/82xx/km82xx.c b/arch/powerpc/platforms/82xx/km82xx.c
new file mode 100644
index 000000000..c86da3f2b
--- /dev/null
+++ b/arch/powerpc/platforms/82xx/km82xx.c
@@ -0,0 +1,199 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Keymile km82xx support
+ * Copyright 2008-2011 DENX Software Engineering GmbH
+ * Author: Heiko Schocher <hs@denx.de>
+ *
+ * based on code from:
+ * Copyright 2007 Freescale Semiconductor, Inc.
+ * Author: Scott Wood <scottwood@freescale.com>
+ */
+
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/fsl_devices.h>
+#include <linux/of_platform.h>
+
+#include <linux/io.h>
+#include <asm/cpm2.h>
+#include <asm/udbg.h>
+#include <asm/machdep.h>
+#include <linux/time.h>
+
+#include <sysdev/fsl_soc.h>
+#include <sysdev/cpm2_pic.h>
+
+#include "pq2.h"
+
+/* Find the "fsl,pq2-pic" node in the device tree and initialise the CPM2 PIC from it. */
+static void __init km82xx_pic_init(void)
+{
+	struct device_node *pic;
+
+	pic = of_find_compatible_node(NULL, NULL, "fsl,pq2-pic");
+	if (pic == NULL) {
+		pr_err("PIC init: can not find cpm-pic node\n");
+		return;
+	}
+
+	cpm2_pic_init(pic);
+	of_node_put(pic);
+}
+
+struct cpm_pin {
+ int port, pin, flags;
+};
+
+static __initdata struct cpm_pin km82xx_pins[] = {
+ /* SMC1 */
+ {2, 4, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+ {2, 5, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
+
+ /* SMC2 */
+ {0, 8, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+ {0, 9, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
+
+ /* SCC1 */
+ {2, 21, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+ {2, 15, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+ {3, 31, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+ {3, 30, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY},
+
+ /* SCC4 */
+ {2, 25, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+ {2, 24, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+ {2, 9, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+ {2, 8, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+ {3, 22, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+ {3, 21, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
+
+ /* FCC1 */
+ {0, 14, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+ {0, 15, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+ {0, 16, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+ {0, 17, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+ {0, 18, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
+ {0, 19, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
+ {0, 20, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
+ {0, 21, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
+ {0, 26, CPM_PIN_INPUT | CPM_PIN_SECONDARY},
+ {0, 27, CPM_PIN_INPUT | CPM_PIN_SECONDARY},
+ {0, 28, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY},
+ {0, 29, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY},
+ {0, 30, CPM_PIN_INPUT | CPM_PIN_SECONDARY},
+ {0, 31, CPM_PIN_INPUT | CPM_PIN_SECONDARY},
+
+ {2, 22, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+ {2, 23, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+
+ /* FCC2 */
+ {1, 18, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+ {1, 19, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+ {1, 20, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+ {1, 21, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+ {1, 22, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
+ {1, 23, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
+ {1, 24, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
+ {1, 25, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
+ {1, 26, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+ {1, 27, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+ {1, 28, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+ {1, 29, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY},
+ {1, 30, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+ {1, 31, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
+
+ {2, 18, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+ {2, 19, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+
+ /* MDC */
+ {0, 13, CPM_PIN_OUTPUT | CPM_PIN_GPIO},
+
+#if defined(CONFIG_I2C_CPM)
+ /* I2C */
+ {3, 14, CPM_PIN_INPUT | CPM_PIN_SECONDARY | CPM_PIN_OPENDRAIN},
+ {3, 15, CPM_PIN_INPUT | CPM_PIN_SECONDARY | CPM_PIN_OPENDRAIN},
+#endif
+
+ /* USB */
+ {0, 10, CPM_PIN_OUTPUT | CPM_PIN_GPIO}, /* FULL_SPEED */
+ {0, 11, CPM_PIN_OUTPUT | CPM_PIN_GPIO}, /*/SLAVE */
+ {2, 10, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, /* RXN */
+ {2, 11, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, /* RXP */
+ {2, 20, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, /* /OE */
+ {2, 27, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, /* RXCLK */
+ {3, 23, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, /* TXP */
+ {3, 24, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY}, /* TXN */
+ {3, 25, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, /* RXD */
+
+ /* SPI */
+ {3, 16, CPM_PIN_INPUT | CPM_PIN_SECONDARY},/* SPI_MISO PD16 */
+ {3, 17, CPM_PIN_INPUT | CPM_PIN_SECONDARY},/* SPI_MOSI PD17 */
+ {3, 18, CPM_PIN_INPUT | CPM_PIN_SECONDARY},/* SPI_CLK PD18 */
+};
+
+/*
+ * Apply every entry of km82xx_pins, route the CPM clocks, then preset the
+ * two USB control bits in the port data register.
+ */
+static void __init init_ioports(void)
+{
+	const struct cpm_pin *pin = km82xx_pins;
+	const struct cpm_pin *end = km82xx_pins + ARRAY_SIZE(km82xx_pins);
+
+	for (; pin < end; pin++)
+		cpm2_set_pin(pin->port, pin->pin, pin->flags);
+
+	cpm2_smc_clk_setup(CPM_CLK_SMC2, CPM_BRG8);
+	cpm2_smc_clk_setup(CPM_CLK_SMC1, CPM_BRG7);
+
+	cpm2_clk_setup(CPM_CLK_SCC1, CPM_CLK11, CPM_CLK_RX);
+	cpm2_clk_setup(CPM_CLK_SCC1, CPM_CLK11, CPM_CLK_TX);
+	cpm2_clk_setup(CPM_CLK_SCC3, CPM_CLK5, CPM_CLK_RTX);
+	cpm2_clk_setup(CPM_CLK_SCC4, CPM_CLK7, CPM_CLK_RX);
+	cpm2_clk_setup(CPM_CLK_SCC4, CPM_CLK8, CPM_CLK_TX);
+
+	cpm2_clk_setup(CPM_CLK_FCC1, CPM_CLK10, CPM_CLK_RX);
+	cpm2_clk_setup(CPM_CLK_FCC1, CPM_CLK9, CPM_CLK_TX);
+	cpm2_clk_setup(CPM_CLK_FCC2, CPM_CLK13, CPM_CLK_RX);
+	cpm2_clk_setup(CPM_CLK_FCC2, CPM_CLK14, CPM_CLK_TX);
+
+	/* Force USB FULL SPEED bit to '1' */
+	setbits32(&cpm2_immr->im_ioport.iop_pdata, 1 << (31 - 10));
+	/* clear USB_SLAVE */
+	clrbits32(&cpm2_immr->im_ioport.iop_pdata, 1 << (31 - 11));
+}
+
+/*
+ * Board setup: reset the CPM2, clear BCR[PLDP] (erratum SIU18) and configure
+ * the I/O ports, with a progress message before and after when a progress
+ * callback is installed.
+ */
+static void __init km82xx_setup_arch(void)
+{
+ if (ppc_md.progress)
+ ppc_md.progress("km82xx_setup_arch()", 0);
+
+ cpm2_reset();
+
+ /* When this is set, snooping CPM DMA from RAM causes
+ * machine checks. See erratum SIU18.
+ */
+ clrbits32(&cpm2_immr->im_siu_conf.siu_82xx.sc_bcr, MPC82XX_BCR_PLDP);
+
+ init_ioports();
+
+ if (ppc_md.progress)
+ ppc_md.progress("km82xx_setup_arch(), finish", 0);
+}
+
+static const struct of_device_id of_bus_ids[] __initconst = {
+ { .compatible = "simple-bus", },
+ {},
+};
+
+static int __init declare_of_platform_devices(void)
+{
+ of_platform_bus_probe(NULL, of_bus_ids, NULL);
+
+ return 0;
+}
+machine_device_initcall(km82xx, declare_of_platform_devices);
+
+define_machine(km82xx)
+{
+ .name = "Keymile km82xx",
+ .compatible = "keymile,km82xx",
+ .setup_arch = km82xx_setup_arch,
+ .init_IRQ = km82xx_pic_init,
+ .get_irq = cpm2_get_irq,
+ .restart = pq2_restart,
+ .progress = udbg_progress,
+};
diff --git a/arch/powerpc/platforms/82xx/pq2.c b/arch/powerpc/platforms/82xx/pq2.c
new file mode 100644
index 000000000..391d72a2e
--- /dev/null
+++ b/arch/powerpc/platforms/82xx/pq2.c
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Common PowerQUICC II code.
+ *
+ * Author: Scott Wood <scottwood@freescale.com>
+ * Copyright (c) 2007 Freescale Semiconductor
+ *
+ * Based on code by Vitaly Bordug <vbordug@ru.mvista.com>
+ * pq2_restart fix by Wade Farnsworth <wfarnsworth@mvista.com>
+ * Copyright (c) 2006 MontaVista Software, Inc.
+ */
+
+#include <linux/kprobes.h>
+
+#include <asm/cpm2.h>
+#include <asm/io.h>
+#include <asm/pci-bridge.h>
+
+#include <platforms/82xx/pq2.h>
+
+#define RMR_CSRE 0x00000001
+
+/*
+ * Restart the board by provoking a checkstop with RMR[CSRE] set.
+ * @cmd is not used. The function never returns: if execution continues
+ * past the register read, it ends in panic().
+ */
+void __noreturn pq2_restart(char *cmd)
+{
+ local_irq_disable();
+ /* set RMR_CSRE before provoking the checkstop below */
+ setbits32(&cpm2_immr->im_clkrst.car_rmr, RMR_CSRE);
+
+ /* Clear the ME,EE,IR & DR bits in MSR to cause checkstop */
+ mtmsr(mfmsr() & ~(MSR_ME | MSR_EE | MSR_IR | MSR_DR));
+ /* NOTE(review): this read appears to be the access meant to fault with translation and ME off - confirm */
+ in_8(&cpm2_immr->im_clkrst.res[0]);
+
+ panic("Restart failed\n");
+}
+NOKPROBE_SYMBOL(pq2_restart)
diff --git a/arch/powerpc/platforms/82xx/pq2.h b/arch/powerpc/platforms/82xx/pq2.h
new file mode 100644
index 000000000..902ef0bd4
--- /dev/null
+++ b/arch/powerpc/platforms/82xx/pq2.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PQ2_H
+#define _PQ2_H
+
+void __noreturn pq2_restart(char *cmd);
+
+#ifdef CONFIG_PCI
+int pq2ads_pci_init_irq(void);
+void pq2_init_pci(void);
+#else
+static inline int pq2ads_pci_init_irq(void)
+{
+ return 0;
+}
+
+static inline void pq2_init_pci(void)
+{
+}
+#endif
+
+#endif
diff --git a/arch/powerpc/platforms/83xx/Kconfig b/arch/powerpc/platforms/83xx/Kconfig
new file mode 100644
index 000000000..d355ad409
--- /dev/null
+++ b/arch/powerpc/platforms/83xx/Kconfig
@@ -0,0 +1,92 @@
+# SPDX-License-Identifier: GPL-2.0
+menuconfig PPC_83xx
+ bool "83xx-based boards"
+ depends on PPC_BOOK3S_32
+ select PPC_UDBG_16550
+ select HAVE_PCI
+ select FSL_PCI if PCI
+ select FSL_SOC
+ select IPIC
+
+if PPC_83xx
+
+config MPC830x_RDB
+ bool "Freescale MPC830x RDB and derivatives"
+ select DEFAULT_UIMAGE
+ select PPC_MPC831x
+ select FSL_GTM
+ help
+ This option enables support for the MPC8308 RDB and MPC8308 P1M boards.
+
+config MPC831x_RDB
+ bool "Freescale MPC831x RDB"
+ select DEFAULT_UIMAGE
+ select PPC_MPC831x
+ help
+ This option enables support for the MPC8313 RDB and MPC8315 RDB boards.
+
+config MPC832x_RDB
+ bool "Freescale MPC832x RDB"
+ select DEFAULT_UIMAGE
+ select PPC_MPC832x
+ help
+ This option enables support for the MPC8323 RDB board.
+
+config MPC834x_ITX
+ bool "Freescale MPC834x ITX"
+ select DEFAULT_UIMAGE
+ select PPC_MPC834x
+ help
+ This option enables support for the MPC 834x ITX evaluation board.
+
+ Be aware that PCI initialization is the bootloader's
+ responsibility.
+
+config MPC836x_RDK
+ bool "Freescale/Logic MPC836x RDK"
+ select DEFAULT_UIMAGE
+ select FSL_GTM
+ select FSL_LBC
+ help
+ This option enables support for the MPC836x RDK Processor Board,
+ also known as ZOOM PowerQUICC Kit.
+
+config MPC837x_RDB
+ bool "Freescale MPC837x RDB/WLAN"
+ select DEFAULT_UIMAGE
+ select PPC_MPC837x
+ help
+ This option enables support for the MPC837x RDB and WLAN Boards.
+
+config ASP834x
+ bool "Analogue & Micro ASP 834x"
+ select PPC_MPC834x
+ help
+ This enables support for the Analogue & Micro ASP 83xx
+ board.
+
+config KMETER1
+ bool "Keymile KMETER1"
+ select DEFAULT_UIMAGE
+ select QUICC_ENGINE
+ help
+ This enables support for the Keymile KMETER1 board.
+
+
+endif
+
+# used for usb & gpio
+config PPC_MPC831x
+ bool
+
+# used for math-emu
+config PPC_MPC832x
+ bool
+
+# used for usb & gpio
+config PPC_MPC834x
+ bool
+
+# used for usb & gpio
+config PPC_MPC837x
+ bool
diff --git a/arch/powerpc/platforms/83xx/Makefile b/arch/powerpc/platforms/83xx/Makefile
new file mode 100644
index 000000000..6fc3dba94
--- /dev/null
+++ b/arch/powerpc/platforms/83xx/Makefile
@@ -0,0 +1,18 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for the PowerPC 83xx linux kernel.
+#
+obj-y := misc.o
+obj-$(CONFIG_SUSPEND) += suspend.o suspend-asm.o
+obj-$(CONFIG_MCU_MPC8349EMITX) += mcu_mpc8349emitx.o
+obj-$(CONFIG_MPC830x_RDB) += mpc830x_rdb.o
+obj-$(CONFIG_MPC831x_RDB) += mpc831x_rdb.o
+obj-$(CONFIG_MPC832x_RDB) += mpc832x_rdb.o
+obj-$(CONFIG_MPC834x_ITX) += mpc834x_itx.o
+obj-$(CONFIG_MPC836x_RDK) += mpc836x_rdk.o
+obj-$(CONFIG_MPC837x_RDB) += mpc837x_rdb.o
+obj-$(CONFIG_ASP834x) += asp834x.o
+obj-$(CONFIG_KMETER1) += km83xx.o
+obj-$(CONFIG_PPC_MPC831x) += usb_831x.o
+obj-$(CONFIG_PPC_MPC834x) += usb_834x.o
+obj-$(CONFIG_PPC_MPC837x) += usb_837x.o
diff --git a/arch/powerpc/platforms/83xx/asp834x.c b/arch/powerpc/platforms/83xx/asp834x.c
new file mode 100644
index 000000000..6870d0c34
--- /dev/null
+++ b/arch/powerpc/platforms/83xx/asp834x.c
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * arch/powerpc/platforms/83xx/asp834x.c
+ *
+ * Analogue & Micro ASP8347 board specific routines
+ * clone of mpc834x_itx
+ *
+ * Copyright 2008 Codehermit
+ *
+ * Maintainer: Bryan O'Donoghue <bodonoghue@codhermit.ie>
+ */
+
+#include <linux/pci.h>
+#include <linux/of_platform.h>
+
+#include <asm/time.h>
+#include <asm/ipic.h>
+#include <asm/udbg.h>
+
+#include "mpc83xx.h"
+
+/* ************************************************************************
+ *
+ * Setup the architecture
+ *
+ */
+/* Run the common 83xx setup, then the MPC834x USB configuration. */
+static void __init asp834x_setup_arch(void)
+{
+ mpc83xx_setup_arch();
+ mpc834x_usb_cfg();
+}
+
+machine_device_initcall(asp834x, mpc83xx_declare_of_platform_devices);
+
+define_machine(asp834x) {
+ .name = "ASP8347E",
+ .compatible = "analogue-and-micro,asp8347e",
+ .setup_arch = asp834x_setup_arch,
+ .discover_phbs = mpc83xx_setup_pci,
+ .init_IRQ = mpc83xx_ipic_init_IRQ,
+ .get_irq = ipic_get_irq,
+ .restart = mpc83xx_restart,
+ .time_init = mpc83xx_time_init,
+ .progress = udbg_progress,
+};
diff --git a/arch/powerpc/platforms/83xx/km83xx.c b/arch/powerpc/platforms/83xx/km83xx.c
new file mode 100644
index 000000000..2b5d187d9
--- /dev/null
+++ b/arch/powerpc/platforms/83xx/km83xx.c
@@ -0,0 +1,188 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2008-2011 DENX Software Engineering GmbH
+ * Author: Heiko Schocher <hs@denx.de>
+ *
+ * Description:
+ * Keymile 83xx platform specific routines.
+ */
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/reboot.h>
+#include <linux/pci.h>
+#include <linux/kdev_t.h>
+#include <linux/major.h>
+#include <linux/console.h>
+#include <linux/delay.h>
+#include <linux/seq_file.h>
+#include <linux/root_dev.h>
+#include <linux/initrd.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+
+#include <linux/atomic.h>
+#include <linux/time.h>
+#include <linux/io.h>
+#include <asm/machdep.h>
+#include <asm/ipic.h>
+#include <asm/irq.h>
+#include <asm/udbg.h>
+#include <sysdev/fsl_soc.h>
+#include <sysdev/fsl_pci.h>
+#include <soc/fsl/qe/qe.h>
+
+#include "mpc83xx.h"
+
+/* Extracts the low 16 bits of an SVR value (shift by 0, mask 0xFFFF). */
+#define SVR_REV(svr) (((svr) >> 0) & 0xFFFF) /* Revision field */
+
+/*
+ * Apply the mpc8360E QE_ENET10 erratum workaround.
+ *
+ * Looks up the "par_io" node, maps its first register resource and rewrites
+ * the 32-bit words at offsets 0xa8 and 0xac of that mapping. Two extra sets
+ * of values are applied depending on whether SVR_REV() of the SVR reads
+ * 0x0021 or 0x0020. If the node cannot be found or mapped, nothing is
+ * written; the node reference taken by the lookup is dropped on every path
+ * after a successful lookup.
+ */
+static void __init quirk_mpc8360e_qe_enet10(void)
+{
+ /*
+ * handle mpc8360E Erratum QE_ENET10:
+ * RGMII AC values do not meet the specification
+ */
+ uint svid = mfspr(SPRN_SVR);
+ struct device_node *np_par;
+ struct resource res;
+ void __iomem *base;
+ int ret;
+
+ np_par = of_find_node_by_name(NULL, "par_io");
+ if (np_par == NULL) {
+ pr_warn("%s couldn't find par_io node\n", __func__);
+ return;
+ }
+ /* Map Parallel I/O ports registers */
+ ret = of_address_to_resource(np_par, 0, &res);
+ if (ret) {
+ pr_warn("%s couldn't map par_io registers\n", __func__);
+ goto out;
+ }
+
+ base = ioremap(res.start, resource_size(&res));
+ /* mapping failure is silent: skip the register writes, just drop the node */
+ if (!base)
+ goto out;
+
+ /*
+ * set output delay adjustments to default values according
+ * table 5 in Errata Rev. 5, 9/2011:
+ *
+ * write 0b01 to UCC1 bits 18:19
+ * write 0b01 to UCC2 option 1 bits 4:5
+ * write 0b01 to UCC2 option 2 bits 16:17
+ */
+ clrsetbits_be32((base + 0xa8), 0x0c00f000, 0x04005000);
+
+ /*
+ * set output delay adjustments to default values according
+ * table 3-13 in Reference Manual Rev.3 05/2010:
+ *
+ * write 0b01 to UCC2 option 2 bits 16:17
+ * write 0b0101 to UCC1 bits 20:23
+ * write 0b0101 to UCC2 option 1 bits 24:27
+ */
+ clrsetbits_be32((base + 0xac), 0x0000cff0, 0x00004550);
+
+ /* revision-specific overrides on top of the defaults written above */
+ if (SVR_REV(svid) == 0x0021) {
+ /*
+ * UCC2 option 1: write 0b1010 to bits 24:27
+ * at address IMMRBAR+0x14AC
+ */
+ clrsetbits_be32((base + 0xac), 0x000000f0, 0x000000a0);
+ } else if (SVR_REV(svid) == 0x0020) {
+ /*
+ * UCC1: write 0b11 to bits 18:19
+ * at address IMMRBAR+0x14A8
+ */
+ setbits32((base + 0xa8), 0x00003000);
+
+ /*
+ * UCC2 option 1: write 0b11 to bits 4:5
+ * at address IMMRBAR+0x14A8
+ */
+ setbits32((base + 0xa8), 0x0c000000);
+
+ /*
+ * UCC2 option 2: write 0b11 to bits 16:17
+ * at address IMMRBAR+0x14AC
+ */
+ setbits32((base + 0xac), 0x0000c000);
+ }
+ iounmap(base);
+out:
+ of_node_put(np_par);
+}
+
+/*
+ * Keymile setup_arch hook.
+ *
+ * Always runs the common mpc83xx_setup_arch(). With CONFIG_QUICC_ENGINE, and
+ * only when a "par_io" node exists, it additionally initialises parallel I/O,
+ * applies the pin configuration of every "spi" and "ucc" node and, if a
+ * "ucc_geth" network node is present, applies the QE_ENET10 quirk.
+ */
+static void __init mpc83xx_km_setup_arch(void)
+{
+#ifdef CONFIG_QUICC_ENGINE
+	struct device_node *par_io;
+	struct device_node *child;
+	struct device_node *geth;
+#endif
+
+	mpc83xx_setup_arch();
+
+#ifdef CONFIG_QUICC_ENGINE
+	par_io = of_find_node_by_name(NULL, "par_io");
+	if (!par_io)
+		return;
+
+	par_io_init(par_io);
+	of_node_put(par_io);
+
+	for_each_node_by_name(child, "spi")
+		par_io_of_config(child);
+
+	for_each_node_by_name(child, "ucc")
+		par_io_of_config(child);
+
+	/* Only apply this quirk when par_io is available */
+	geth = of_find_compatible_node(NULL, "network", "ucc_geth");
+	if (!geth)
+		return;
+
+	quirk_mpc8360e_qe_enet10();
+	of_node_put(geth);
+#endif /* CONFIG_QUICC_ENGINE */
+}
+
+machine_device_initcall(mpc83xx_km, mpc83xx_declare_of_platform_devices);
+
+/* list of the supported boards (root-node compatible strings) */
+static const char * const board[] __initconst = {
+	"Keymile,KMETER1",
+	"Keymile,kmpbec8321",
+	NULL
+};
+
+/*
+ * Machine probe hook.
+ *
+ * Checks the device tree root node against every entry of board[] using
+ * of_device_compatible_match(), the same helper the other 83xx board files
+ * use for their probe hooks.
+ *
+ * Returns 1 if the root node is compatible with a supported board, 0 if not.
+ */
+static int __init mpc83xx_km_probe(void)
+{
+	return of_device_compatible_match(of_root, board) != 0;
+}
+
+/*
+ * Machine description for the Keymile 83xx boards. Matching is done by the
+ * mpc83xx_km_probe() hook rather than a .compatible string; apart from probe
+ * and setup_arch the hooks are the shared helpers.
+ */
+define_machine(mpc83xx_km) {
+ .name = "mpc83xx-km-platform",
+ .probe = mpc83xx_km_probe,
+ .setup_arch = mpc83xx_km_setup_arch,
+ .discover_phbs = mpc83xx_setup_pci,
+ .init_IRQ = mpc83xx_ipic_init_IRQ,
+ .get_irq = ipic_get_irq,
+ .restart = mpc83xx_restart,
+ .time_init = mpc83xx_time_init,
+ .progress = udbg_progress,
+};
diff --git a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c
new file mode 100644
index 000000000..4d8fa9ed1
--- /dev/null
+++ b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c
@@ -0,0 +1,224 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Power Management and GPIO expander driver for MPC8349E-mITX-compatible MCU
+ *
+ * Copyright (c) 2008 MontaVista Software, Inc.
+ *
+ * Author: Anton Vorontsov <avorontsov@ru.mvista.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/mod_devicetable.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/mutex.h>
+#include <linux/i2c.h>
+#include <linux/gpio/driver.h>
+#include <linux/slab.h>
+#include <linux/kthread.h>
+#include <linux/property.h>
+#include <linux/reboot.h>
+#include <asm/machdep.h>
+
+/*
+ * I don't have specifications for the MCU firmware, I found this register
+ * and bits positions by the trial&error method.
+ */
+#define MCU_REG_CTRL 0x20
+#define MCU_CTRL_POFF 0x40
+#define MCU_CTRL_BTN 0x80
+
+#define MCU_NUM_GPIO 2
+
+/* Per-device state for one MCU instance. */
+struct mcu {
+ /* held around reg_ctrl updates/writes in the GPIO and power-off paths */
+ struct mutex lock;
+ /* I2C client used for all SMBus accesses to the MCU */
+ struct i2c_client *client;
+ /* GPIO chip registered by mcu_gpiochip_add() */
+ struct gpio_chip gc;
+ /* cached copy of the MCU_REG_CTRL register */
+ u8 reg_ctrl;
+};
+
+/*
+ * Instance backing the pm_power_off handler. mcu_probe() sets it only when
+ * no pm_power_off handler was installed yet, so it can stay NULL.
+ */
+static struct mcu *glob_mcu;
+
+struct task_struct *shutdown_thread;
+
+/*
+ * Button polling thread.
+ *
+ * Until asked to stop, reads MCU_REG_CTRL, and when MCU_CTRL_BTN is set
+ * writes the register back with that bit cleared and calls ctrl_alt_del().
+ * Between polls it sleeps interruptibly for up to HZ jiffies.
+ *
+ * @data: optional struct mcu to poll; when NULL the global instance
+ *        (glob_mcu) is used.
+ *
+ * Returns 0 once kthread_should_stop() is true.
+ */
+static int shutdown_thread_fn(void *data)
+{
+	struct mcu *mcu = data ? data : glob_mcu;
+	int ret;
+
+	while (!kthread_should_stop()) {
+		ret = i2c_smbus_read_byte_data(mcu->client, MCU_REG_CTRL);
+		if (ret < 0) {
+			/*
+			 * Do not cache or act on a failed read: a negative
+			 * errno truncated to u8 can have MCU_CTRL_BTN set and
+			 * would be taken for a button press.
+			 */
+			pr_err("MCU status reg read failed.\n");
+		} else {
+			mcu->reg_ctrl = ret;
+
+			if (mcu->reg_ctrl & MCU_CTRL_BTN) {
+				i2c_smbus_write_byte_data(mcu->client,
+						MCU_REG_CTRL,
+						mcu->reg_ctrl & ~MCU_CTRL_BTN);
+
+				ctrl_alt_del();
+			}
+		}
+
+		set_current_state(TASK_INTERRUPTIBLE);
+		schedule_timeout(HZ);
+	}
+
+	return 0;
+}
+
+/*
+ * sysfs "status" show handler: reads MCU_REG_CTRL, refreshes the cached
+ * copy and prints the value as two hex digits followed by a newline.
+ *
+ * The instance is taken from the device's driver data (set through
+ * i2c_set_clientdata() in mcu_probe()) instead of glob_mcu, because glob_mcu
+ * is only assigned when this driver also owns pm_power_off.
+ *
+ * Returns the number of bytes written to @buf, or -ENODEV if the register
+ * read fails.
+ */
+static ssize_t show_status(struct device *d,
+			   struct device_attribute *attr, char *buf)
+{
+	struct mcu *mcu = dev_get_drvdata(d);
+	int ret;
+
+	ret = i2c_smbus_read_byte_data(mcu->client, MCU_REG_CTRL);
+	if (ret < 0)
+		return -ENODEV;
+	mcu->reg_ctrl = ret;
+
+	return sysfs_emit(buf, "%02x\n", ret);
+}
+static DEVICE_ATTR(status, 0444, show_status, NULL);
+
+/*
+ * pm_power_off handler: under the instance lock, writes the cached control
+ * value with MCU_CTRL_POFF set to MCU_REG_CTRL. The cached value itself is
+ * left unchanged and the result of the write is not checked.
+ */
+static void mcu_power_off(void)
+{
+	struct mcu *poff = glob_mcu;
+	u8 ctrl;
+
+	pr_info("Sending power-off request to the MCU...\n");
+
+	mutex_lock(&poff->lock);
+	ctrl = poff->reg_ctrl | MCU_CTRL_POFF;
+	i2c_smbus_write_byte_data(poff->client, MCU_REG_CTRL, ctrl);
+	mutex_unlock(&poff->lock);
+}
+
+/*
+ * gpio_chip .set callback.
+ *
+ * GPIO @gpio lives at bit (4 + @gpio) of the control register. A non-zero
+ * @val clears that bit and zero sets it; the updated cached value is then
+ * written to MCU_REG_CTRL, all under the instance lock.
+ */
+static void mcu_gpio_set(struct gpio_chip *gc, unsigned int gpio, int val)
+{
+	struct mcu *chip = gpiochip_get_data(gc);
+	const u8 mask = 1 << (4 + gpio);
+
+	mutex_lock(&chip->lock);
+
+	chip->reg_ctrl = val ? (chip->reg_ctrl & ~mask)
+			     : (chip->reg_ctrl | mask);
+	i2c_smbus_write_byte_data(chip->client, MCU_REG_CTRL, chip->reg_ctrl);
+
+	mutex_unlock(&chip->lock);
+}
+
+/*
+ * gpio_chip .direction_output callback: there is no direction handling here,
+ * it only drives the requested value via mcu_gpio_set() and returns 0.
+ */
+static int mcu_gpio_dir_out(struct gpio_chip *gc, unsigned int gpio, int val)
+{
+ mcu_gpio_set(gc, gpio, val);
+ return 0;
+}
+
+/*
+ * Fill in and register the GPIO chip embedded in @mcu.
+ *
+ * The label is a heap string formatted from the device's firmware node; it
+ * is owned by this driver and freed again if registration fails.
+ *
+ * Returns 0 on success, -ENOMEM if the label cannot be allocated, or the
+ * error returned by gpiochip_add_data().
+ */
+static int mcu_gpiochip_add(struct mcu *mcu)
+{
+	struct device *dev = &mcu->client->dev;
+	struct gpio_chip *gc = &mcu->gc;
+	int ret;
+
+	gc->label = kasprintf(GFP_KERNEL, "%pfw", dev_fwnode(dev));
+	if (!gc->label)
+		return -ENOMEM;
+
+	gc->owner = THIS_MODULE;
+	gc->can_sleep = 1;
+	gc->ngpio = MCU_NUM_GPIO;
+	gc->base = -1;
+	gc->set = mcu_gpio_set;
+	gc->direction_output = mcu_gpio_dir_out;
+	gc->parent = dev;
+
+	ret = gpiochip_add_data(gc, mcu);
+	if (ret) {
+		/* nobody will call mcu_gpiochip_remove() for this chip */
+		kfree(gc->label);
+		gc->label = NULL;
+	}
+
+	return ret;
+}
+
+/*
+ * Unregister the GPIO chip and release its label.
+ *
+ * The chip is removed first so the label is never freed while the chip is
+ * still registered and pointing at it.
+ */
+static void mcu_gpiochip_remove(struct mcu *mcu)
+{
+	gpiochip_remove(&mcu->gc);
+	kfree(mcu->gc.label);
+	mcu->gc.label = NULL;
+}
+
+/*
+ * I2C probe: allocate the instance, read the initial control register
+ * value, register the GPIO chip, claim pm_power_off if it is still free,
+ * create the "status" attribute and start the button polling thread.
+ *
+ * Returns 0 on success or a negative errno. On failure everything set up so
+ * far is torn down again, including when the polling thread cannot be
+ * started (previously an ERR_PTR was left in shutdown_thread and later
+ * handed to kthread_stop()).
+ */
+static int mcu_probe(struct i2c_client *client)
+{
+	struct mcu *mcu;
+	int ret;
+
+	mcu = kzalloc(sizeof(*mcu), GFP_KERNEL);
+	if (!mcu)
+		return -ENOMEM;
+
+	mutex_init(&mcu->lock);
+	mcu->client = client;
+	i2c_set_clientdata(client, mcu);
+
+	ret = i2c_smbus_read_byte_data(mcu->client, MCU_REG_CTRL);
+	if (ret < 0)
+		goto err;
+	mcu->reg_ctrl = ret;
+
+	ret = mcu_gpiochip_add(mcu);
+	if (ret)
+		goto err;
+
+	/* XXX: this is potentially racy, but there is no lock for pm_power_off */
+	if (!pm_power_off) {
+		glob_mcu = mcu;
+		pm_power_off = mcu_power_off;
+		dev_info(&client->dev, "will provide power-off service\n");
+	}
+
+	/* a missing status file is reported but not treated as fatal */
+	if (device_create_file(&client->dev, &dev_attr_status))
+		dev_err(&client->dev,
+			"couldn't create device file for status\n");
+
+	/* hand the instance to the thread as its data pointer */
+	shutdown_thread = kthread_run(shutdown_thread_fn, mcu,
+				      "mcu-i2c-shdn");
+	if (IS_ERR(shutdown_thread)) {
+		ret = PTR_ERR(shutdown_thread);
+		shutdown_thread = NULL;
+		goto err_thread;
+	}
+
+	return 0;
+
+err_thread:
+	device_remove_file(&client->dev, &dev_attr_status);
+	if (glob_mcu == mcu) {
+		pm_power_off = NULL;
+		glob_mcu = NULL;
+	}
+	mcu_gpiochip_remove(mcu);
+err:
+	kfree(mcu);
+	return ret;
+}
+
+/*
+ * I2C remove: stop the polling thread, remove the "status" attribute, give
+ * up pm_power_off if this instance owns it, unregister the GPIO chip and
+ * free the instance.
+ */
+static void mcu_remove(struct i2c_client *client)
+{
+	struct mcu *mcu = i2c_get_clientdata(client);
+
+	/* kthread_stop() must not be handed an ERR_PTR or NULL task */
+	if (!IS_ERR_OR_NULL(shutdown_thread))
+		kthread_stop(shutdown_thread);
+	shutdown_thread = NULL;
+
+	device_remove_file(&client->dev, &dev_attr_status);
+
+	if (glob_mcu == mcu) {
+		pm_power_off = NULL;
+		glob_mcu = NULL;
+	}
+
+	mcu_gpiochip_remove(mcu);
+	kfree(mcu);
+}
+
+/* I2C device-id table; exported for module aliasing via MODULE_DEVICE_TABLE. */
+static const struct i2c_device_id mcu_ids[] = {
+ { "mcu-mpc8349emitx", },
+ {},
+};
+MODULE_DEVICE_TABLE(i2c, mcu_ids);
+
+/* Device-tree match table referenced from mcu_driver.driver.of_match_table. */
+static const struct of_device_id mcu_of_match_table[] = {
+ { .compatible = "fsl,mcu-mpc8349emitx", },
+ { },
+};
+
+/* I2C driver binding mcu_probe()/mcu_remove() to the tables above. */
+static struct i2c_driver mcu_driver = {
+ .driver = {
+ .name = "mcu-mpc8349emitx",
+ .of_match_table = mcu_of_match_table,
+ },
+ .probe = mcu_probe,
+ .remove = mcu_remove,
+ .id_table = mcu_ids,
+};
+
+module_i2c_driver(mcu_driver);
+
+MODULE_DESCRIPTION("Power Management and GPIO expander driver for "
+ "MPC8349E-mITX-compatible MCU");
+MODULE_AUTHOR("Anton Vorontsov <avorontsov@ru.mvista.com>");
+MODULE_LICENSE("GPL");
diff --git a/arch/powerpc/platforms/83xx/misc.c b/arch/powerpc/platforms/83xx/misc.c
new file mode 100644
index 000000000..2fb2a85d1
--- /dev/null
+++ b/arch/powerpc/platforms/83xx/misc.c
@@ -0,0 +1,149 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * misc setup functions for MPC83xx
+ *
+ * Maintainer: Kumar Gala <galak@kernel.crashing.org>
+ */
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/of_platform.h>
+#include <linux/pci.h>
+
+#include <asm/debug.h>
+#include <asm/io.h>
+#include <asm/hw_irq.h>
+#include <asm/ipic.h>
+#include <sysdev/fsl_soc.h>
+#include <sysdev/fsl_pci.h>
+
+#include <mm/mmu_decl.h>
+
+#include "mpc83xx.h"
+
+/*
+ * Mapping of the reset register block; stays NULL if ioremap() fails, which
+ * mpc83xx_restart() checks before touching the registers.
+ */
+static __be32 __iomem *restart_reg_base;
+
+/*
+ * arch_initcall: map 0xff bytes at IMMR base + 0x900 for later use by
+ * mpc83xx_restart(). Always returns 0, even if the mapping failed.
+ */
+static int __init mpc83xx_restart_init(void)
+{
+ /* map reset register space */
+ restart_reg_base = ioremap(get_immrbase() + 0x900, 0xff);
+
+ return 0;
+}
+
+arch_initcall(mpc83xx_restart_init);
+
+/*
+ * Machine restart hook; never returns.
+ *
+ * With interrupts disabled, writes the "RSTE" key (0x52535445) to the reset
+ * protection register and then 0x2 to the reset control register. If the
+ * reset registers were never mapped it only logs an emergency message.
+ * Either way it ends in an endless loop. @cmd is unused.
+ */
+void __noreturn mpc83xx_restart(char *cmd)
+{
+#define RST_OFFSET 0x00000900
+#define RST_PROT_REG 0x00000018
+#define RST_CTRL_REG 0x0000001c
+
+	local_irq_disable();
+
+	if (!restart_reg_base) {
+		printk (KERN_EMERG "Error: Restart registers not mapped, spinning!\n");
+	} else {
+		/* byte offsets become indices into the __be32 array */
+		__be32 __iomem *prot = restart_reg_base + (RST_PROT_REG >> 2);
+		__be32 __iomem *ctrl = restart_reg_base + (RST_CTRL_REG >> 2);
+
+		/* enable software reset "RSTE" */
+		out_be32(prot, 0x52535445);
+
+		/* set software hard reset */
+		out_be32(ctrl, 0x2);
+	}
+
+	while (1)
+		;
+}
+
+/*
+ * Machine time_init hook: set the SPCR_TBEN bit in the register at IMMR
+ * base + SPCR_OFFSET through a temporary 4-byte mapping.
+ *
+ * Returns 0 in all cases. If the register cannot be mapped the bit is left
+ * untouched and an error is logged instead of dereferencing a NULL mapping.
+ */
+long __init mpc83xx_time_init(void)
+{
+#define SPCR_OFFSET 0x00000110
+#define SPCR_TBEN 0x00400000
+	__be32 __iomem *spcr = ioremap(get_immrbase() + SPCR_OFFSET, 4);
+	u32 tmp;	/* in_be32() already returns a CPU-endian value */
+
+	if (!spcr) {
+		pr_err("%s: unable to map SPCR\n", __func__);
+		return 0;
+	}
+
+	tmp = in_be32(spcr);
+	out_be32(spcr, tmp | SPCR_TBEN);
+
+	iounmap(spcr);
+
+	return 0;
+}
+
+/*
+ * Machine init_IRQ hook: locate the IPIC node, initialise the controller
+ * from it and restore the default interrupt priorities. Does nothing when
+ * no IPIC node is found.
+ */
+void __init mpc83xx_ipic_init_IRQ(void)
+{
+	struct device_node *ipic_np;
+
+	/* looking for fsl,pq2pro-pic which is also compatible with fsl,ipic */
+	ipic_np = of_find_compatible_node(NULL, NULL, "fsl,ipic");
+	if (ipic_np == NULL) {
+		/* fall back to a lookup by device type */
+		ipic_np = of_find_node_by_type(NULL, "ipic");
+		if (ipic_np == NULL)
+			return;
+	}
+
+	ipic_init(ipic_np, 0);
+	of_node_put(ipic_np);
+
+	/*
+	 * The boot rom may have altered the interrupt mapping priorities,
+	 * so put the defaults back.
+	 */
+	ipic_set_default_priority();
+}
+
+/* Bus match table handed to of_platform_bus_probe() below. */
+static const struct of_device_id of_bus_ids[] __initconst = {
+ { .type = "soc", },
+ { .compatible = "soc", },
+ { .compatible = "simple-bus" },
+ { .compatible = "gianfar" },
+ { .compatible = "gpio-leds", },
+ { .type = "qe", },
+ { .compatible = "fsl,qe", },
+ {},
+};
+
+/*
+ * Common device initcall for the 83xx boards: probe the platform buses
+ * listed in of_bus_ids. The probe result is ignored; always returns 0.
+ */
+int __init mpc83xx_declare_of_platform_devices(void)
+{
+ of_platform_bus_probe(NULL, of_bus_ids, NULL);
+ return 0;
+}
+
+#ifdef CONFIG_PCI
+/*
+ * discover_phbs hook (only built with CONFIG_PCI): call mpc83xx_add_bridge()
+ * for every "pci" node compatible with "fsl,mpc8349-pci" and then for every
+ * one compatible with "fsl,mpc8314-pcie". The return value of
+ * mpc83xx_add_bridge() is not checked.
+ */
+void __init mpc83xx_setup_pci(void)
+{
+ struct device_node *np;
+
+ for_each_compatible_node(np, "pci", "fsl,mpc8349-pci")
+ mpc83xx_add_bridge(np);
+ for_each_compatible_node(np, "pci", "fsl,mpc8314-pcie")
+ mpc83xx_add_bridge(np);
+}
+#endif
+
+/*
+ * Common setup_arch step for the 83xx boards.
+ *
+ * Reports progress when a progress hook is installed, then calls setbat()
+ * to map the IMMR region at the FIX_IMMR_BASE fixmap address with
+ * PAGE_KERNEL_NCG, followed by update_bats(). The mapping size is 2M when
+ * the IMMR base is 2M aligned and 1M otherwise.
+ */
+void __init mpc83xx_setup_arch(void)
+{
+	const phys_addr_t immr_pa = get_immrbase();
+	const unsigned long immr_va = fix_to_virt(FIX_IMMR_BASE);
+	int bat_size = SZ_1M;
+
+	if (IS_ALIGNED(immr_pa, SZ_2M))
+		bat_size = SZ_2M;
+
+	if (ppc_md.progress)
+		ppc_md.progress("mpc83xx_setup_arch()", 0);
+
+	setbat(-1, immr_va, immr_pa, bat_size, PAGE_KERNEL_NCG);
+	update_bats();
+}
+
+/*
+ * Machine check handler for 83xx.
+ *
+ * Unless SRR1_MCE_MCP is set in regs->msr and the IPIC reports the watchdog
+ * bit in its MCP status, the event is passed to machine_check_generic().
+ * Otherwise the watchdog status bit is cleared, the debugger fault handler
+ * gets a chance to take the event, and failing that die() is called.
+ *
+ * Returns the result of machine_check_generic() in the first case and 1
+ * otherwise.
+ */
+int machine_check_83xx(struct pt_regs *regs)
+{
+	const u32 wdt_bit = 1 << (31 - IPIC_MCP_WDT);
+	bool wdt_mcp;
+
+	/* the IPIC status is only read when the MSR bit is set */
+	wdt_mcp = (regs->msr & SRR1_MCE_MCP) &&
+		  (ipic_get_mcp_status() & wdt_bit);
+	if (!wdt_mcp)
+		return machine_check_generic(regs);
+
+	ipic_clear_mcp_status(wdt_bit);
+
+	if (debugger_fault_handler(regs))
+		return 1;
+
+	die("Watchdog NMI Reset", regs, 0);
+
+	return 1;
+}
diff --git a/arch/powerpc/platforms/83xx/mpc830x_rdb.c b/arch/powerpc/platforms/83xx/mpc830x_rdb.c
new file mode 100644
index 000000000..534bb2274
--- /dev/null
+++ b/arch/powerpc/platforms/83xx/mpc830x_rdb.c
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * arch/powerpc/platforms/83xx/mpc830x_rdb.c
+ *
+ * Description: MPC830x RDB board specific routines.
+ * This file is based on mpc831x_rdb.c
+ *
+ * Copyright (C) Freescale Semiconductor, Inc. 2009. All rights reserved.
+ * Copyright (C) 2010. Ilya Yanok, Emcraft Systems, yanok@emcraft.com
+ */
+
+#include <linux/pci.h>
+#include <linux/of_platform.h>
+#include <asm/time.h>
+#include <asm/ipic.h>
+#include <asm/udbg.h>
+#include <sysdev/fsl_pci.h>
+#include <sysdev/fsl_soc.h>
+#include "mpc83xx.h"
+
+/*
+ * Setup the architecture
+ *
+ * Runs the common mpc83xx_setup_arch() and then mpc831x_usb_cfg(); the
+ * int returned by mpc831x_usb_cfg() is not checked.
+ */
+static void __init mpc830x_rdb_setup_arch(void)
+{
+ mpc83xx_setup_arch();
+ mpc831x_usb_cfg();
+}
+
+/*
+ * Root-node compatible strings accepted by mpc830x_rdb_probe(). The table
+ * is never modified, so the pointers are const as well and it is placed in
+ * init read-only data, matching the table in mpc837x_rdb.c.
+ */
+static const char * const board[] __initconst = {
+	"MPC8308RDB",
+	"fsl,mpc8308rdb",
+	"denx,mpc8308_p1m",
+	NULL
+};
+
+/*
+ * Called very early, MMU is off, device-tree isn't unflattened
+ *
+ * NOTE(review): the body passes of_root to of_device_compatible_match(),
+ * which suggests the tree is available when this runs - confirm whether
+ * the sentence above is still accurate.
+ *
+ * Returns the result of matching the root node against board[].
+ */
+static int __init mpc830x_rdb_probe(void)
+{
+ return of_device_compatible_match(of_root, board);
+}
+
+/* Hook mpc83xx_declare_of_platform_devices() up as this machine's device initcall. */
+machine_device_initcall(mpc830x_rdb, mpc83xx_declare_of_platform_devices);
+
+/*
+ * Machine description for the MPC830x RDB boards, matched through the
+ * mpc830x_rdb_probe() hook. Apart from probe and setup_arch the hooks are
+ * the shared helpers.
+ */
+define_machine(mpc830x_rdb) {
+ .name = "MPC830x RDB",
+ .probe = mpc830x_rdb_probe,
+ .setup_arch = mpc830x_rdb_setup_arch,
+ .discover_phbs = mpc83xx_setup_pci,
+ .init_IRQ = mpc83xx_ipic_init_IRQ,
+ .get_irq = ipic_get_irq,
+ .restart = mpc83xx_restart,
+ .time_init = mpc83xx_time_init,
+ .progress = udbg_progress,
+};
diff --git a/arch/powerpc/platforms/83xx/mpc831x_rdb.c b/arch/powerpc/platforms/83xx/mpc831x_rdb.c
new file mode 100644
index 000000000..7b901ab3b
--- /dev/null
+++ b/arch/powerpc/platforms/83xx/mpc831x_rdb.c
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * arch/powerpc/platforms/83xx/mpc831x_rdb.c
+ *
+ * Description: MPC831x RDB board specific routines.
+ * This file is based on mpc834x_sys.c
+ * Author: Lo Wlison <r43300@freescale.com>
+ *
+ * Copyright (C) Freescale Semiconductor, Inc. 2006. All rights reserved.
+ */
+
+#include <linux/pci.h>
+#include <linux/of_platform.h>
+
+#include <asm/time.h>
+#include <asm/ipic.h>
+#include <asm/udbg.h>
+#include <sysdev/fsl_pci.h>
+
+#include "mpc83xx.h"
+
+/*
+ * Setup the architecture
+ *
+ * Runs the common mpc83xx_setup_arch() and then mpc831x_usb_cfg(); the
+ * int returned by mpc831x_usb_cfg() is not checked.
+ */
+static void __init mpc831x_rdb_setup_arch(void)
+{
+ mpc83xx_setup_arch();
+ mpc831x_usb_cfg();
+}
+
+/*
+ * Root-node compatible strings accepted by mpc831x_rdb_probe(). The table
+ * is never modified, so the pointers are const as well and it is placed in
+ * init read-only data, matching the table in mpc837x_rdb.c.
+ */
+static const char * const board[] __initconst = {
+	"MPC8313ERDB",
+	"fsl,mpc8315erdb",
+	NULL
+};
+
+/*
+ * Called very early, MMU is off, device-tree isn't unflattened
+ *
+ * NOTE(review): the body passes of_root to of_device_compatible_match(),
+ * which suggests the tree is available when this runs - confirm whether
+ * the sentence above is still accurate.
+ *
+ * Returns the result of matching the root node against board[].
+ */
+static int __init mpc831x_rdb_probe(void)
+{
+ return of_device_compatible_match(of_root, board);
+}
+
+/* Hook mpc83xx_declare_of_platform_devices() up as this machine's device initcall. */
+machine_device_initcall(mpc831x_rdb, mpc83xx_declare_of_platform_devices);
+
+/*
+ * Machine description for the MPC831x RDB boards, matched through the
+ * mpc831x_rdb_probe() hook. Apart from probe and setup_arch the hooks are
+ * the shared helpers.
+ */
+define_machine(mpc831x_rdb) {
+ .name = "MPC831x RDB",
+ .probe = mpc831x_rdb_probe,
+ .setup_arch = mpc831x_rdb_setup_arch,
+ .discover_phbs = mpc83xx_setup_pci,
+ .init_IRQ = mpc83xx_ipic_init_IRQ,
+ .get_irq = ipic_get_irq,
+ .restart = mpc83xx_restart,
+ .time_init = mpc83xx_time_init,
+ .progress = udbg_progress,
+};
diff --git a/arch/powerpc/platforms/83xx/mpc832x_rdb.c b/arch/powerpc/platforms/83xx/mpc832x_rdb.c
new file mode 100644
index 000000000..d523ce0f4
--- /dev/null
+++ b/arch/powerpc/platforms/83xx/mpc832x_rdb.c
@@ -0,0 +1,227 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * arch/powerpc/platforms/83xx/mpc832x_rdb.c
+ *
+ * Copyright (C) Freescale Semiconductor, Inc. 2007. All rights reserved.
+ *
+ * Description:
+ * MPC832x RDB board specific routines.
+ * This file is based on mpc832x_mds.c and mpc8313_rdb.c
+ * Author: Michael Barkowski <michael.barkowski@freescale.com>
+ */
+
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+#include <linux/spi/spi.h>
+#include <linux/spi/mmc_spi.h>
+#include <linux/mmc/host.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/platform_device.h>
+#include <linux/fsl_devices.h>
+
+#include <asm/time.h>
+#include <asm/ipic.h>
+#include <asm/udbg.h>
+#include <soc/fsl/qe/qe.h>
+#include <sysdev/fsl_soc.h>
+#include <sysdev/fsl_pci.h>
+
+#include "mpc83xx.h"
+
+#undef DEBUG
+#ifdef DEBUG
+#define DBG(fmt...) udbg_printf(fmt)
+#else
+#define DBG(fmt...)
+#endif
+
+#ifdef CONFIG_QUICC_ENGINE
+/*
+ * Register one "mpc83xx_spi" platform device per matching device-tree node.
+ *
+ * @type, @compatible: node selection passed to for_each_compatible_node()
+ * @sysclk: clock value stored in each device's platform data
+ * @board_infos, @num_board_infos: board SPI devices; a controller is only
+ *	registered when at least one entry has its bus number
+ * @cs_control: chip-select callback stored in the platform data
+ *
+ * Returns the final value of the running device id counter. It is advanced
+ * for every node that reached registration or failed with an error, but not
+ * for nodes skipped because no board info uses their bus.
+ */
+static int __init of_fsl_spi_probe(char *type, char *compatible, u32 sysclk,
+ struct spi_board_info *board_infos,
+ unsigned int num_board_infos,
+ void (*cs_control)(struct spi_device *dev,
+ bool on))
+{
+ struct device_node *np;
+ /* platform device id for the next device; also the return value */
+ unsigned int i = 0;
+
+ for_each_compatible_node(np, type, compatible) {
+ int ret;
+ unsigned int j;
+ const void *prop;
+ struct resource res[2];
+ struct platform_device *pdev;
+ struct fsl_spi_platform_data pdata = {
+ .cs_control = cs_control,
+ };
+
+ memset(res, 0, sizeof(res));
+
+ pdata.sysclk = sysclk;
+
+ /* bus number is the first cell of "reg", read as a raw u32 */
+ prop = of_get_property(np, "reg", NULL);
+ if (!prop)
+ goto err;
+ pdata.bus_num = *(u32 *)prop;
+
+ /* an explicit "cell-index" overrides the running device id */
+ prop = of_get_property(np, "cell-index", NULL);
+ if (prop)
+ i = *(u32 *)prop;
+
+ prop = of_get_property(np, "mode", NULL);
+ if (prop && !strcmp(prop, "cpu-qe"))
+ pdata.flags = SPI_QE_CPU_MODE;
+
+ /* one chip select per board device that sits on this bus */
+ for (j = 0; j < num_board_infos; j++) {
+ if (board_infos[j].bus_num == pdata.bus_num)
+ pdata.max_chipselect++;
+ }
+
+ /* nothing on this bus: skip the node without advancing i */
+ if (!pdata.max_chipselect)
+ continue;
+
+ ret = of_address_to_resource(np, 0, &res[0]);
+ if (ret)
+ goto err;
+
+ ret = of_irq_to_resource(np, 0, &res[1]);
+ if (ret <= 0)
+ goto err;
+
+ pdev = platform_device_alloc("mpc83xx_spi", i);
+ if (!pdev)
+ goto err;
+
+ ret = platform_device_add_data(pdev, &pdata, sizeof(pdata));
+ if (ret)
+ goto unreg;
+
+ ret = platform_device_add_resources(pdev, res,
+ ARRAY_SIZE(res));
+ if (ret)
+ goto unreg;
+
+ ret = platform_device_add(pdev);
+ if (ret)
+ goto unreg;
+
+ goto next;
+unreg:
+ /* drop the reference on the allocated but unregistered device */
+ platform_device_put(pdev);
+err:
+ pr_err("%pOF: registration failed\n", np);
+next:
+ i++;
+ }
+
+ return i;
+}
+
+/*
+ * Register the SPI controllers and the board's SPI devices.
+ *
+ * The controller clock is get_brgfreq(), or fsl_get_sys_freq() when that
+ * reports -1. Controllers are first looked up as "fsl,spi"; only if that
+ * pass returns 0 is the ("spi", "fsl_spi") lookup tried, and its result is
+ * ignored.
+ *
+ * Returns -ENODEV when neither clock is available, otherwise the result of
+ * spi_register_board_info().
+ */
+static int __init fsl_spi_init(struct spi_board_info *board_infos,
+			       unsigned int num_board_infos,
+			       void (*cs_control)(struct spi_device *spi,
+						  bool on))
+{
+	u32 clk;
+	int found;
+
+	/* SPI controller is either clocked from QE or SoC clock */
+	clk = get_brgfreq();
+	if (clk == -1)
+		clk = fsl_get_sys_freq();
+	if (clk == -1)
+		return -ENODEV;
+
+	found = of_fsl_spi_probe(NULL, "fsl,spi", clk, board_infos,
+				 num_board_infos, cs_control);
+	if (found == 0)
+		of_fsl_spi_probe("spi", "fsl_spi", clk, board_infos,
+				 num_board_infos, cs_control);
+
+	return spi_register_board_info(board_infos, num_board_infos);
+}
+
+/*
+ * Chip-select callback handed to fsl_spi_init(): logs the request at debug
+ * level and passes @on to par_io_data_set() for port 3, pin 13 (configured
+ * as "!SD_CS" in mpc832x_spi_init()).
+ */
+static void mpc83xx_spi_cs_control(struct spi_device *spi, bool on)
+{
+ pr_debug("%s %d %d\n", __func__, spi_get_chipselect(spi, 0), on);
+ par_io_data_set(3, 13, on);
+}
+
+/* Platform data attached to the mmc_spi board device below. */
+static struct mmc_spi_platform_data mpc832x_mmc_pdata = {
+ .ocr_mask = MMC_VDD_33_34,
+};
+
+/*
+ * The single legacy SPI board device, registered by mpc832x_spi_init()
+ * through fsl_spi_init(). bus_num is compared against each controller's
+ * "reg" value in of_fsl_spi_probe().
+ */
+static struct spi_board_info mpc832x_spi_boardinfo = {
+ .bus_num = 0x4c0,
+ .chip_select = 0,
+ .max_speed_hz = 50000000,
+ .modalias = "mmc_spi",
+ .platform_data = &mpc832x_mmc_pdata,
+};
+
+/*
+ * Device initcall: configure the SPI1 and SD card pins, then register the
+ * legacy SPI board device unless the device tree already carries an
+ * "mmc-spi-slot" node.
+ *
+ * Returns 0 when such a node exists, otherwise the result of fsl_spi_init().
+ */
+static int __init mpc832x_spi_init(void)
+{
+	struct device_node *slot;
+	bool has_slot;
+
+	par_io_config_pin(3, 0, 3, 0, 1, 0); /* SPI1 MOSI, I/O */
+	par_io_config_pin(3, 1, 3, 0, 1, 0); /* SPI1 MISO, I/O */
+	par_io_config_pin(3, 2, 3, 0, 1, 0); /* SPI1 CLK, I/O */
+	par_io_config_pin(3, 3, 2, 0, 1, 0); /* SPI1 SEL, I */
+
+	par_io_config_pin(3, 13, 1, 0, 0, 0); /* !SD_CS, O */
+	par_io_config_pin(3, 14, 2, 0, 0, 0); /* SD_INSERT, I */
+	par_io_config_pin(3, 15, 2, 0, 0, 0); /* SD_PROTECT,I */
+
+	/*
+	 * Don't bother with legacy stuff when device tree contains
+	 * mmc-spi-slot node.
+	 */
+	slot = of_find_compatible_node(NULL, NULL, "mmc-spi-slot");
+	has_slot = slot != NULL;
+	of_node_put(slot);
+	if (has_slot)
+		return 0;
+
+	return fsl_spi_init(&mpc832x_spi_boardinfo, 1, mpc83xx_spi_cs_control);
+}
+machine_device_initcall(mpc832x_rdb, mpc832x_spi_init);
+#endif /* CONFIG_QUICC_ENGINE */
+
+/*
+ * MPC832x RDB setup_arch hook.
+ *
+ * Always runs the common mpc83xx_setup_arch(). With CONFIG_QUICC_ENGINE, and
+ * only when a "par_io" node exists, it initialises parallel I/O and applies
+ * the pin configuration of every "ucc" node.
+ */
+static void __init mpc832x_rdb_setup_arch(void)
+{
+#ifdef CONFIG_QUICC_ENGINE
+	struct device_node *par_io;
+	struct device_node *ucc;
+#endif
+
+	mpc83xx_setup_arch();
+
+#ifdef CONFIG_QUICC_ENGINE
+	par_io = of_find_node_by_name(NULL, "par_io");
+	if (!par_io)
+		return;
+
+	par_io_init(par_io);
+	of_node_put(par_io);
+
+	for_each_node_by_name(ucc, "ucc")
+		par_io_of_config(ucc);
+#endif /* CONFIG_QUICC_ENGINE */
+}
+
+/* Hook mpc83xx_declare_of_platform_devices() up as this machine's device initcall. */
+machine_device_initcall(mpc832x_rdb, mpc83xx_declare_of_platform_devices);
+
+/*
+ * Machine description for the MPC832x RDB. Only setup_arch is board
+ * specific; the remaining hooks are the shared helpers.
+ */
+define_machine(mpc832x_rdb) {
+ .name = "MPC832x RDB",
+ .compatible = "MPC832xRDB",
+ .setup_arch = mpc832x_rdb_setup_arch,
+ .discover_phbs = mpc83xx_setup_pci,
+ .init_IRQ = mpc83xx_ipic_init_IRQ,
+ .get_irq = ipic_get_irq,
+ .restart = mpc83xx_restart,
+ .time_init = mpc83xx_time_init,
+ .progress = udbg_progress,
+};
diff --git a/arch/powerpc/platforms/83xx/mpc834x_itx.c b/arch/powerpc/platforms/83xx/mpc834x_itx.c
new file mode 100644
index 000000000..e45b98ff0
--- /dev/null
+++ b/arch/powerpc/platforms/83xx/mpc834x_itx.c
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * arch/powerpc/platforms/83xx/mpc834x_itx.c
+ *
+ * MPC834x ITX board specific routines
+ *
+ * Maintainer: Kumar Gala <galak@kernel.crashing.org>
+ */
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/reboot.h>
+#include <linux/pci.h>
+#include <linux/kdev_t.h>
+#include <linux/major.h>
+#include <linux/console.h>
+#include <linux/delay.h>
+#include <linux/seq_file.h>
+#include <linux/root_dev.h>
+#include <linux/of_platform.h>
+
+#include <linux/atomic.h>
+#include <asm/time.h>
+#include <asm/io.h>
+#include <asm/machdep.h>
+#include <asm/ipic.h>
+#include <asm/irq.h>
+#include <asm/udbg.h>
+#include <sysdev/fsl_soc.h>
+#include <sysdev/fsl_pci.h>
+
+#include "mpc83xx.h"
+
+/* Extra bus probed on this board in addition to the common 83xx bus list. */
+static const struct of_device_id mpc834x_itx_ids[] __initconst = {
+ { .compatible = "fsl,pq2pro-localbus", },
+ {},
+};
+
+/*
+ * Device initcall: run the common mpc83xx_declare_of_platform_devices()
+ * (its return value is ignored) and then probe the local bus listed in
+ * mpc834x_itx_ids. Returns the result of that second probe.
+ */
+static int __init mpc834x_itx_declare_of_platform_devices(void)
+{
+ mpc83xx_declare_of_platform_devices();
+ return of_platform_bus_probe(NULL, mpc834x_itx_ids, NULL);
+}
+machine_device_initcall(mpc834x_itx, mpc834x_itx_declare_of_platform_devices);
+
+/* ************************************************************************
+ *
+ * Setup the architecture
+ *
+ * Runs the common mpc83xx_setup_arch() and then mpc834x_usb_cfg(); the
+ * int returned by mpc834x_usb_cfg() is not checked.
+ *
+ */
+static void __init mpc834x_itx_setup_arch(void)
+{
+ mpc83xx_setup_arch();
+
+ mpc834x_usb_cfg();
+}
+
+/*
+ * Machine description for the MPC834x ITX. Only setup_arch is board
+ * specific; the remaining hooks are the shared helpers.
+ */
+define_machine(mpc834x_itx) {
+ .name = "MPC834x ITX",
+ .compatible = "MPC834xMITX",
+ .setup_arch = mpc834x_itx_setup_arch,
+ .discover_phbs = mpc83xx_setup_pci,
+ .init_IRQ = mpc83xx_ipic_init_IRQ,
+ .get_irq = ipic_get_irq,
+ .restart = mpc83xx_restart,
+ .time_init = mpc83xx_time_init,
+ .progress = udbg_progress,
+};
diff --git a/arch/powerpc/platforms/83xx/mpc836x_rdk.c b/arch/powerpc/platforms/83xx/mpc836x_rdk.c
new file mode 100644
index 000000000..1fc9d1235
--- /dev/null
+++ b/arch/powerpc/platforms/83xx/mpc836x_rdk.c
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * MPC8360E-RDK board file.
+ *
+ * Copyright (c) 2006 Freescale Semiconductor, Inc.
+ * Copyright (c) 2007-2008 MontaVista Software, Inc.
+ *
+ * Author: Anton Vorontsov <avorontsov@ru.mvista.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/of_platform.h>
+#include <linux/io.h>
+#include <asm/time.h>
+#include <asm/ipic.h>
+#include <asm/udbg.h>
+#include <soc/fsl/qe/qe.h>
+#include <sysdev/fsl_soc.h>
+#include <sysdev/fsl_pci.h>
+
+#include "mpc83xx.h"
+
+/* Hook mpc83xx_declare_of_platform_devices() up as this machine's device initcall. */
+machine_device_initcall(mpc836x_rdk, mpc83xx_declare_of_platform_devices);
+
+/* setup_arch hook: this board needs nothing beyond the common 83xx setup. */
+static void __init mpc836x_rdk_setup_arch(void)
+{
+ mpc83xx_setup_arch();
+}
+
+/*
+ * Machine description for the MPC836x RDK. setup_arch only wraps the common
+ * setup; the remaining hooks are the shared helpers.
+ */
+define_machine(mpc836x_rdk) {
+ .name = "MPC836x RDK",
+ .compatible = "fsl,mpc8360rdk",
+ .setup_arch = mpc836x_rdk_setup_arch,
+ .discover_phbs = mpc83xx_setup_pci,
+ .init_IRQ = mpc83xx_ipic_init_IRQ,
+ .get_irq = ipic_get_irq,
+ .restart = mpc83xx_restart,
+ .time_init = mpc83xx_time_init,
+ .progress = udbg_progress,
+};
diff --git a/arch/powerpc/platforms/83xx/mpc837x_rdb.c b/arch/powerpc/platforms/83xx/mpc837x_rdb.c
new file mode 100644
index 000000000..39e78018d
--- /dev/null
+++ b/arch/powerpc/platforms/83xx/mpc837x_rdb.c
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * arch/powerpc/platforms/83xx/mpc837x_rdb.c
+ *
+ * Copyright (C) 2007 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * MPC837x RDB board specific routines
+ */
+
+#include <linux/pci.h>
+#include <linux/of_platform.h>
+
+#include <asm/time.h>
+#include <asm/ipic.h>
+#include <asm/udbg.h>
+#include <sysdev/fsl_soc.h>
+#include <sysdev/fsl_pci.h>
+
+#include "mpc83xx.h"
+
+/*
+ * Route the USBB and SPI pin groups to the SD function by rewriting the
+ * SICRL and SICRH fields through a temporary mapping of the first 0x1000
+ * bytes of the IMMR region. Warns and leaves the registers untouched when
+ * the mapping fails.
+ */
+static void __init mpc837x_rdb_sd_cfg(void)
+{
+	void __iomem *immr = ioremap(get_immrbase(), 0x1000);
+
+	if (WARN_ON(!immr))
+		return;
+
+	/*
+	 * On RDB boards (in contrast to MDS) USBB pins are used for SD only,
+	 * so we can safely mux them away from the USB block.
+	 */
+	clrsetbits_be32(immr + MPC83XX_SICRL_OFFS, MPC837X_SICRL_USBB_MASK,
+			MPC837X_SICRL_SD);
+	clrsetbits_be32(immr + MPC83XX_SICRH_OFFS, MPC837X_SICRH_SPI_MASK,
+			MPC837X_SICRH_SD);
+
+	iounmap(immr);
+}
+
+/* ************************************************************************
+ *
+ * Setup the architecture
+ *
+ * Runs the common mpc83xx_setup_arch(), then mpc837x_usb_cfg() (return
+ * value not checked) and finally the SD pin muxing in mpc837x_rdb_sd_cfg().
+ *
+ */
+static void __init mpc837x_rdb_setup_arch(void)
+{
+ mpc83xx_setup_arch();
+ mpc837x_usb_cfg();
+ mpc837x_rdb_sd_cfg();
+}
+
+/* Hook mpc83xx_declare_of_platform_devices() up as this machine's device initcall. */
+machine_device_initcall(mpc837x_rdb, mpc83xx_declare_of_platform_devices);
+
+/* Root-node compatible strings accepted by mpc837x_rdb_probe(). */
+static const char * const board[] __initconst = {
+ "fsl,mpc8377rdb",
+ "fsl,mpc8378rdb",
+ "fsl,mpc8379rdb",
+ "fsl,mpc8377wlan",
+ NULL
+};
+
+/*
+ * Called very early, MMU is off, device-tree isn't unflattened
+ *
+ * NOTE(review): the body passes of_root to of_device_compatible_match(),
+ * which suggests the tree is available when this runs - confirm whether
+ * the sentence above is still accurate.
+ *
+ * Returns the result of matching the root node against board[].
+ */
+static int __init mpc837x_rdb_probe(void)
+{
+ return of_device_compatible_match(of_root, board);
+}
+
+/*
+ * Machine description for the MPC837x RDB/WLAN boards, matched through the
+ * mpc837x_rdb_probe() hook. Apart from probe and setup_arch the hooks are
+ * the shared helpers.
+ */
+define_machine(mpc837x_rdb) {
+ .name = "MPC837x RDB/WLAN",
+ .probe = mpc837x_rdb_probe,
+ .setup_arch = mpc837x_rdb_setup_arch,
+ .discover_phbs = mpc83xx_setup_pci,
+ .init_IRQ = mpc83xx_ipic_init_IRQ,
+ .get_irq = ipic_get_irq,
+ .restart = mpc83xx_restart,
+ .time_init = mpc83xx_time_init,
+ .progress = udbg_progress,
+};
diff --git a/arch/powerpc/platforms/83xx/mpc83xx.h b/arch/powerpc/platforms/83xx/mpc83xx.h
new file mode 100644
index 000000000..0b8738a2b
--- /dev/null
+++ b/arch/powerpc/platforms/83xx/mpc83xx.h
@@ -0,0 +1,83 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __MPC83XX_H__
+#define __MPC83XX_H__
+
+#include <linux/init.h>
+
+/* System Clock Control Register */
+#define MPC83XX_SCCR_OFFS 0xA08
+#define MPC83XX_SCCR_USB_MASK 0x00f00000
+#define MPC83XX_SCCR_USB_MPHCM_11 0x00c00000
+#define MPC83XX_SCCR_USB_MPHCM_01 0x00400000
+#define MPC83XX_SCCR_USB_MPHCM_10 0x00800000
+#define MPC83XX_SCCR_USB_DRCM_11 0x00300000
+#define MPC83XX_SCCR_USB_DRCM_01 0x00100000
+#define MPC83XX_SCCR_USB_DRCM_10 0x00200000
+#define MPC8315_SCCR_USB_MASK 0x00c00000
+#define MPC8315_SCCR_USB_DRCM_11 0x00c00000
+#define MPC8315_SCCR_USB_DRCM_01 0x00400000
+#define MPC837X_SCCR_USB_DRCM_11 0x00c00000
+
+/* system i/o configuration register low */
+#define MPC83XX_SICRL_OFFS 0x114
+#define MPC834X_SICRL_USB_MASK 0x60000000
+#define MPC834X_SICRL_USB0 0x20000000
+#define MPC834X_SICRL_USB1 0x40000000
+#define MPC831X_SICRL_USB_MASK 0x00000c00
+#define MPC831X_SICRL_USB_ULPI 0x00000800
+#define MPC8315_SICRL_USB_MASK 0x000000fc
+#define MPC8315_SICRL_USB_ULPI 0x00000054
+#define MPC837X_SICRL_USB_MASK 0xf0000000
+#define MPC837X_SICRL_USB_ULPI 0x50000000
+#define MPC837X_SICRL_USBB_MASK 0x30000000
+#define MPC837X_SICRL_SD 0x20000000
+
+/* system i/o configuration register high */
+#define MPC83XX_SICRH_OFFS 0x118
+#define MPC8308_SICRH_USB_MASK 0x000c0000
+#define MPC8308_SICRH_USB_ULPI 0x00040000
+#define MPC834X_SICRH_USB_UTMI 0x00020000
+#define MPC831X_SICRH_USB_MASK 0x000000e0
+#define MPC831X_SICRH_USB_ULPI 0x000000a0
+#define MPC8315_SICRH_USB_MASK 0x0000ff00
+#define MPC8315_SICRH_USB_ULPI 0x00000000
+#define MPC837X_SICRH_SPI_MASK 0x00000003
+#define MPC837X_SICRH_SD 0x00000001
+
+/* USB Control Register */
+#define FSL_USB2_CONTROL_OFFS 0x500
+#define CONTROL_UTMI_PHY_EN 0x00000200
+#define CONTROL_REFSEL_24MHZ 0x00000040
+#define CONTROL_REFSEL_48MHZ 0x00000080
+#define CONTROL_PHY_CLK_SEL_ULPI 0x00000400
+#define CONTROL_OTG_PORT 0x00000020
+
+/* USB PORTSC Registers */
+#define FSL_USB2_PORTSC1_OFFS 0x184
+#define FSL_USB2_PORTSC2_OFFS 0x188
+#define PORTSCX_PTW_16BIT 0x10000000
+#define PORTSCX_PTS_UTMI 0x00000000
+#define PORTSCX_PTS_ULPI 0x80000000
+
+/*
+ * Declaration for the various functions exported by the
+ * mpc83xx_* files. Mostly for use by mpc83xx_setup
+ */
+
+/* restart and time_init hooks used in the board define_machine() blocks */
+extern void __noreturn mpc83xx_restart(char *cmd);
+extern long mpc83xx_time_init(void);
+/* per-SoC USB configuration helpers called from the board setup_arch hooks */
+int __init mpc837x_usb_cfg(void);
+int __init mpc834x_usb_cfg(void);
+int __init mpc831x_usb_cfg(void);
+/* init_IRQ hook used in the board define_machine() blocks */
+extern void mpc83xx_ipic_init_IRQ(void);
+
+/*
+ * discover_phbs hook. Without CONFIG_PCI the name expands to NULL so the
+ * board files can still assign it unconditionally.
+ */
+#ifdef CONFIG_PCI
+extern void mpc83xx_setup_pci(void);
+#else
+#define mpc83xx_setup_pci NULL
+#endif
+
+/* common device initcall and common setup_arch step shared by the boards */
+extern int mpc83xx_declare_of_platform_devices(void);
+extern void mpc83xx_setup_arch(void);
+
+#endif /* __MPC83XX_H__ */
diff --git a/arch/powerpc/platforms/83xx/suspend-asm.S b/arch/powerpc/platforms/83xx/suspend-asm.S
new file mode 100644
index 000000000..bc6bd4d0a
--- /dev/null
+++ b/arch/powerpc/platforms/83xx/suspend-asm.S
@@ -0,0 +1,551 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Enter and leave deep sleep state on MPC83xx
+ *
+ * Copyright (c) 2006-2008 Freescale Semiconductor, Inc.
+ * Author: Scott Wood <scottwood@freescale.com>
+ */
+
+#include <asm/page.h>
+#include <asm/ppc_asm.h>
+#include <asm/reg.h>
+#include <asm/asm-offsets.h>
+
+/*
+ * Byte offsets into mpc83xx_sleep_save_area. mpc83xx_enter_deep_sleep
+ * stores each piece of CPU state at these offsets and mpc83xx_deep_resume
+ * loads it back from the same place, so the two must stay in sync.
+ */
+#define SS_MEMSAVE 0x00 /* First 8 bytes of RAM */
+#define SS_HID 0x08 /* 3 HIDs */
+#define SS_IABR 0x14 /* 2 IABRs */
+#define SS_IBCR 0x1c
+#define SS_DABR 0x20 /* 2 DABRs */
+#define SS_DBCR 0x28
+#define SS_SP 0x2c
+#define SS_SR 0x30 /* 16 segment registers */
+#define SS_R2 0x70
+#define SS_MSR 0x74
+#define SS_SDR1 0x78
+#define SS_LR 0x7c
+#define SS_SPRG 0x80 /* 8 SPRGs */
+#define SS_DBAT 0xa0 /* 8 DBATs */
+#define SS_IBAT 0xe0 /* 8 IBATs */
+#define SS_TB 0x120
+#define SS_CR 0x128
+#define SS_GPREG 0x12c /* r12-r31 */
+/* SS_GPREG + 20 registers * 4 bytes */
+#define STATE_SAVE_SIZE 0x17c
+
+ .section .data
+ .align 5
+
+/* Storage for the state described by the SS_* offsets above. */
+mpc83xx_sleep_save_area:
+ .space STATE_SAVE_SIZE
+/* Copy of the r3 argument of mpc83xx_enter_deep_sleep, reloaded after r3 is reused. */
+immrbase:
+ .long 0
+
+ .section .text
+ .align 5
+
+ /*
+ * void mpc83xx_enter_deep_sleep(phys_addr_t immrbase)
+ *
+ * r3 = physical address of IMMR
+ *
+ * Saves the CPU state into mpc83xx_sleep_save_area, writes the resume
+ * magic and the physical address of mpc83xx_deep_resume into the first
+ * two words of RAM, moves IMMR back to DEFAULT_IMMR_VALUE and then sets
+ * MSR[POW] with HID0[SLEEP] selected. Control comes back through
+ * mpc83xx_deep_resume, which returns to this function's caller using
+ * the LR and MSR saved here.
+ */
+_GLOBAL(mpc83xx_enter_deep_sleep)
+ lis r4, immrbase@ha
+ stw r3, immrbase@l(r4)
+
+ /* The first 2 words of memory are used to communicate with the
+ * bootloader, to tell it how to resume.
+ *
+ * The first word is the magic number 0xf5153ae5, and the second
+ * is the pointer to mpc83xx_deep_resume.
+ *
+ * The original content of these two words is saved in SS_MEMSAVE.
+ */
+
+ /* From here on r3 is the save-area base for all SS_* stores. */
+ lis r3, mpc83xx_sleep_save_area@h
+ ori r3, r3, mpc83xx_sleep_save_area@l
+
+ lis r4, KERNELBASE@h
+ lwz r5, 0(r4)
+ lwz r6, 4(r4)
+
+ stw r5, SS_MEMSAVE+0(r3)
+ stw r6, SS_MEMSAVE+4(r3)
+
+ /* Save HID0-HID2 */
+ mfspr r5, SPRN_HID0
+ mfspr r6, SPRN_HID1
+ mfspr r7, SPRN_HID2
+
+ stw r5, SS_HID+0(r3)
+ stw r6, SS_HID+4(r3)
+ stw r7, SS_HID+8(r3)
+
+ /* Save the IABR/IBCR and DABR/DBCR registers */
+ mfspr r4, SPRN_IABR
+ mfspr r5, SPRN_IABR2
+ mfspr r6, SPRN_IBCR
+ mfspr r7, SPRN_DABR
+ mfspr r8, SPRN_DABR2
+ mfspr r9, SPRN_DBCR
+
+ stw r4, SS_IABR+0(r3)
+ stw r5, SS_IABR+4(r3)
+ stw r6, SS_IBCR(r3)
+ stw r7, SS_DABR+0(r3)
+ stw r8, SS_DABR+4(r3)
+ stw r9, SS_DBCR(r3)
+
+ /* Save SPRG0-3 and SDR1 */
+ mfspr r4, SPRN_SPRG0
+ mfspr r5, SPRN_SPRG1
+ mfspr r6, SPRN_SPRG2
+ mfspr r7, SPRN_SPRG3
+ mfsdr1 r8
+
+ stw r4, SS_SPRG+0(r3)
+ stw r5, SS_SPRG+4(r3)
+ stw r6, SS_SPRG+8(r3)
+ stw r7, SS_SPRG+12(r3)
+ stw r8, SS_SDR1(r3)
+
+ /* Save SPRG4-7 */
+ mfspr r4, SPRN_SPRG4
+ mfspr r5, SPRN_SPRG5
+ mfspr r6, SPRN_SPRG6
+ mfspr r7, SPRN_SPRG7
+
+ stw r4, SS_SPRG+16(r3)
+ stw r5, SS_SPRG+20(r3)
+ stw r6, SS_SPRG+24(r3)
+ stw r7, SS_SPRG+28(r3)
+
+ /* Save DBAT0-7, upper word first then lower word for each pair */
+ mfspr r4, SPRN_DBAT0U
+ mfspr r5, SPRN_DBAT0L
+ mfspr r6, SPRN_DBAT1U
+ mfspr r7, SPRN_DBAT1L
+
+ stw r4, SS_DBAT+0x00(r3)
+ stw r5, SS_DBAT+0x04(r3)
+ stw r6, SS_DBAT+0x08(r3)
+ stw r7, SS_DBAT+0x0c(r3)
+
+ mfspr r4, SPRN_DBAT2U
+ mfspr r5, SPRN_DBAT2L
+ mfspr r6, SPRN_DBAT3U
+ mfspr r7, SPRN_DBAT3L
+
+ stw r4, SS_DBAT+0x10(r3)
+ stw r5, SS_DBAT+0x14(r3)
+ stw r6, SS_DBAT+0x18(r3)
+ stw r7, SS_DBAT+0x1c(r3)
+
+ mfspr r4, SPRN_DBAT4U
+ mfspr r5, SPRN_DBAT4L
+ mfspr r6, SPRN_DBAT5U
+ mfspr r7, SPRN_DBAT5L
+
+ stw r4, SS_DBAT+0x20(r3)
+ stw r5, SS_DBAT+0x24(r3)
+ stw r6, SS_DBAT+0x28(r3)
+ stw r7, SS_DBAT+0x2c(r3)
+
+ mfspr r4, SPRN_DBAT6U
+ mfspr r5, SPRN_DBAT6L
+ mfspr r6, SPRN_DBAT7U
+ mfspr r7, SPRN_DBAT7L
+
+ stw r4, SS_DBAT+0x30(r3)
+ stw r5, SS_DBAT+0x34(r3)
+ stw r6, SS_DBAT+0x38(r3)
+ stw r7, SS_DBAT+0x3c(r3)
+
+ /* Save IBAT0-7 in the same upper/lower layout */
+ mfspr r4, SPRN_IBAT0U
+ mfspr r5, SPRN_IBAT0L
+ mfspr r6, SPRN_IBAT1U
+ mfspr r7, SPRN_IBAT1L
+
+ stw r4, SS_IBAT+0x00(r3)
+ stw r5, SS_IBAT+0x04(r3)
+ stw r6, SS_IBAT+0x08(r3)
+ stw r7, SS_IBAT+0x0c(r3)
+
+ mfspr r4, SPRN_IBAT2U
+ mfspr r5, SPRN_IBAT2L
+ mfspr r6, SPRN_IBAT3U
+ mfspr r7, SPRN_IBAT3L
+
+ stw r4, SS_IBAT+0x10(r3)
+ stw r5, SS_IBAT+0x14(r3)
+ stw r6, SS_IBAT+0x18(r3)
+ stw r7, SS_IBAT+0x1c(r3)
+
+ mfspr r4, SPRN_IBAT4U
+ mfspr r5, SPRN_IBAT4L
+ mfspr r6, SPRN_IBAT5U
+ mfspr r7, SPRN_IBAT5L
+
+ stw r4, SS_IBAT+0x20(r3)
+ stw r5, SS_IBAT+0x24(r3)
+ stw r6, SS_IBAT+0x28(r3)
+ stw r7, SS_IBAT+0x2c(r3)
+
+ mfspr r4, SPRN_IBAT6U
+ mfspr r5, SPRN_IBAT6L
+ mfspr r6, SPRN_IBAT7U
+ mfspr r7, SPRN_IBAT7L
+
+ stw r4, SS_IBAT+0x30(r3)
+ stw r5, SS_IBAT+0x34(r3)
+ stw r6, SS_IBAT+0x38(r3)
+ stw r7, SS_IBAT+0x3c(r3)
+
+ /* Save MSR, LR, CR, the stack pointer (r1) and r2. The saved LR and
+ * MSR are what mpc83xx_deep_resume finally loads into SRR0/SRR1.
+ */
+ mfmsr r4
+ mflr r5
+ mfcr r6
+
+ stw r4, SS_MSR(r3)
+ stw r5, SS_LR(r3)
+ stw r6, SS_CR(r3)
+ stw r1, SS_SP(r3)
+ stw r2, SS_R2(r3)
+
+ /* Read the 64-bit time base: retry until the upper half is the same
+ * before and after reading the lower half.
+ */
+1: mftbu r4
+ mftb r5
+ mftbu r6
+ cmpw r4, r6
+ bne 1b
+
+ stw r4, SS_TB+0(r3)
+ stw r5, SS_TB+4(r3)
+
+ /* stmw stores r12 through r31 in one go */
+ stmw r12, SS_GPREG(r3)
+
+ /* Save the 16 segment registers. r4 advances by 0x10000000 per
+ * iteration and wraps to 0 after the 16th, which ends the loop.
+ */
+ li r4, 0
+ addi r6, r3, SS_SR-4
+1: mfsrin r5, r4
+ stwu r5, 4(r6)
+ addis r4, r4, 0x1000
+ cmpwi r4, 0
+ bne 1b
+
+ /* Disable machine checks and critical exceptions */
+ mfmsr r4
+ rlwinm r4, r4, 0, ~MSR_CE
+ rlwinm r4, r4, 0, ~MSR_ME
+ mtmsr r4
+ isync
+
+#define TMP_VIRT_IMMR 0xf0000000
+#define DEFAULT_IMMR_VALUE 0xff400000
+#define IMMRBAR_BASE 0x0000
+
+ lis r4, immrbase@ha
+ lwz r4, immrbase@l(r4)
+
+ /* Use DBAT0 to address the current IMMR space */
+
+ /* r8 keeps the temporary virtual address of the current IMMR */
+ ori r4, r4, 0x002a
+ mtspr SPRN_DBAT0L, r4
+ lis r8, TMP_VIRT_IMMR@h
+ ori r4, r8, 0x001e /* 1 MByte accessible from Kernel Space only */
+ mtspr SPRN_DBAT0U, r4
+ isync
+
+ /* Use DBAT1 to address the original IMMR space */
+
+ /* r9 keeps the temporary virtual address of the default IMMR */
+ lis r4, DEFAULT_IMMR_VALUE@h
+ ori r4, r4, 0x002a
+ mtspr SPRN_DBAT1L, r4
+ lis r9, (TMP_VIRT_IMMR + 0x01000000)@h
+ ori r4, r9, 0x001e /* 1 MByte accessible from Kernel Space only */
+ mtspr SPRN_DBAT1U, r4
+ isync
+
+ /* Use DBAT2 to address the beginning of RAM. This isn't done
+ * using the normal virtual mapping, because with page debugging
+ * enabled it will be read-only.
+ */
+
+ li r4, 0x0002
+ mtspr SPRN_DBAT2L, r4
+ lis r4, KERNELBASE@h
+ ori r4, r4, 0x001e /* 1 MByte accessible from Kernel Space only */
+ mtspr SPRN_DBAT2U, r4
+ isync
+
+ /* Flush the cache with our BAT, as there will be TLB misses
+ * otherwise if page debugging is enabled, and these misses
+ * will disturb the PLRU algorithm.
+ */
+
+ bl __flush_disable_L1
+
+ /* Keep the i-cache enabled, so the hack below for low-boot
+ * flash will work.
+ */
+ mfspr r3, SPRN_HID0
+ ori r3, r3, HID0_ICE
+ mtspr SPRN_HID0, r3
+ isync
+
+ /* r6 = resume magic, r7 = physical address of mpc83xx_deep_resume;
+ * store both in the first two words of RAM for the bootloader.
+ */
+ lis r6, 0xf515
+ ori r6, r6, 0x3ae5
+
+ lis r7, mpc83xx_deep_resume@h
+ ori r7, r7, mpc83xx_deep_resume@l
+ tophys(r7, r7)
+
+ lis r5, KERNELBASE@h
+ stw r6, 0(r5)
+ stw r7, 4(r5)
+
+ /* Reset BARs */
+
+ li r4, 0
+ stw r4, 0x0024(r8)
+ stw r4, 0x002c(r8)
+ stw r4, 0x0034(r8)
+ stw r4, 0x003c(r8)
+ stw r4, 0x0064(r8)
+ stw r4, 0x006c(r8)
+
+ /* Rev 1 of the 8313 has problems with wakeup events that are
+ * pending during the transition to deep sleep state (such as if
+ * the PCI host sets the state to D3 and then D0 in rapid
+ * succession). This check shrinks the race window somewhat.
+ *
+ * See erratum PCI23, though the problem is not limited
+ * to PCI.
+ */
+
+ /* If bit 0 of the word at IMMR + 0x0b04 is set, skip the sleep and
+ * run the resume path directly.
+ * NOTE(review): 0x0b04 is presumably the PMC event register and
+ * bit 0 PMCER_PMCI - confirm against the reference manual.
+ */
+ lwz r3, 0x0b04(r8)
+ andi. r3, r3, 1
+ bne- mpc83xx_deep_resume
+
+ /* Move IMMR back to the default location, following the
+ * procedure specified in the MPC8313 manual.
+ */
+ lwz r4, IMMRBAR_BASE(r8)
+ isync
+ lis r4, DEFAULT_IMMR_VALUE@h
+ stw r4, IMMRBAR_BASE(r8)
+ lis r4, KERNELBASE@h
+ lwz r4, 0(r4)
+ isync
+ lwz r4, IMMRBAR_BASE(r9)
+ /* From now on r8 addresses IMMR through the DBAT1 window */
+ mr r8, r9
+ isync
+
+ /* Check the Reset Configuration Word to see whether flash needs
+ * to be mapped at a low address or a high address.
+ */
+
+ /* li does not modify CR0, so the beq below still tests the result
+ * of andis.; r4 ends up 0 for low boot and 0xff800000 otherwise.
+ */
+ lwz r4, 0x0904(r8)
+ andis. r4, r4, 0x0400
+ li r4, 0
+ beq boot_low
+ lis r4, 0xff80
+boot_low:
+ stw r4, 0x0020(r8)
+ /* r7 is the value written to IMMR + 0x0024 in the aligned block below */
+ lis r7, 0x8000
+ ori r7, r7, 0x0016
+
+ /* Select sleep as the power-saving mode: clear DOZE and NAP, set SLEEP */
+ mfspr r5, SPRN_HID0
+ rlwinm r5, r5, 0, ~(HID0_DOZE | HID0_NAP)
+ oris r5, r5, HID0_SLEEP@h
+ mtspr SPRN_HID0, r5
+ isync
+
+ /* r5 = current MSR with POW set; written to the MSR below */
+ mfmsr r5
+ oris r5, r5, MSR_POW@h
+
+ /* Enable the flash mapping at the appropriate address. This
+ * mapping will override the RAM mapping if booting low, so there's
+ * no need to disable the latter. This must be done inside the same
+ * cache line as setting MSR_POW, so that no instruction fetches
+ * from RAM happen after the flash mapping is turned on.
+ */
+
+ .align 5
+ stw r7, 0x0024(r8)
+ sync
+ isync
+ mtmsr r5
+ isync
+ /* Spin here until the core actually powers down */
+1: b 1b
+
+ /*
+ * Resume path. Reached either through the physical address that
+ * mpc83xx_enter_deep_sleep stored in the second word of RAM for the
+ * bootloader, or directly from the pending-wakeup check in
+ * mpc83xx_enter_deep_sleep. Restores everything saved in
+ * mpc83xx_sleep_save_area and finishes with an rfi to the saved LR
+ * using the saved MSR.
+ */
+mpc83xx_deep_resume:
+ /* Switch to real mode: rfi to the physical address of 1f with
+ * instruction and data translation turned off.
+ */
+ lis r4, 1f@h
+ ori r4, r4, 1f@l
+ tophys(r4, r4)
+ mtsrr0 r4
+
+ mfmsr r4
+ rlwinm r4, r4, 0, ~(MSR_IR | MSR_DR)
+ mtsrr1 r4
+
+ rfi
+
+1: tlbia
+ bl __inval_enable_L1
+
+ /* r3 = physical address of the save area (translation is off) */
+ lis r3, mpc83xx_sleep_save_area@h
+ ori r3, r3, mpc83xx_sleep_save_area@l
+ tophys(r3, r3)
+
+ lwz r5, SS_MEMSAVE+0(r3)
+ lwz r6, SS_MEMSAVE+4(r3)
+
+ /* A base register field of 0 means a literal zero base, so these
+ * put back the two words at physical address 0 that were overwritten
+ * with the resume magic and pointer.
+ */
+ stw r5, 0(0)
+ stw r6, 4(0)
+
+ /* Restore HID0-HID2 */
+ lwz r5, SS_HID+0(r3)
+ lwz r6, SS_HID+4(r3)
+ lwz r7, SS_HID+8(r3)
+
+ mtspr SPRN_HID0, r5
+ mtspr SPRN_HID1, r6
+ mtspr SPRN_HID2, r7
+
+ /* Restore the IABR/IBCR and DABR/DBCR registers */
+ lwz r4, SS_IABR+0(r3)
+ lwz r5, SS_IABR+4(r3)
+ lwz r6, SS_IBCR(r3)
+ lwz r7, SS_DABR+0(r3)
+ lwz r8, SS_DABR+4(r3)
+ lwz r9, SS_DBCR(r3)
+
+ mtspr SPRN_IABR, r4
+ mtspr SPRN_IABR2, r5
+ mtspr SPRN_IBCR, r6
+ mtspr SPRN_DABR, r7
+ mtspr SPRN_DABR2, r8
+ mtspr SPRN_DBCR, r9
+
+ /* Restore the 16 segment registers; same stepping as the save loop */
+ li r4, 0
+ addi r6, r3, SS_SR-4
+1: lwzu r5, 4(r6)
+ mtsrin r5, r4
+ addis r4, r4, 0x1000
+ cmpwi r4, 0
+ bne 1b
+
+ /* Restore DBAT0-7 */
+ lwz r4, SS_DBAT+0x00(r3)
+ lwz r5, SS_DBAT+0x04(r3)
+ lwz r6, SS_DBAT+0x08(r3)
+ lwz r7, SS_DBAT+0x0c(r3)
+
+ mtspr SPRN_DBAT0U, r4
+ mtspr SPRN_DBAT0L, r5
+ mtspr SPRN_DBAT1U, r6
+ mtspr SPRN_DBAT1L, r7
+
+ lwz r4, SS_DBAT+0x10(r3)
+ lwz r5, SS_DBAT+0x14(r3)
+ lwz r6, SS_DBAT+0x18(r3)
+ lwz r7, SS_DBAT+0x1c(r3)
+
+ mtspr SPRN_DBAT2U, r4
+ mtspr SPRN_DBAT2L, r5
+ mtspr SPRN_DBAT3U, r6
+ mtspr SPRN_DBAT3L, r7
+
+ lwz r4, SS_DBAT+0x20(r3)
+ lwz r5, SS_DBAT+0x24(r3)
+ lwz r6, SS_DBAT+0x28(r3)
+ lwz r7, SS_DBAT+0x2c(r3)
+
+ mtspr SPRN_DBAT4U, r4
+ mtspr SPRN_DBAT4L, r5
+ mtspr SPRN_DBAT5U, r6
+ mtspr SPRN_DBAT5L, r7
+
+ lwz r4, SS_DBAT+0x30(r3)
+ lwz r5, SS_DBAT+0x34(r3)
+ lwz r6, SS_DBAT+0x38(r3)
+ lwz r7, SS_DBAT+0x3c(r3)
+
+ mtspr SPRN_DBAT6U, r4
+ mtspr SPRN_DBAT6L, r5
+ mtspr SPRN_DBAT7U, r6
+ mtspr SPRN_DBAT7L, r7
+
+ /* Restore IBAT0-7 */
+ lwz r4, SS_IBAT+0x00(r3)
+ lwz r5, SS_IBAT+0x04(r3)
+ lwz r6, SS_IBAT+0x08(r3)
+ lwz r7, SS_IBAT+0x0c(r3)
+
+ mtspr SPRN_IBAT0U, r4
+ mtspr SPRN_IBAT0L, r5
+ mtspr SPRN_IBAT1U, r6
+ mtspr SPRN_IBAT1L, r7
+
+ lwz r4, SS_IBAT+0x10(r3)
+ lwz r5, SS_IBAT+0x14(r3)
+ lwz r6, SS_IBAT+0x18(r3)
+ lwz r7, SS_IBAT+0x1c(r3)
+
+ mtspr SPRN_IBAT2U, r4
+ mtspr SPRN_IBAT2L, r5
+ mtspr SPRN_IBAT3U, r6
+ mtspr SPRN_IBAT3L, r7
+
+ lwz r4, SS_IBAT+0x20(r3)
+ lwz r5, SS_IBAT+0x24(r3)
+ lwz r6, SS_IBAT+0x28(r3)
+ lwz r7, SS_IBAT+0x2c(r3)
+
+ mtspr SPRN_IBAT4U, r4
+ mtspr SPRN_IBAT4L, r5
+ mtspr SPRN_IBAT5U, r6
+ mtspr SPRN_IBAT5L, r7
+
+ lwz r4, SS_IBAT+0x30(r3)
+ lwz r5, SS_IBAT+0x34(r3)
+ lwz r6, SS_IBAT+0x38(r3)
+ lwz r7, SS_IBAT+0x3c(r3)
+
+ mtspr SPRN_IBAT6U, r4
+ mtspr SPRN_IBAT6L, r5
+ mtspr SPRN_IBAT7U, r6
+ mtspr SPRN_IBAT7L, r7
+
+ /* Restore SPRG4-7, then SPRG0-3 and SDR1 */
+ lwz r4, SS_SPRG+16(r3)
+ lwz r5, SS_SPRG+20(r3)
+ lwz r6, SS_SPRG+24(r3)
+ lwz r7, SS_SPRG+28(r3)
+
+ mtspr SPRN_SPRG4, r4
+ mtspr SPRN_SPRG5, r5
+ mtspr SPRN_SPRG6, r6
+ mtspr SPRN_SPRG7, r7
+
+ lwz r4, SS_SPRG+0(r3)
+ lwz r5, SS_SPRG+4(r3)
+ lwz r6, SS_SPRG+8(r3)
+ lwz r7, SS_SPRG+12(r3)
+ lwz r8, SS_SDR1(r3)
+
+ mtspr SPRN_SPRG0, r4
+ mtspr SPRN_SPRG1, r5
+ mtspr SPRN_SPRG2, r6
+ mtspr SPRN_SPRG3, r7
+ mtsdr1 r8
+
+ /* Reload r1/r2 and stage the return: SRR1 = saved MSR, SRR0 = saved
+ * LR, so the final rfi lands in the caller of
+ * mpc83xx_enter_deep_sleep with its original MSR.
+ */
+ lwz r4, SS_MSR(r3)
+ lwz r5, SS_LR(r3)
+ lwz r6, SS_CR(r3)
+ lwz r1, SS_SP(r3)
+ lwz r2, SS_R2(r3)
+
+ mtsrr1 r4
+ mtsrr0 r5
+ mtcr r6
+
+ /* Zero the lower time base word before writing the saved value.
+ * NOTE(review): presumably so that no carry into the upper word can
+ * happen between the two writes below - confirm.
+ */
+ li r4, 0
+ mtspr SPRN_TBWL, r4
+
+ lwz r4, SS_TB+0(r3)
+ lwz r5, SS_TB+4(r3)
+
+ mtspr SPRN_TBWU, r4
+ mtspr SPRN_TBWL, r5
+
+ /* Reload r12-r31; r3 is not needed after this point */
+ lmw r12, SS_GPREG(r3)
+
+ /* Kick decrementer */
+ li r0, 1
+ mtdec r0
+
+ rfi
+_ASM_NOKPROBE_SYMBOL(mpc83xx_deep_resume)
diff --git a/arch/powerpc/platforms/83xx/suspend.c b/arch/powerpc/platforms/83xx/suspend.c
new file mode 100644
index 000000000..9833c36bd
--- /dev/null
+++ b/arch/powerpc/platforms/83xx/suspend.c
@@ -0,0 +1,431 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * MPC83xx suspend support
+ *
+ * Author: Scott Wood <scottwood@freescale.com>
+ *
+ * Copyright (c) 2006-2007 Freescale Semiconductor, Inc.
+ */
+
+#include <linux/pm.h>
+#include <linux/types.h>
+#include <linux/ioport.h>
+#include <linux/interrupt.h>
+#include <linux/wait.h>
+#include <linux/sched/signal.h>
+#include <linux/kthread.h>
+#include <linux/freezer.h>
+#include <linux/suspend.h>
+#include <linux/fsl_devices.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/platform_device.h>
+#include <linux/export.h>
+
+#include <asm/reg.h>
+#include <asm/io.h>
+#include <asm/time.h>
+#include <asm/mpc6xx.h>
+#include <asm/switch_to.h>
+
+#include <sysdev/fsl_soc.h>
+
+/* Fields of the PMC config1 register, decoded in mpc83xx_change_state() */
+#define PMCCR1_NEXT_STATE 0x0C /* Next state for power management */
+#define PMCCR1_NEXT_STATE_SHIFT 2
+#define PMCCR1_CURR_STATE 0x03 /* Current state for power management*/
+/* Byte offsets from the IMMR base of the register blocks mapped in this file */
+#define IMMR_SYSCR_OFFSET 0x100
+#define IMMR_RCW_OFFSET 0x900
+/* Bit tested in rcwhr: when clear, this file treats the SoC as a PCI agent */
+#define RCW_PCI_HOST 0x80000000
+
+/* Implemented in suspend-asm.S; takes the physical address of IMMR. */
+void mpc83xx_enter_deep_sleep(phys_addr_t immrbase);
+
+/*
+ * Register layout of the power management controller. pmc_probe() maps it
+ * from the first "reg" resource of the PMC node; every access in this file
+ * goes through in_be32()/out_be32(). The bit definitions sit next to the
+ * register they apply to.
+ */
+struct mpc83xx_pmc {
+ u32 config;
+#define PMCCR_DLPEN 2 /* DDR SDRAM low power enable */
+#define PMCCR_SLPEN 1 /* System low power enable */
+
+ /* The interrupt handler acknowledges events by writing back the value it read */
+ u32 event;
+ u32 mask;
+/* All but PMCI are deep-sleep only */
+#define PMCER_GPIO 0x100
+#define PMCER_PCI 0x080
+#define PMCER_USB 0x040
+#define PMCER_ETSEC1 0x020
+#define PMCER_ETSEC2 0x010
+#define PMCER_TIMER 0x008
+#define PMCER_INT1 0x004
+#define PMCER_INT2 0x002
+#define PMCER_PMCI 0x001
+#define PMCER_ALL 0x1FF
+
+ /* deep-sleep only */
+ u32 config1;
+#define PMCCR1_USE_STATE 0x80000000
+#define PMCCR1_PME_EN 0x00000080
+#define PMCCR1_ASSERT_PME 0x00000040
+#define PMCCR1_POWER_OFF 0x00000020
+
+ /* deep-sleep only */
+ u32 config2;
+};
+
+/* Reset configuration words, mapped at IMMR + IMMR_RCW_OFFSET */
+struct mpc83xx_rcw {
+ u32 rcwlr;
+ u32 rcwhr;
+};
+
+/* Clock registers, mapped from the second "reg" resource of the PMC node */
+struct mpc83xx_clock {
+ u32 spmr;
+ u32 occr;
+ u32 sccr;
+};
+
+/* System configuration registers, mapped at IMMR + IMMR_SYSCR_OFFSET */
+struct mpc83xx_syscr {
+ __be32 sgprl;
+ __be32 sgprh;
+ __be32 spridr;
+ __be32 :32;
+ __be32 spcr;
+ __be32 sicrl;
+ __be32 sicrh;
+};
+
+/* Register values saved before deep sleep and written back afterwards */
+struct mpc83xx_saved {
+ u32 sicrl;
+ u32 sicrh;
+ u32 sccr;
+};
+
+/* Per-compatible match data for the PMC driver */
+struct pmc_type {
+ int has_deep_sleep;
+};
+
+/*
+ * has_deep_sleep: copied from the match data in pmc_probe().
+ * deep_sleeping: set by mpc83xx_suspend_begin(), cleared by mpc83xx_suspend_end().
+ */
+static int has_deep_sleep, deep_sleeping;
+/* Interrupt mapped from the PMC node; 0 when none could be mapped */
+static int pmc_irq;
+/* Register mappings set up by pmc_probe() */
+static struct mpc83xx_pmc __iomem *pmc_regs;
+static struct mpc83xx_clock __iomem *clock_regs;
+static struct mpc83xx_syscr __iomem *syscr_regs;
+static struct mpc83xx_saved saved_regs;
+/*
+ * is_pci_agent: result of mpc83xx_is_pci_agent().
+ * wake_from_pci: set by the agent thread around its pm_suspend() call.
+ */
+static int is_pci_agent, wake_from_pci;
+static phys_addr_t immrbase;
+/* Last NEXT_STATE value read from config1 by mpc83xx_change_state() */
+static int pci_pm_state;
+/* The agent thread sleeps here until pci_pm_state >= 2 */
+static DECLARE_WAIT_QUEUE_HEAD(agent_wq);
+
+/*
+ * Report whether the suspend in progress is a deep sleep.
+ *
+ * Returns the deep_sleeping flag: 1 between a PM_SUSPEND_MEM begin on
+ * hardware with deep-sleep support and the matching end callback, else 0.
+ */
+int fsl_deep_sleep(void)
+{
+ return deep_sleeping;
+}
+EXPORT_SYMBOL(fsl_deep_sleep);
+
+/*
+ * Follow a power-state request made through the PMC config1 register.
+ *
+ * Only acts when the SoC is a PCI agent. The NEXT_STATE field is copied into
+ * pci_pm_state; if it differs from CURR_STATE, CURR_STATE is updated to
+ * match and the agent thread is woken.
+ *
+ * Returns 1 if the state was changed, 0 otherwise.
+ */
+static int mpc83xx_change_state(void)
+{
+	/* The register is read unconditionally, even when not an agent. */
+	u32 cfg1 = in_be32(&pmc_regs->config1);
+	u32 current_state;
+
+	if (!is_pci_agent)
+		return 0;
+
+	pci_pm_state = (cfg1 & PMCCR1_NEXT_STATE) >> PMCCR1_NEXT_STATE_SHIFT;
+	current_state = cfg1 & PMCCR1_CURR_STATE;
+
+	if (current_state == pci_pm_state)
+		return 0;
+
+	cfg1 &= ~PMCCR1_CURR_STATE;
+	cfg1 |= pci_pm_state;
+	out_be32(&pmc_regs->config1, cfg1);
+
+	wake_up(&agent_wq);
+	return 1;
+}
+
+/*
+ * PMC interrupt handler.
+ *
+ * Reads the event register, lets mpc83xx_change_state() follow any pending
+ * PCI power-state request, then acknowledges the events by writing the read
+ * value back. Returns IRQ_HANDLED if either step did something, otherwise
+ * IRQ_NONE (the line is requested as shared).
+ */
+static irqreturn_t pmc_irq_handler(int irq, void *dev_id)
+{
+	u32 pending = in_be32(&pmc_regs->event);
+	int handled = mpc83xx_change_state() != 0;
+
+	if (pending) {
+		out_be32(&pmc_regs->event, pending);
+		handled = 1;
+	}
+
+	return handled ? IRQ_HANDLED : IRQ_NONE;
+}
+
+/*
+ * Write back the SICRL, SICRH and SCCR values captured by
+ * mpc83xx_suspend_save_regs(); called after returning from deep sleep.
+ */
+static void mpc83xx_suspend_restore_regs(void)
+{
+ out_be32(&syscr_regs->sicrl, saved_regs.sicrl);
+ out_be32(&syscr_regs->sicrh, saved_regs.sicrh);
+ out_be32(&clock_regs->sccr, saved_regs.sccr);
+}
+
+/*
+ * Capture SICRL, SICRH and SCCR into saved_regs before entering deep sleep.
+ * syscr_regs is only mapped when has_deep_sleep is set, which is also the
+ * only case in which deep_sleeping (and so this call) can be active.
+ */
+static void mpc83xx_suspend_save_regs(void)
+{
+ saved_regs.sicrl = in_be32(&syscr_regs->sicrl);
+ saved_regs.sicrh = in_be32(&syscr_regs->sicrh);
+ saved_regs.sccr = in_be32(&clock_regs->sccr);
+}
+
+/*
+ * platform_suspend_ops.enter callback: put the SoC into standby or deep
+ * sleep.
+ *
+ * The state argument is not consulted here; the choice between the two paths
+ * is made from deep_sleeping, which mpc83xx_suspend_begin() set earlier.
+ *
+ * Returns 0 after waking up, or -EAGAIN when the suspend was requested by
+ * the PCI agent thread but pci_pm_state no longer matches the requested
+ * depth. PMCCR1_PME_EN is cleared on every exit path.
+ */
+static int mpc83xx_suspend_enter(suspend_state_t state)
+{
+ int ret = -EAGAIN;
+
+ /* Don't go to sleep if there's a race where pci_pm_state changes
+ * between the agent thread checking it and the PM code disabling
+ * interrupts.
+ */
+ if (wake_from_pci) {
+ if (pci_pm_state != (deep_sleeping ? 3 : 2))
+ goto out;
+
+ out_be32(&pmc_regs->config1,
+ in_be32(&pmc_regs->config1) | PMCCR1_PME_EN);
+ }
+
+ /* Put the system into low-power mode and the RAM
+ * into self-refresh mode once the core goes to
+ * sleep.
+ */
+
+ out_be32(&pmc_regs->config, PMCCR_SLPEN | PMCCR_DLPEN);
+
+ /* If it has deep sleep (i.e. it's an 831x or compatible),
+ * disable power to the core upon entering sleep mode. This will
+ * require going through the boot firmware upon a wakeup event.
+ */
+
+ if (deep_sleeping) {
+ mpc83xx_suspend_save_regs();
+
+ /* Every PMC event source may wake the system from deep sleep */
+ out_be32(&pmc_regs->mask, PMCER_ALL);
+
+ out_be32(&pmc_regs->config1,
+ in_be32(&pmc_regs->config1) | PMCCR1_POWER_OFF);
+
+ enable_kernel_fp();
+
+ /* Comes back here through mpc83xx_deep_resume once woken */
+ mpc83xx_enter_deep_sleep(immrbase);
+
+ out_be32(&pmc_regs->config1,
+ in_be32(&pmc_regs->config1) & ~PMCCR1_POWER_OFF);
+
+ out_be32(&pmc_regs->mask, PMCER_PMCI);
+
+ mpc83xx_suspend_restore_regs();
+ } else {
+ out_be32(&pmc_regs->mask, PMCER_PMCI);
+
+ mpc6xx_enter_standby();
+ }
+
+ ret = 0;
+
+out:
+ out_be32(&pmc_regs->config1,
+ in_be32(&pmc_regs->config1) & ~PMCCR1_PME_EN);
+
+ return ret;
+}
+
+/* platform_suspend_ops.end callback: the suspend cycle is over, so clear deep_sleeping. */
+static void mpc83xx_suspend_end(void)
+{
+ deep_sleeping = 0;
+}
+
+/*
+ * platform_suspend_ops.valid callback: only standby and suspend-to-RAM are
+ * accepted. Returns nonzero for those two states and 0 for any other.
+ */
+static int mpc83xx_suspend_valid(suspend_state_t state)
+{
+	switch (state) {
+	case PM_SUSPEND_STANDBY:
+	case PM_SUSPEND_MEM:
+		return 1;
+	default:
+		return 0;
+	}
+}
+
+/*
+ * platform_suspend_ops.begin callback: record how deep the upcoming suspend
+ * will be.
+ *
+ * Standby always clears deep_sleeping. Suspend-to-RAM sets it only when the
+ * hardware has deep-sleep support; otherwise the flag is left untouched.
+ * Returns 0 for those two states and -EINVAL for any other.
+ */
+static int mpc83xx_suspend_begin(suspend_state_t state)
+{
+	if (state == PM_SUSPEND_STANDBY) {
+		deep_sleeping = 0;
+		return 0;
+	}
+
+	if (state == PM_SUSPEND_MEM) {
+		if (has_deep_sleep)
+			deep_sleeping = 1;
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+/*
+ * Kernel thread body used when the SoC is a PCI agent.
+ *
+ * Sleeps until pci_pm_state reaches 2 or more, then starts a system suspend:
+ * state 3 requests suspend-to-RAM, anything else standby. wake_from_pci is
+ * set for the duration of the pm_suspend() call so that
+ * mpc83xx_suspend_enter() can recheck the requested state.
+ */
+static int agent_thread_fn(void *data)
+{
+	for (;;) {
+		suspend_state_t target;
+
+		wait_event_interruptible(agent_wq, pci_pm_state >= 2);
+		try_to_freeze();
+
+		if (signal_pending(current) || pci_pm_state < 2)
+			continue;
+
+		/* With a preemptible kernel (or SMP), this could race with
+		 * a userspace-driven suspend request. It's probably best
+		 * to avoid mixing the two with such a configuration (or
+		 * else fix it by adding a mutex to state_store that we can
+		 * synchronize with).
+		 */
+
+		wake_from_pci = 1;
+
+		if (pci_pm_state == 3)
+			target = PM_SUSPEND_MEM;
+		else
+			target = PM_SUSPEND_STANDBY;
+		pm_suspend(target);
+
+		wake_from_pci = 0;
+	}
+
+	return 0;
+}
+
+/*
+ * Put the PMC into PCI-agent mode and start the thread that reacts to
+ * power-state requests.
+ *
+ * config1 is set to PMCCR1_USE_STATE and only the PMCI event is unmasked
+ * before the thread is created. A failure to create the thread is logged:
+ * without it, state changes seen by the interrupt handler never lead to a
+ * suspend.
+ */
+static void mpc83xx_set_agent(void)
+{
+	struct task_struct *agent;
+
+	out_be32(&pmc_regs->config1, PMCCR1_USE_STATE);
+	out_be32(&pmc_regs->mask, PMCER_PMCI);
+
+	agent = kthread_run(agent_thread_fn, NULL, "PCI power mgt");
+	if (IS_ERR(agent))
+		pr_err("mpc83xx-pmc: cannot start PCI power management thread (%ld)\n",
+		       PTR_ERR(agent));
+}
+
+/*
+ * Determine from the reset configuration word whether the SoC runs as a PCI
+ * agent.
+ *
+ * Temporarily maps the RCW registers and tests RCW_PCI_HOST in rcwhr.
+ * Returns 1 when the bit is clear (agent), 0 when it is set (host), or
+ * -ENOMEM if the registers cannot be mapped.
+ */
+static int mpc83xx_is_pci_agent(void)
+{
+	struct mpc83xx_rcw __iomem *rcw;
+	u32 rcwhr;
+
+	rcw = ioremap(get_immrbase() + IMMR_RCW_OFFSET, sizeof(*rcw));
+	if (!rcw)
+		return -ENOMEM;
+
+	rcwhr = in_be32(&rcw->rcwhr);
+	iounmap(rcw);
+
+	return (rcwhr & RCW_PCI_HOST) ? 0 : 1;
+}
+
+/* Suspend callbacks registered with suspend_set_ops() at the end of pmc_probe() */
+static const struct platform_suspend_ops mpc83xx_suspend_ops = {
+ .valid = mpc83xx_suspend_valid,
+ .begin = mpc83xx_suspend_begin,
+ .enter = mpc83xx_suspend_enter,
+ .end = mpc83xx_suspend_end,
+};
+
+/*
+ * Match data referenced from pmc_match[]: entry 0 is for parts with deep
+ * sleep, entry 1 for parts without. Never modified and only reached through
+ * const pointers, hence const.
+ */
+static const struct pmc_type pmc_types[] = {
+	{
+		.has_deep_sleep = 1,
+	},
+	{
+		.has_deep_sleep = 0,
+	}
+};
+
+/*
+ * Device tree match table. The .data pointer selects the pmc_types[] entry
+ * that pmc_probe() reads has_deep_sleep from.
+ */
+static const struct of_device_id pmc_match[] = {
+ {
+ .compatible = "fsl,mpc8313-pmc",
+ .data = &pmc_types[0],
+ },
+ {
+ .compatible = "fsl,mpc8349-pmc",
+ .data = &pmc_types[1],
+ },
+ {}
+};
+
+/*
+ * Bind to the PMC device tree node and register the platform suspend ops.
+ *
+ * Maps the PMC registers (first "reg" resource), the clock registers
+ * (second "reg" resource) and, on parts with deep sleep, the system
+ * configuration registers. Only then is the interrupt requested:
+ * pmc_irq_handler() dereferences pmc_regs and the line is shared, so the
+ * handler must never be installed while that pointer is unset or already
+ * unmapped. For the same reason the error paths need no free_irq(): the
+ * interrupt request is the last step that can fail.
+ *
+ * Returns 0 on success, -EINVAL without match data, -ENODEV if the node is
+ * disabled or a "reg" resource is missing, -ENOMEM if a mapping fails,
+ * -EBUSY if the interrupt cannot be requested, or the error from
+ * mpc83xx_is_pci_agent().
+ */
+static int pmc_probe(struct platform_device *ofdev)
+{
+	struct device_node *np = ofdev->dev.of_node;
+	struct resource res;
+	const struct pmc_type *type;
+	int ret = 0;
+
+	type = of_device_get_match_data(&ofdev->dev);
+	if (!type)
+		return -EINVAL;
+
+	if (!of_device_is_available(np))
+		return -ENODEV;
+
+	has_deep_sleep = type->has_deep_sleep;
+	immrbase = get_immrbase();
+
+	is_pci_agent = mpc83xx_is_pci_agent();
+	if (is_pci_agent < 0)
+		return is_pci_agent;
+
+	ret = of_address_to_resource(np, 0, &res);
+	if (ret)
+		return -ENODEV;
+
+	pmc_regs = ioremap(res.start, sizeof(*pmc_regs));
+	if (!pmc_regs)
+		return -ENOMEM;
+
+	ret = of_address_to_resource(np, 1, &res);
+	if (ret) {
+		ret = -ENODEV;
+		goto out_pmc;
+	}
+
+	clock_regs = ioremap(res.start, sizeof(*clock_regs));
+	if (!clock_regs) {
+		ret = -ENOMEM;
+		goto out_pmc;
+	}
+
+	if (has_deep_sleep) {
+		syscr_regs = ioremap(immrbase + IMMR_SYSCR_OFFSET,
+				     sizeof(*syscr_regs));
+		if (!syscr_regs) {
+			ret = -ENOMEM;
+			goto out_clock;
+		}
+	}
+
+	/* Everything the handler touches is mapped; it may run from here on. */
+	pmc_irq = irq_of_parse_and_map(np, 0);
+	if (pmc_irq) {
+		ret = request_irq(pmc_irq, pmc_irq_handler, IRQF_SHARED,
+				  "pmc", ofdev);
+		if (ret) {
+			ret = -EBUSY;
+			goto out_syscr;
+		}
+	}
+
+	/* Unmasks PMCI, so this has to come after the handler is installed. */
+	if (is_pci_agent)
+		mpc83xx_set_agent();
+
+	suspend_set_ops(&mpc83xx_suspend_ops);
+	return 0;
+
+out_syscr:
+	if (has_deep_sleep)
+		iounmap(syscr_regs);
+out_clock:
+	iounmap(clock_regs);
+out_pmc:
+	iounmap(pmc_regs);
+
+	return ret;
+}
+
+/*
+ * Built-in platform driver for the PMC node. There is no remove callback
+ * and bind/unbind through sysfs is suppressed, so the resources acquired in
+ * pmc_probe() are held from then on.
+ */
+static struct platform_driver pmc_driver = {
+ .driver = {
+ .name = "mpc83xx-pmc",
+ .of_match_table = pmc_match,
+ .suppress_bind_attrs = true,
+ },
+ .probe = pmc_probe,
+};
+
+builtin_platform_driver(pmc_driver);
diff --git a/arch/powerpc/platforms/83xx/usb_831x.c b/arch/powerpc/platforms/83xx/usb_831x.c
new file mode 100644
index 000000000..28c24e90f
--- /dev/null
+++ b/arch/powerpc/platforms/83xx/usb_831x.c
@@ -0,0 +1,128 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Freescale 83xx USB SOC setup code
+ *
+ * Copyright (C) 2007 Freescale Semiconductor, Inc.
+ * Author: Li Yang
+ */
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/io.h>
+
+#include <sysdev/fsl_soc.h>
+
+#include "mpc83xx.h"
+
+/*
+ * Set up clocking, pin muxing and PHY selection for the dual-role USB
+ * controller ("fsl-usb2-dr") on MPC831x / MPC8308 parts.
+ *
+ * The controller node's "phy_type" property selects between the on-chip
+ * UTMI PHY and an external ULPI PHY; with CONFIG_USB_OTG, a "dr_mode" of
+ * "otg" additionally sets the OTG port bit. The parent node's compatible
+ * string selects the SoC-specific register values.
+ *
+ * The reference on the parent node is held until the function returns,
+ * because the node is consulted again after the IMMR window is unmapped.
+ *
+ * Returns 0 on success, -ENODEV if there is no controller node, -ENOMEM if a
+ * register window cannot be mapped, -EINVAL for an unsupported PHY type, or
+ * the error from of_address_to_resource().
+ */
+int __init mpc831x_usb_cfg(void)
+{
+	u32 temp;
+	void __iomem *immap, *usb_regs;
+	struct device_node *np = NULL;
+	struct device_node *immr_node = NULL;
+	const void *prop;
+	struct resource res;
+	int ret = 0;
+#ifdef CONFIG_USB_OTG
+	const void *dr_mode;
+#endif
+
+	np = of_find_compatible_node(NULL, NULL, "fsl-usb2-dr");
+	if (!np)
+		return -ENODEV;
+	prop = of_get_property(np, "phy_type", NULL);
+
+	/* Map IMMR space for pin and clock settings */
+	immap = ioremap(get_immrbase(), 0x1000);
+	if (!immap) {
+		of_node_put(np);
+		return -ENOMEM;
+	}
+
+	/* Configure clock */
+	immr_node = of_get_parent(np);
+	if (immr_node && (of_device_is_compatible(immr_node, "fsl,mpc8315-immr") ||
+			  of_device_is_compatible(immr_node, "fsl,mpc8308-immr")))
+		clrsetbits_be32(immap + MPC83XX_SCCR_OFFS,
+				MPC8315_SCCR_USB_MASK,
+				MPC8315_SCCR_USB_DRCM_01);
+	else
+		clrsetbits_be32(immap + MPC83XX_SCCR_OFFS,
+				MPC83XX_SCCR_USB_MASK,
+				MPC83XX_SCCR_USB_DRCM_11);
+
+	/* Configure pin mux for ULPI. There is no pin mux for UTMI */
+	if (prop && !strcmp(prop, "ulpi")) {
+		if (of_device_is_compatible(immr_node, "fsl,mpc8308-immr")) {
+			clrsetbits_be32(immap + MPC83XX_SICRH_OFFS,
+					MPC8308_SICRH_USB_MASK,
+					MPC8308_SICRH_USB_ULPI);
+		} else if (of_device_is_compatible(immr_node, "fsl,mpc8315-immr")) {
+			clrsetbits_be32(immap + MPC83XX_SICRL_OFFS,
+					MPC8315_SICRL_USB_MASK,
+					MPC8315_SICRL_USB_ULPI);
+			clrsetbits_be32(immap + MPC83XX_SICRH_OFFS,
+					MPC8315_SICRH_USB_MASK,
+					MPC8315_SICRH_USB_ULPI);
+		} else {
+			clrsetbits_be32(immap + MPC83XX_SICRL_OFFS,
+					MPC831X_SICRL_USB_MASK,
+					MPC831X_SICRL_USB_ULPI);
+			clrsetbits_be32(immap + MPC83XX_SICRH_OFFS,
+					MPC831X_SICRH_USB_MASK,
+					MPC831X_SICRH_USB_ULPI);
+		}
+	}
+
+	iounmap(immap);
+
+	/* Map USB SOC space */
+	ret = of_address_to_resource(np, 0, &res);
+	if (ret)
+		goto out_put;
+
+	usb_regs = ioremap(res.start, resource_size(&res));
+	if (!usb_regs) {
+		ret = -ENOMEM;
+		goto out_put;
+	}
+
+	/* Using on-chip PHY */
+	if (prop && (!strcmp(prop, "utmi_wide") || !strcmp(prop, "utmi"))) {
+		u32 refsel;
+
+		/* On the 8308 the control register is left untouched for UTMI. */
+		if (of_device_is_compatible(immr_node, "fsl,mpc8308-immr"))
+			goto out;
+
+		if (of_device_is_compatible(immr_node, "fsl,mpc8315-immr"))
+			refsel = CONTROL_REFSEL_24MHZ;
+		else
+			refsel = CONTROL_REFSEL_48MHZ;
+		/* Set UTMI_PHY_EN and REFSEL */
+		out_be32(usb_regs + FSL_USB2_CONTROL_OFFS,
+			 CONTROL_UTMI_PHY_EN | refsel);
+	/* Using external ULPI PHY */
+	} else if (prop && !strcmp(prop, "ulpi")) {
+		/* Set PHY_CLK_SEL to ULPI */
+		temp = CONTROL_PHY_CLK_SEL_ULPI;
+#ifdef CONFIG_USB_OTG
+		/* Set OTG_PORT */
+		if (!of_device_is_compatible(immr_node, "fsl,mpc8308-immr")) {
+			dr_mode = of_get_property(np, "dr_mode", NULL);
+			if (dr_mode && !strcmp(dr_mode, "otg"))
+				temp |= CONTROL_OTG_PORT;
+		}
+#endif /* CONFIG_USB_OTG */
+		out_be32(usb_regs + FSL_USB2_CONTROL_OFFS, temp);
+	} else {
+		pr_warn("831x USB PHY type not supported\n");
+		ret = -EINVAL;
+	}
+
+out:
+	iounmap(usb_regs);
+out_put:
+	/* of_node_put() accepts NULL, so a missing parent needs no special case. */
+	of_node_put(immr_node);
+	of_node_put(np);
+	return ret;
+}
diff --git a/arch/powerpc/platforms/83xx/usb_834x.c b/arch/powerpc/platforms/83xx/usb_834x.c
new file mode 100644
index 000000000..3a8d6c662
--- /dev/null
+++ b/arch/powerpc/platforms/83xx/usb_834x.c
@@ -0,0 +1,90 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Freescale 83xx USB SOC setup code
+ *
+ * Copyright (C) 2007 Freescale Semiconductor, Inc.
+ * Author: Li Yang
+ */
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/io.h>
+
+#include <sysdev/fsl_soc.h>
+
+#include "mpc83xx.h"
+
+/*
+ * Set up the USB clock ratios and port pin muxing on MPC834x.
+ *
+ * SCCR, SICRL and SICRH are read with their USB fields cleared, updated
+ * according to the "fsl-usb2-dr" and "fsl-usb2-mph" nodes that are present,
+ * and written back. A port claimed by both controllers is reported and ends
+ * up assigned to MPH, whose settings are applied last.
+ *
+ * Returns 0, or -ENOMEM if the IMMR window cannot be mapped.
+ */
+int __init mpc834x_usb_cfg(void)
+{
+	const unsigned long both_ports = MPC834X_SICRL_USB0 | MPC834X_SICRL_USB1;
+	unsigned long sccr, sicrl, sicrh;
+	struct device_node *node;
+	void __iomem *immr;
+	const char *phy, *mode;
+	int dr_owns_port0 = 0, dr_owns_port1 = 0;
+
+	immr = ioremap(get_immrbase(), 0x1000);
+	if (!immr)
+		return -ENOMEM;
+
+	/* Read registers */
+	/* Note: DR and MPH must use the same clock setting in SCCR */
+	sccr = in_be32(immr + MPC83XX_SCCR_OFFS) & ~MPC83XX_SCCR_USB_MASK;
+	sicrl = in_be32(immr + MPC83XX_SICRL_OFFS) & ~MPC834X_SICRL_USB_MASK;
+	sicrh = in_be32(immr + MPC83XX_SICRH_OFFS) & ~MPC834X_SICRH_USB_UTMI;
+
+	/* Dual-role controller */
+	node = of_find_compatible_node(NULL, NULL, "fsl-usb2-dr");
+	if (node) {
+		sccr |= MPC83XX_SCCR_USB_DRCM_11;	/* 1:3 */
+		dr_owns_port1 = 1;
+
+		phy = of_get_property(node, "phy_type", NULL);
+		if (!phy) {
+			pr_warn("834x USB PHY type not supported\n");
+		} else if (!strcmp(phy, "utmi") || !strcmp(phy, "utmi_wide")) {
+			sicrl |= both_ports;
+			sicrh |= MPC834X_SICRH_USB_UTMI;
+			dr_owns_port0 = 1;
+		} else if (!strcmp(phy, "serial")) {
+			mode = of_get_property(node, "dr_mode", NULL);
+			if (mode && !strcmp(mode, "otg")) {
+				sicrl |= both_ports;
+				dr_owns_port0 = 1;
+			} else {
+				sicrl |= MPC834X_SICRL_USB1;
+			}
+		} else if (!strcmp(phy, "ulpi")) {
+			sicrl |= MPC834X_SICRL_USB1;
+		} else {
+			pr_warn("834x USB PHY type not supported\n");
+		}
+		of_node_put(node);
+	}
+
+	/* Multi-port host controller */
+	node = of_find_compatible_node(NULL, NULL, "fsl-usb2-mph");
+	if (node) {
+		sccr |= MPC83XX_SCCR_USB_MPHCM_11;	/* 1:3 */
+
+		if (of_get_property(node, "port0", NULL)) {
+			if (dr_owns_port0)
+				pr_warn("834x USB port0 can't be used by both DR and MPH!\n");
+			sicrl &= ~MPC834X_SICRL_USB0;
+		}
+		if (of_get_property(node, "port1", NULL)) {
+			if (dr_owns_port1)
+				pr_warn("834x USB port1 can't be used by both DR and MPH!\n");
+			sicrl &= ~MPC834X_SICRL_USB1;
+		}
+		of_node_put(node);
+	}
+
+	/* Write back */
+	out_be32(immr + MPC83XX_SCCR_OFFS, sccr);
+	out_be32(immr + MPC83XX_SICRL_OFFS, sicrl);
+	out_be32(immr + MPC83XX_SICRH_OFFS, sicrh);
+
+	iounmap(immr);
+	return 0;
+}
diff --git a/arch/powerpc/platforms/83xx/usb_837x.c b/arch/powerpc/platforms/83xx/usb_837x.c
new file mode 100644
index 000000000..726935bb6
--- /dev/null
+++ b/arch/powerpc/platforms/83xx/usb_837x.c
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Freescale 83xx USB SOC setup code
+ *
+ * Copyright (C) 2007 Freescale Semiconductor, Inc.
+ * Author: Li Yang
+ */
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/io.h>
+
+#include <sysdev/fsl_soc.h>
+
+#include "mpc83xx.h"
+
+/*
+ * Set up the USB clock and pin muxing for the dual-role controller on
+ * MPC837x. Only "ulpi" and "serial" PHY types are accepted.
+ *
+ * Returns 0 on success, -ENODEV if the controller node is missing or
+ * disabled, -EINVAL for any other PHY type, or -ENOMEM if the IMMR window
+ * cannot be mapped.
+ */
+int __init mpc837x_usb_cfg(void)
+{
+	struct device_node *dr_node;
+	const char *phy_type;
+	void __iomem *immr;
+	int ret;
+
+	dr_node = of_find_compatible_node(NULL, NULL, "fsl-usb2-dr");
+	if (!dr_node || !of_device_is_available(dr_node)) {
+		ret = -ENODEV;
+		goto put_node;
+	}
+
+	phy_type = of_get_property(dr_node, "phy_type", NULL);
+	if (!phy_type ||
+	    (strcmp(phy_type, "ulpi") != 0 && strcmp(phy_type, "serial") != 0)) {
+		pr_warn("837x USB PHY type not supported\n");
+		ret = -EINVAL;
+		goto put_node;
+	}
+
+	/* Map IMMR space for pin and clock settings */
+	immr = ioremap(get_immrbase(), 0x1000);
+	if (!immr) {
+		ret = -ENOMEM;
+		goto put_node;
+	}
+
+	/* Configure clock */
+	clrsetbits_be32(immr + MPC83XX_SCCR_OFFS, MPC837X_SCCR_USB_DRCM_11,
+			MPC837X_SCCR_USB_DRCM_11);
+
+	/* Configure pin mux for ULPI/serial */
+	clrsetbits_be32(immr + MPC83XX_SICRL_OFFS, MPC837X_SICRL_USB_MASK,
+			MPC837X_SICRL_USB_ULPI);
+
+	iounmap(immr);
+	ret = 0;
+
+put_node:
+	/* of_node_put() accepts NULL, which covers the "node not found" case. */
+	of_node_put(dr_node);
+	return ret;
+}
diff --git a/arch/powerpc/platforms/85xx/Kconfig b/arch/powerpc/platforms/85xx/Kconfig
new file mode 100644
index 000000000..9315a3b69
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/Kconfig
@@ -0,0 +1,291 @@
+# SPDX-License-Identifier: GPL-2.0
+menuconfig FSL_SOC_BOOKE
+ bool "Freescale Book-E Machine Type"
+ depends on PPC_E500
+ select FSL_SOC
+ select PPC_UDBG_16550
+ select MPIC
+ select HAVE_PCI
+ select FSL_PCI if PCI
+ select SERIAL_8250_EXTENDED if SERIAL_8250
+ select SERIAL_8250_SHARE_IRQ if SERIAL_8250
+ select FSL_CORENET_RCPM if PPC_E500MC
+ default y
+
+if FSL_SOC_BOOKE
+
+if PPC32
+
+config BSC9131_RDB
+ bool "Freescale BSC9131RDB"
+ select DEFAULT_UIMAGE
+ help
+ This option enables support for the Freescale BSC9131RDB board.
+ The BSC9131 is a heterogeneous SoC containing an e500v2 powerpc and a
+ StarCore SC3850 DSP
+ Manufacturer : Freescale Semiconductor, Inc
+
+config C293_PCIE
+ bool "Freescale C293PCIE"
+ select DEFAULT_UIMAGE
+ help
+ This option enables support for the C293PCIE board
+
+config BSC9132_QDS
+ bool "Freescale BSC9132QDS"
+ select DEFAULT_UIMAGE
+ help
+ This option enables support for the Freescale BSC9132 QDS board.
+ BSC9132 is a heterogeneous SoC containing dual e500v2 powerpc cores
+ and dual StarCore SC3850 DSP cores.
+ Manufacturer : Freescale Semiconductor, Inc
+
+config MPC8540_ADS
+ bool "Freescale MPC8540 ADS"
+ select DEFAULT_UIMAGE
+ help
+ This option enables support for the MPC 8540 ADS board
+
+config MPC8560_ADS
+ bool "Freescale MPC8560 ADS"
+ select DEFAULT_UIMAGE
+ select CPM2
+ help
+ This option enables support for the MPC 8560 ADS board
+
+config MPC85xx_CDS
+ bool "Freescale MPC85xx CDS"
+ select DEFAULT_UIMAGE
+ select PPC_I8259
+ select HAVE_RAPIDIO
+ help
+ This option enables support for the MPC85xx CDS board
+
+config MPC85xx_MDS
+ bool "Freescale MPC8568 MDS / MPC8569 MDS / P1021 MDS"
+ select DEFAULT_UIMAGE
+ select PHYLIB if NETDEVICES
+ select HAVE_RAPIDIO
+ select SWIOTLB
+ help
+ This option enables support for the MPC8568 MDS, MPC8569 MDS and P1021 MDS boards
+
+config MPC8536_DS
+ bool "Freescale MPC8536 DS"
+ select DEFAULT_UIMAGE
+ select SWIOTLB
+ help
+ This option enables support for the MPC8536 DS board
+
+config MPC85xx_DS
+ bool "Freescale MPC8544 DS / MPC8572 DS"
+ select PPC_I8259
+ select DEFAULT_UIMAGE
+ select FSL_ULI1575 if PCI
+ select SWIOTLB
+ help
+ This option enables support for the MPC8544 DS and MPC8572 DS boards
+
+config MPC85xx_RDB
+ bool "Freescale P102x MBG/UTM/RDB"
+ select PPC_I8259
+ select DEFAULT_UIMAGE
+ select SWIOTLB
+ help
+ This option enables support for the P1020 MBG PC, P1020 UTM PC,
+ P1020 RDB PC, P1020 RDB PD, P1020 RDB, P1021 RDB PC, P1024 RDB,
+ and P1025 RDB boards
+
+config PPC_P2020
+ bool "Freescale P2020"
+ default y if MPC85xx_DS || MPC85xx_RDB
+ select DEFAULT_UIMAGE
+ select SWIOTLB
+ imply PPC_I8259
+ imply FSL_ULI1575 if PCI
+ help
+ This option enables generic unified support for any board with the
+ Freescale P2020 processor.
+
+ For example: P2020 DS board, P2020 RDB board, P2020 RDB PC board or
+ CZ.NIC Turris 1.x boards.
+
+config P1010_RDB
+ bool "Freescale P1010 RDB"
+ select DEFAULT_UIMAGE
+ help
+ This option enables support for the P1010 RDB board
+
+ P1010RDB contains P1010Si, which provides CPU performance up to 800
+ MHz and 1600 DMIPS, additional functionality and faster interfaces
+ (DDR3/3L, SATA II, and PCI Express).
+
+config P1022_DS
+ bool "Freescale P1022 DS"
+ select DEFAULT_UIMAGE
+ select SWIOTLB
+ help
+ This option enables support for the Freescale P1022DS reference board.
+
+config P1022_RDK
+ bool "Freescale / iVeia P1022 RDK"
+ select DEFAULT_UIMAGE
+ help
+ This option enables support for the Freescale / iVeia P1022RDK
+ reference board.
+
+config P1023_RDB
+ bool "Freescale P1023 RDB"
+ select DEFAULT_UIMAGE
+ help
+ This option enables support for the P1023 RDB board.
+
+config TWR_P102x
+ bool "Freescale TWR-P102x"
+ select DEFAULT_UIMAGE
+ help
+ This option enables support for the TWR-P1025 board.
+
+config SOCRATES
+ bool "Socrates"
+ select DEFAULT_UIMAGE
+ help
+ This option enables support for the Socrates board.
+
+config KSI8560
+ bool "Emerson KSI8560"
+ select DEFAULT_UIMAGE
+ help
+ This option enables support for the Emerson KSI8560 board
+
+config XES_MPC85xx
+ bool "X-ES single-board computer"
+ select DEFAULT_UIMAGE
+ help
+ This option enables support for the various single-board
+ computers from Extreme Engineering Solutions (X-ES) based on
+ Freescale MPC85xx processors.
+ Manufacturer: Extreme Engineering Solutions, Inc.
+ URL: <https://www.xes-inc.com/>
+
+config STX_GP3
+	bool "Silicon Turnkey Express GP3"
+	select CPM2
+	select DEFAULT_UIMAGE
+	help
+	  This option enables support for the Silicon Turnkey Express GP3
+	  board.
+
+config TQM8540
+	bool "TQ Components TQM8540"
+	select DEFAULT_UIMAGE
+	select TQM85xx
+	help
+	  This option enables support for the TQ Components TQM8540 board.
+
+config TQM8541
+	bool "TQ Components TQM8541"
+	select DEFAULT_UIMAGE
+	select TQM85xx
+	select CPM2
+	help
+	  This option enables support for the TQ Components TQM8541 board.
+
+config TQM8548
+	bool "TQ Components TQM8548"
+	select DEFAULT_UIMAGE
+	select TQM85xx
+	help
+	  This option enables support for the TQ Components TQM8548 board.
+
+config TQM8555
+	bool "TQ Components TQM8555"
+	select DEFAULT_UIMAGE
+	select TQM85xx
+	select CPM2
+	help
+	  This option enables support for the TQ Components TQM8555 board.
+
+config TQM8560
+	bool "TQ Components TQM8560"
+	select DEFAULT_UIMAGE
+	select TQM85xx
+	select CPM2
+	help
+	  This option enables support for the TQ Components TQM8560 board.
+
+config PPA8548
+	bool "Prodrive PPA8548"
+	select DEFAULT_UIMAGE
+	select HAVE_RAPIDIO
+	help
+	  This option enables support for the Prodrive PPA8548 board.
+
+config GE_IMP3A
+ bool "GE Intelligent Platforms IMP3A"
+ select DEFAULT_UIMAGE
+ select SWIOTLB
+ select MMIO_NVRAM
+ select GPIOLIB
+ select GE_FPGA
+ help
+ This option enables support for the GE Intelligent Platforms IMP3A
+ board.
+
+ This board is a 3U CompactPCI Single Board Computer with a Freescale
+ P2020 processor.
+
+config SGY_CTS1000
+ tristate "Servergy CTS-1000 support"
+ select GPIOLIB
+ select OF_GPIO
+ depends on CORENET_GENERIC
+ help
+ Enable this to support functionality in Servergy's CTS-1000 systems.
+
+config MVME2500
+ bool "Artesyn MVME2500"
+ select DEFAULT_UIMAGE
+ help
+ This option enables support for the Emerson/Artesyn MVME2500 board.
+
+endif # PPC32
+
+config PPC_QEMU_E500
+ bool "QEMU generic e500 platform"
+ select DEFAULT_UIMAGE
+ help
+ This option enables support for running as a QEMU guest using
+ QEMU's generic e500 machine. This is not required if you're
+ using a QEMU machine that targets a specific board, such as
+ mpc8544ds.
+
+ Unlike most e500 boards that target a specific CPU, this
+ platform works with any e500-family CPU that QEMU supports.
+ Thus, you'll need to make sure CONFIG_PPC_E500MC is set or
+ unset based on the emulated CPU (or actual host CPU in the case
+ of KVM).
+
+config CORENET_GENERIC
+ bool "Freescale CoreNet Generic"
+ select DEFAULT_UIMAGE
+ select PPC_E500MC
+ select PHYS_64BIT
+ select SWIOTLB
+ select GPIOLIB
+ select GPIO_MPC8XXX
+ select HAVE_RAPIDIO
+ select PPC_EPAPR_HV_PIC
+ help
+ This option enables support for the FSL CoreNet based boards.
+ For 32bit kernel, the following boards are supported:
+ P2041 RDB, P3041 DS, P4080 DS, kmcoge4, and OCA4080
+ For 64bit kernel, the following boards are supported:
+ T208x QDS/RDB, T4240 QDS/RDB and B4 QDS
+ The following boards are supported for both 32bit and 64bit kernel:
+ P5020 DS, P5040 DS, T102x QDS/RDB, T104x QDS/RDB
+
+endif # FSL_SOC_BOOKE
+
+config TQM85xx
+ bool
diff --git a/arch/powerpc/platforms/85xx/Makefile b/arch/powerpc/platforms/85xx/Makefile
new file mode 100644
index 000000000..43c34f26f
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/Makefile
@@ -0,0 +1,37 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for the PowerPC 85xx linux kernel.
+#
+obj-$(CONFIG_SMP) += smp.o
+ifneq ($(CONFIG_FSL_CORENET_RCPM),y)
+obj-$(CONFIG_SMP) += mpc85xx_pm_ops.o
+endif
+
+obj-y += common.o
+
+obj-$(CONFIG_BSC9131_RDB) += bsc913x_rdb.o
+obj-$(CONFIG_BSC9132_QDS) += bsc913x_qds.o
+obj-$(CONFIG_C293_PCIE) += c293pcie.o
+obj-$(CONFIG_MPC8536_DS) += mpc8536_ds.o
+obj8259-$(CONFIG_PPC_I8259) += mpc85xx_8259.o
+obj-$(CONFIG_MPC85xx_DS) += mpc85xx_ds.o $(obj8259-y)
+obj-$(CONFIG_MPC85xx_MDS) += mpc85xx_mds.o
+obj-$(CONFIG_MPC85xx_RDB) += mpc85xx_rdb.o
+obj-$(CONFIG_P1010_RDB) += p1010rdb.o
+obj-$(CONFIG_P1022_DS) += p1022_ds.o
+obj-$(CONFIG_P1022_RDK) += p1022_rdk.o
+obj-$(CONFIG_P1023_RDB) += p1023_rdb.o
+obj-$(CONFIG_PPC_P2020) += p2020.o $(obj8259-y)
+obj-$(CONFIG_TWR_P102x) += twr_p102x.o
+obj-$(CONFIG_CORENET_GENERIC) += corenet_generic.o
+obj-$(CONFIG_FB_FSL_DIU) += t1042rdb_diu.o
+obj-$(CONFIG_STX_GP3) += stx_gp3.o
+obj-$(CONFIG_TQM85xx) += tqm85xx.o
+obj-$(CONFIG_PPA8548) += ppa8548.o
+obj-$(CONFIG_SOCRATES) += socrates.o socrates_fpga_pic.o
+obj-$(CONFIG_KSI8560) += ksi8560.o
+obj-$(CONFIG_XES_MPC85xx) += xes_mpc85xx.o
+obj-$(CONFIG_GE_IMP3A) += ge_imp3a.o
+obj-$(CONFIG_PPC_QEMU_E500) += qemu_e500.o
+obj-$(CONFIG_SGY_CTS1000) += sgy_cts1000.o
+obj-$(CONFIG_MVME2500) += mvme2500.o
diff --git a/arch/powerpc/platforms/85xx/bsc913x_qds.c b/arch/powerpc/platforms/85xx/bsc913x_qds.c
new file mode 100644
index 000000000..2eb62bff8
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/bsc913x_qds.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * BSC913xQDS Board Setup
+ *
+ * Author:
+ * Harninder Rai <harninder.rai@freescale.com>
+ * Priyanka Jain <Priyanka.Jain@freescale.com>
+ *
+ * Copyright 2014 Freescale Semiconductor Inc.
+ */
+
+#include <linux/of.h>
+#include <linux/pci.h>
+#include <asm/mpic.h>
+#include <sysdev/fsl_soc.h>
+#include <sysdev/fsl_pci.h>
+#include <asm/udbg.h>
+
+#include "mpc85xx.h"
+#include "smp.h"
+
+/*
+ * Allocate and initialise the MPIC for the BSC913x QDS board.
+ * An allocation failure is only logged; mpic_init() is then not called.
+ */
+void __init bsc913x_qds_pic_init(void)
+{
+	struct mpic *mpic;
+
+	mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU,
+			  0, 256, " OpenPIC ");
+	if (mpic) {
+		mpic_init(mpic);
+		return;
+	}
+
+	pr_err("bsc913x: Failed to allocate MPIC structure\n");
+}
+
+/*
+ * Board setup_arch hook: report progress, bring up SMP support when it is
+ * configured, select the primary PCI controller and log the board banner.
+ */
+static void __init bsc913x_qds_setup_arch(void)
+{
+	if (ppc_md.progress != NULL)
+		ppc_md.progress("bsc913x_qds_setup_arch()", 0);
+
+#ifdef CONFIG_SMP
+	mpc85xx_smp_init();
+#endif
+
+	fsl_pci_assign_primary();
+
+	pr_info("bsc913x board from Freescale Semiconductor\n");
+}
+
+/* Register mpc85xx_common_publish_devices() as an arch initcall for the bsc9132_qds machine. */
+machine_arch_initcall(bsc9132_qds, mpc85xx_common_publish_devices);
+
+/*
+ * Machine descriptor for the BSC9132 QDS: board name, device-tree compatible
+ * string and the callbacks defined in this file. The PCI bus fixup hook is
+ * only present when CONFIG_PCI is enabled.
+ */
+define_machine(bsc9132_qds) {
+ .name = "BSC9132 QDS",
+ .compatible = "fsl,bsc9132qds",
+ .setup_arch = bsc913x_qds_setup_arch,
+ .init_IRQ = bsc913x_qds_pic_init,
+#ifdef CONFIG_PCI
+ .pcibios_fixup_bus = fsl_pcibios_fixup_bus,
+#endif
+ .get_irq = mpic_get_irq,
+ .progress = udbg_progress,
+};
diff --git a/arch/powerpc/platforms/85xx/bsc913x_rdb.c b/arch/powerpc/platforms/85xx/bsc913x_rdb.c
new file mode 100644
index 000000000..161f006cb
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/bsc913x_rdb.c
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * BSC913xRDB Board Setup
+ *
+ * Author: Priyanka Jain <Priyanka.Jain@freescale.com>
+ *
+ * Copyright 2011-2012 Freescale Semiconductor Inc.
+ */
+
+#include <linux/of.h>
+#include <linux/pci.h>
+#include <asm/mpic.h>
+#include <sysdev/fsl_soc.h>
+#include <asm/udbg.h>
+
+#include "mpc85xx.h"
+
+/*
+ * Allocate and initialise the MPIC for the BSC913x RDB board.
+ * An allocation failure is only logged; mpic_init() is then not called.
+ */
+void __init bsc913x_rdb_pic_init(void)
+{
+	struct mpic *mpic;
+
+	mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU,
+			  0, 256, " OpenPIC ");
+	if (mpic) {
+		mpic_init(mpic);
+		return;
+	}
+
+	pr_err("bsc913x: Failed to allocate MPIC structure\n");
+}
+
+/*
+ * Setup the architecture: report progress through ppc_md.progress when that
+ * hook is set, then log the board banner. No other setup is done here.
+ */
+static void __init bsc913x_rdb_setup_arch(void)
+{
+ if (ppc_md.progress)
+ ppc_md.progress("bsc913x_rdb_setup_arch()", 0);
+
+ pr_info("bsc913x board from Freescale Semiconductor\n");
+}
+
+/* Register mpc85xx_common_publish_devices() as a device initcall for the bsc9131_rdb machine. */
+machine_device_initcall(bsc9131_rdb, mpc85xx_common_publish_devices);
+
+/*
+ * Machine descriptor for the BSC9131 RDB: board name, device-tree compatible
+ * string and the callbacks defined in this file.
+ */
+define_machine(bsc9131_rdb) {
+ .name = "BSC9131 RDB",
+ .compatible = "fsl,bsc9131rdb",
+ .setup_arch = bsc913x_rdb_setup_arch,
+ .init_IRQ = bsc913x_rdb_pic_init,
+ .get_irq = mpic_get_irq,
+ .progress = udbg_progress,
+};
diff --git a/arch/powerpc/platforms/85xx/c293pcie.c b/arch/powerpc/platforms/85xx/c293pcie.c
new file mode 100644
index 000000000..7a63a3ad5
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/c293pcie.c
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * C293PCIE Board Setup
+ *
+ * Copyright 2013 Freescale Semiconductor Inc.
+ */
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/of.h>
+
+#include <asm/machdep.h>
+#include <asm/udbg.h>
+#include <asm/mpic.h>
+
+#include <sysdev/fsl_soc.h>
+#include <sysdev/fsl_pci.h>
+
+#include "mpc85xx.h"
+
+/* Allocate the MPIC (allocation failure is fatal via BUG_ON) and initialise it. */
+static void __init c293_pcie_pic_init(void)
+{
+	const unsigned int flags = MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU;
+	struct mpic *mpic;
+
+	mpic = mpic_alloc(NULL, 0, flags, 0, 256, " OpenPIC ");
+	BUG_ON(!mpic);
+
+	mpic_init(mpic);
+}
+
+
+/*
+ * Board setup_arch hook: report progress, select the primary PCI controller
+ * and log the board banner.
+ */
+static void __init c293_pcie_setup_arch(void)
+{
+	if (ppc_md.progress != NULL)
+		ppc_md.progress("c293_pcie_setup_arch()", 0);
+
+	fsl_pci_assign_primary();
+
+	pr_info("C293 PCIE board from Freescale Semiconductor\n");
+}
+
+/* Register mpc85xx_common_publish_devices() as an arch initcall for the c293_pcie machine. */
+machine_arch_initcall(c293_pcie, mpc85xx_common_publish_devices);
+
+/*
+ * Machine descriptor for the C293 PCIE board: board name, device-tree
+ * compatible string and the callbacks defined in this file.
+ */
+define_machine(c293_pcie) {
+ .name = "C293 PCIE",
+ .compatible = "fsl,C293PCIE",
+ .setup_arch = c293_pcie_setup_arch,
+ .init_IRQ = c293_pcie_pic_init,
+ .get_irq = mpic_get_irq,
+ .progress = udbg_progress,
+};
diff --git a/arch/powerpc/platforms/85xx/common.c b/arch/powerpc/platforms/85xx/common.c
new file mode 100644
index 000000000..757811155
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/common.c
@@ -0,0 +1,106 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Routines common to most mpc85xx-based boards.
+ */
+
+#include <linux/of.h>
+#include <linux/of_irq.h>
+#include <linux/of_platform.h>
+
+#include <asm/fsl_pm.h>
+#include <soc/fsl/qe/qe.h>
+#include <sysdev/cpm2_pic.h>
+
+#include "mpc85xx.h"
+
+const struct fsl_pm_ops *qoriq_pm_ops;
+
+/*
+ * Device-tree match table handed to of_platform_bus_probe() by
+ * mpc85xx_common_publish_devices(). Entries match on device type, node name
+ * or compatible string; the empty entry terminates the table.
+ */
+static const struct of_device_id mpc85xx_common_ids[] __initconst = {
+ { .type = "soc", },
+ { .compatible = "soc", },
+ { .compatible = "simple-bus", },
+ { .name = "cpm", },
+ { .name = "localbus", },
+ { .compatible = "gianfar", },
+ { .compatible = "fsl,qe", },
+ { .compatible = "fsl,cpm2", },
+ { .compatible = "fsl,srio", },
+ /* So that the DMA channel nodes can be probed individually: */
+ { .compatible = "fsl,eloplus-dma", },
+ /* For the PMC driver */
+ { .compatible = "fsl,mpc8548-guts", },
+ /* Probably unnecessary? */
+ { .compatible = "gpio-leds", },
+ /* For all PCI controllers */
+ { .compatible = "fsl,mpc8540-pci", },
+ { .compatible = "fsl,mpc8548-pcie", },
+ { .compatible = "fsl,p1022-pcie", },
+ { .compatible = "fsl,p1010-pcie", },
+ { .compatible = "fsl,p1023-pcie", },
+ { .compatible = "fsl,p4080-pcie", },
+ { .compatible = "fsl,qoriq-pcie-v2.4", },
+ { .compatible = "fsl,qoriq-pcie-v2.3", },
+ { .compatible = "fsl,qoriq-pcie-v2.2", },
+ { .compatible = "fsl,fman", },
+ {},
+};
+
+/*
+ * Probe the buses listed in mpc85xx_common_ids, starting from the default
+ * root (NULL), and return of_platform_bus_probe()'s result unchanged.
+ */
+int __init mpc85xx_common_publish_devices(void)
+{
+ return of_platform_bus_probe(NULL, mpc85xx_common_ids, NULL);
+}
+#ifdef CONFIG_CPM2
+/*
+ * Chained handler for the CPM2 PIC: dispatch interrupts until
+ * cpm2_get_irq() returns a negative value, then EOI the parent interrupt.
+ */
+static void cpm2_cascade(struct irq_desc *desc)
+{
+	struct irq_chip *chip = irq_desc_get_chip(desc);
+	int cascade_irq;
+
+	for (;;) {
+		cascade_irq = cpm2_get_irq();
+		if (cascade_irq < 0)
+			break;
+		generic_handle_irq(cascade_irq);
+	}
+
+	chip->irq_eoi(&desc->irq_data);
+}
+
+
+/*
+ * Find the "fsl,cpm2-pic" node, initialise the CPM2 PIC and chain it behind
+ * the interrupt mapped from the node's first interrupt specifier. Errors are
+ * logged and leave the CPM2 PIC unconfigured.
+ */
+void __init mpc85xx_cpm2_pic_init(void)
+{
+	struct device_node *pic_node;
+	int cascade;
+
+	/* Setup CPM2 PIC */
+	pic_node = of_find_compatible_node(NULL, NULL, "fsl,cpm2-pic");
+	if (!pic_node) {
+		printk(KERN_ERR "PIC init: can not find fsl,cpm2-pic node\n");
+		return;
+	}
+
+	cascade = irq_of_parse_and_map(pic_node, 0);
+	if (cascade) {
+		cpm2_pic_init(pic_node);
+		of_node_put(pic_node);
+		irq_set_chained_handler(cascade, cpm2_cascade);
+		return;
+	}
+
+	of_node_put(pic_node);
+	printk(KERN_ERR "PIC init: got no IRQ for cpm cascade\n");
+}
+#endif
+
+#ifdef CONFIG_QUICC_ENGINE
+/*
+ * Initialise the QE parallel I/O block from the "par_io" node, then apply
+ * the pin configuration of every node named "ucc". Does nothing when no
+ * "par_io" node exists.
+ */
+void __init mpc85xx_qe_par_io_init(void)
+{
+	struct device_node *par_io;
+	struct device_node *ucc;
+
+	par_io = of_find_node_by_name(NULL, "par_io");
+	if (!par_io)
+		return;
+
+	par_io_init(par_io);
+	of_node_put(par_io);
+
+	for_each_node_by_name(ucc, "ucc")
+		par_io_of_config(ucc);
+}
+#endif
diff --git a/arch/powerpc/platforms/85xx/corenet_generic.c b/arch/powerpc/platforms/85xx/corenet_generic.c
new file mode 100644
index 000000000..645fcca77
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/corenet_generic.c
@@ -0,0 +1,203 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Corenet based SoC DS Setup
+ *
+ * Maintained by Kumar Gala (see MAINTAINERS for contact information)
+ *
+ * Copyright 2009-2011 Freescale Semiconductor Inc.
+ */
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/kdev_t.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/pgtable.h>
+
+#include <asm/time.h>
+#include <asm/machdep.h>
+#include <asm/pci-bridge.h>
+#include <asm/ppc-pci.h>
+#include <mm/mmu_decl.h>
+#include <asm/udbg.h>
+#include <asm/mpic.h>
+#include <asm/ehv_pic.h>
+#include <asm/swiotlb.h>
+
+#include <linux/of_platform.h>
+#include <sysdev/fsl_soc.h>
+#include <sysdev/fsl_pci.h>
+#include "smp.h"
+#include "mpc85xx.h"
+
+/*
+ * Allocate and initialise the MPIC. MPIC_ENABLE_COREINT is added only when
+ * neither CPU hotplug nor kexec is configured, matching the get_irq choice
+ * in the machine descriptor.
+ */
+static void __init corenet_gen_pic_init(void)
+{
+	unsigned int flags = MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU | MPIC_NO_RESET;
+	struct mpic *mpic;
+
+	if (!(IS_ENABLED(CONFIG_HOTPLUG_CPU) || IS_ENABLED(CONFIG_KEXEC_CORE)))
+		flags |= MPIC_ENABLE_COREINT;
+
+	mpic = mpic_alloc(NULL, 0, flags, 0, 512, " OpenPIC ");
+	BUG_ON(!mpic);
+
+	mpic_init(mpic);
+}
+
+/*
+ * Setup the architecture: initialise SMP support, run the swiotlb 4G
+ * detection helper and log the machine name taken from ppc_md.
+ */
+static void __init corenet_gen_setup_arch(void)
+{
+ mpc85xx_smp_init();
+
+ swiotlb_detect_4g();
+
+ pr_info("%s board\n", ppc_md.name);
+}
+
+/* Bus and device nodes probed by corenet_gen_publish_devices(). */
+static const struct of_device_id of_device_ids[] = {
+	{ .compatible = "simple-bus", },
+	{ .compatible = "mdio-mux-gpio", },
+	{ .compatible = "fsl,fpga-ngpixis", },
+	{ .compatible = "fsl,fpga-qixis", },
+	{ .compatible = "fsl,srio", },
+	{ .compatible = "fsl,p4080-pcie", },
+	{ .compatible = "fsl,qoriq-pcie-v2.2", },
+	{ .compatible = "fsl,qoriq-pcie-v2.3", },
+	{ .compatible = "fsl,qoriq-pcie-v2.4", },
+	{ .compatible = "fsl,qoriq-pcie-v3.0", },
+	{ .compatible = "fsl,qe", },
+	/* The following two are for the Freescale hypervisor */
+	{ .name = "hypervisor", },
+	{ .name = "handles", },
+	{ /* end of table */ },
+};
+
+/*
+ * Probe the buses listed in of_device_ids and return
+ * of_platform_bus_probe()'s result; registered below as an arch initcall
+ * for the corenet_generic machine.
+ */
+static int __init corenet_gen_publish_devices(void)
+{
+ return of_platform_bus_probe(NULL, of_device_ids, NULL);
+}
+machine_arch_initcall(corenet_generic, corenet_gen_publish_devices);
+
+/*
+ * NULL-terminated list of compatible strings accepted by
+ * corenet_generic_probe(). Each entry is also tried there with a "-hv"
+ * suffix to detect the Freescale hypervisor.
+ */
+static const char * const boards[] __initconst = {
+ "fsl,P2041RDB",
+ "fsl,P3041DS",
+ "fsl,OCA4080",
+ "fsl,P4080DS",
+ "fsl,P5020DS",
+ "fsl,P5040DS",
+ "fsl,T2080QDS",
+ "fsl,T2080RDB",
+ "fsl,T2081QDS",
+ "fsl,T4240QDS",
+ "fsl,T4240RDB",
+ "fsl,B4860QDS",
+ "fsl,B4420QDS",
+ "fsl,B4220QDS",
+ "fsl,T1023RDB",
+ "fsl,T1024QDS",
+ "fsl,T1024RDB",
+ "fsl,T1040D4RDB",
+ "fsl,T1042D4RDB",
+ "fsl,T1040QDS",
+ "fsl,T1042QDS",
+ "fsl,T1040RDB",
+ "fsl,T1042RDB",
+ "fsl,T1042RDB_PI",
+ "keymile,kmcent2",
+ "keymile,kmcoge4",
+ "varisys,CYRUS",
+ NULL
+};
+
+/*
+ * Machine probe hook. Returns 1 when the root node matches an entry in
+ * boards[] directly, or when the machine is compatible with "<board>-hv";
+ * in the latter case the interrupt, restart, halt and power-off hooks are
+ * switched to their hypervisor variants. Returns 0 otherwise.
+ *
+ * NOTE(review): the original comment here read "Called very early,
+ * device-tree isn't unflattened", yet the body uses of_root and
+ * of_machine_is_compatible() -- confirm whether that statement still holds.
+ */
+static int __init corenet_generic_probe(void)
+{
+ /* Holds "<board>-hv"; sized for the longest entry in boards[] plus suffix */
+ char hv_compat[24];
+ int i;
+#ifdef CONFIG_SMP
+ extern struct smp_ops_t smp_85xx_ops;
+#endif
+
+ if (of_device_compatible_match(of_root, boards))
+ return 1;
+
+ /* Check if we're running under the Freescale hypervisor */
+ for (i = 0; boards[i]; i++) {
+ snprintf(hv_compat, sizeof(hv_compat), "%s-hv", boards[i]);
+ if (of_machine_is_compatible(hv_compat)) {
+ ppc_md.init_IRQ = ehv_pic_init;
+
+ ppc_md.get_irq = ehv_pic_get_irq;
+ ppc_md.restart = fsl_hv_restart;
+ pm_power_off = fsl_hv_halt;
+ ppc_md.halt = fsl_hv_halt;
+#ifdef CONFIG_SMP
+ /*
+ * Disable the timebase sync operations because we
+ * can't write to the timebase registers under the
+ * hypervisor.
+ */
+ smp_85xx_ops.give_timebase = NULL;
+ smp_85xx_ops.take_timebase = NULL;
+#endif
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Machine descriptor for the generic CoreNet platform. It has no
+ * .compatible string; matching is done by corenet_generic_probe(). The
+ * get_irq hook is chosen at build time to stay consistent with the MPIC
+ * flags set in corenet_gen_pic_init().
+ */
+define_machine(corenet_generic) {
+ .name = "CoreNet Generic",
+ .probe = corenet_generic_probe,
+ .setup_arch = corenet_gen_setup_arch,
+ .init_IRQ = corenet_gen_pic_init,
+#ifdef CONFIG_PCI
+ .pcibios_fixup_bus = fsl_pcibios_fixup_bus,
+ .pcibios_fixup_phb = fsl_pcibios_fixup_phb,
+#endif
+/*
+ * Core reset may cause issues if using the proxy mode of MPIC.
+ * So, use the mixed mode of MPIC if enabling CPU hotplug.
+ *
+ * Likewise, problems have been seen with kexec when coreint is enabled.
+ */
+#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_KEXEC_CORE)
+ .get_irq = mpic_get_irq,
+#else
+ .get_irq = mpic_get_coreint_irq,
+#endif
+ .progress = udbg_progress,
+ .power_save = e500_idle,
+};
diff --git a/arch/powerpc/platforms/85xx/ge_imp3a.c b/arch/powerpc/platforms/85xx/ge_imp3a.c
new file mode 100644
index 000000000..9c3b44a19
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/ge_imp3a.c
@@ -0,0 +1,207 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * GE IMP3A Board Setup
+ *
+ * Author Martyn Welch <martyn.welch@ge.com>
+ *
+ * Copyright 2010 GE Intelligent Platforms Embedded Systems, Inc.
+ *
+ * Based on: mpc85xx_ds.c (MPC85xx DS Board Setup)
+ * Copyright 2007 Freescale Semiconductor Inc.
+ */
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/kdev_t.h>
+#include <linux/delay.h>
+#include <linux/seq_file.h>
+#include <linux/interrupt.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+
+#include <asm/time.h>
+#include <asm/machdep.h>
+#include <asm/pci-bridge.h>
+#include <mm/mmu_decl.h>
+#include <asm/udbg.h>
+#include <asm/mpic.h>
+#include <asm/swiotlb.h>
+#include <asm/nvram.h>
+
+#include <sysdev/fsl_soc.h>
+#include <sysdev/fsl_pci.h>
+#include "smp.h"
+
+#include "mpc85xx.h"
+#include <sysdev/ge/ge_pic.h>
+
+void __iomem *imp3a_regs;
+
+/*
+ * Allocate and initialise the MPIC, then cascade the FPGA interrupt
+ * controller ("gef,fpga-pic-1.00") into it. MPIC_NO_RESET is added when the
+ * machine is compatible with "fsl,MPC8572DS-CAMP". A missing FPGA PIC node
+ * is only warned about.
+ */
+void __init ge_imp3a_pic_init(void)
+{
+	unsigned int flags = MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU;
+	struct device_node *cascade_node = NULL;
+	struct device_node *np;
+	struct mpic *mpic;
+
+	if (of_machine_is_compatible("fsl,MPC8572DS-CAMP"))
+		flags |= MPIC_NO_RESET;
+
+	mpic = mpic_alloc(NULL, 0, flags, 0, 256, " OpenPIC ");
+	BUG_ON(!mpic);
+	mpic_init(mpic);
+
+	/*
+	 * There is a simple interrupt handler in the main FPGA, this needs
+	 * to be cascaded into the MPIC
+	 */
+	for_each_node_by_type(np, "interrupt-controller") {
+		if (!of_device_is_compatible(np, "gef,fpga-pic-1.00"))
+			continue;
+		/* Leaving the loop early keeps the iterator's reference on np */
+		cascade_node = np;
+		break;
+	}
+
+	if (!cascade_node) {
+		printk(KERN_WARNING "IMP3A: No FPGA PIC\n");
+		return;
+	}
+
+	gef_pic_init(cascade_node);
+	of_node_put(cascade_node);
+}
+
+/*
+ * Pick the primary PCI controller: among "pci"-type nodes compatible with
+ * one of the supported Freescale controllers, the one whose first register
+ * resource has 0x9000 in its low 20 address bits becomes fsl_pci_primary.
+ * If several nodes qualify, the last one visited wins. Compiles to an empty
+ * function without CONFIG_PCI.
+ */
+static void __init ge_imp3a_pci_assign_primary(void)
+{
+#ifdef CONFIG_PCI
+	struct device_node *np;
+	struct resource rsrc;
+
+	for_each_node_by_type(np, "pci") {
+		if (!of_device_is_compatible(np, "fsl,mpc8540-pci") &&
+		    !of_device_is_compatible(np, "fsl,mpc8548-pcie") &&
+		    !of_device_is_compatible(np, "fsl,p2020-pcie"))
+			continue;
+
+		/*
+		 * rsrc is only filled in on success; skip nodes whose address
+		 * cannot be translated instead of testing stack garbage.
+		 */
+		if (of_address_to_resource(np, 0, &rsrc))
+			continue;
+
+		if ((rsrc.start & 0xfffff) == 0x9000) {
+			/* Drop the reference held on any previous choice */
+			of_node_put(fsl_pci_primary);
+			fsl_pci_primary = of_node_get(np);
+		}
+	}
+#endif
+}
+
+/*
+ * Board setup_arch hook: bring up SMP support, choose the primary PCI
+ * controller, run swiotlb detection, map the FPGA board registers into
+ * imp3a_regs (a mapping failure is only warned about) and, when configured,
+ * initialise the MMIO NVRAM.
+ */
+static void __init ge_imp3a_setup_arch(void)
+{
+	struct device_node *fpga_np;
+
+	if (ppc_md.progress != NULL)
+		ppc_md.progress("ge_imp3a_setup_arch()", 0);
+
+	mpc85xx_smp_init();
+
+	ge_imp3a_pci_assign_primary();
+
+	swiotlb_detect_4g();
+
+	/* Remap basic board registers */
+	fpga_np = of_find_compatible_node(NULL, NULL, "ge,imp3a-fpga-regs");
+	if (fpga_np != NULL) {
+		imp3a_regs = of_iomap(fpga_np, 0);
+		if (!imp3a_regs)
+			printk(KERN_WARNING "Unable to map board registers\n");
+		of_node_put(fpga_np);
+	}
+
+#ifdef CONFIG_MMIO_NVRAM
+	mmio_nvram_init();
+#endif
+
+	printk(KERN_INFO "GE Intelligent Platforms IMP3A 3U cPCI SBC\n");
+}
+
+/* PCB revision: bits 15..8 of the 16-bit register at offset 0x0. */
+static unsigned int ge_imp3a_get_pcb_rev(void)
+{
+	unsigned int val = ioread16(imp3a_regs);
+
+	val >>= 8;
+	return val & 0xff;
+}
+
+/* Board (software) revision: bits 7..0 of the 16-bit register at offset 0x2. */
+static unsigned int ge_imp3a_get_board_rev(void)
+{
+	unsigned int val = ioread16(imp3a_regs + 0x2);
+
+	return val & 0xff;
+}
+
+/* FPGA revision: bits 15..8 of the 16-bit register at offset 0x2. */
+static unsigned int ge_imp3a_get_fpga_rev(void)
+{
+	unsigned int val = ioread16(imp3a_regs + 0x2);
+
+	val >>= 8;
+	return val & 0xff;
+}
+
+/* compactPCI geographical address: bits 11..8 of the register at offset 0x6. */
+static unsigned int ge_imp3a_get_cpci_geo_addr(void)
+{
+	unsigned int val = ioread16(imp3a_regs + 0x6);
+
+	val &= 0x0f00;
+	return val >> 8;
+}
+
+/*
+ * compactPCI system controller status: non-zero (the raw bit 12 of the
+ * register at offset 0x6) when set, zero otherwise.
+ */
+static unsigned int ge_imp3a_get_cpci_is_syscon(void)
+{
+	unsigned int val = ioread16(imp3a_regs + 0x6);
+
+	return val & (1 << 12);
+}
+
+/*
+ * Append board details to /proc/cpuinfo output: vendor, PCB/board revision,
+ * FPGA revision, compactPCI geographical address and system-controller
+ * status, all read from the FPGA board registers.
+ */
+static void ge_imp3a_show_cpuinfo(struct seq_file *m)
+{
+	seq_printf(m, "Vendor\t\t: GE Intelligent Platforms\n");
+
+	/*
+	 * ge_imp3a_setup_arch() only warns when the register block cannot be
+	 * found or mapped, so imp3a_regs may still be NULL here; do not hand
+	 * a NULL base to ioread16().
+	 */
+	if (!imp3a_regs) {
+		seq_printf(m, "Revision\t: unknown (board registers not mapped)\n");
+		return;
+	}
+
+	/* Board revision 1 is shown as 'A', 2 as 'B', and so on */
+	seq_printf(m, "Revision\t: %u%c\n", ge_imp3a_get_pcb_rev(),
+		   ('A' + ge_imp3a_get_board_rev() - 1));
+
+	seq_printf(m, "FPGA Revision\t: %u\n", ge_imp3a_get_fpga_rev());
+
+	seq_printf(m, "cPCI geo. addr\t: %u\n", ge_imp3a_get_cpci_geo_addr());
+
+	seq_printf(m, "cPCI syscon\t: %s\n",
+		   ge_imp3a_get_cpci_is_syscon() ? "yes" : "no");
+}
+
+/* Register mpc85xx_common_publish_devices() as an arch initcall for the ge_imp3a machine. */
+machine_arch_initcall(ge_imp3a, mpc85xx_common_publish_devices);
+
+/*
+ * Machine descriptor for the GE IMP3A: board name, device-tree compatible
+ * string and the callbacks defined in this file. The PCI fixup hooks are
+ * only present when CONFIG_PCI is enabled.
+ */
+define_machine(ge_imp3a) {
+ .name = "GE_IMP3A",
+ .compatible = "ge,IMP3A",
+ .setup_arch = ge_imp3a_setup_arch,
+ .init_IRQ = ge_imp3a_pic_init,
+ .show_cpuinfo = ge_imp3a_show_cpuinfo,
+#ifdef CONFIG_PCI
+ .pcibios_fixup_bus = fsl_pcibios_fixup_bus,
+ .pcibios_fixup_phb = fsl_pcibios_fixup_phb,
+#endif
+ .get_irq = mpic_get_irq,
+ .progress = udbg_progress,
+};
diff --git a/arch/powerpc/platforms/85xx/ksi8560.c b/arch/powerpc/platforms/85xx/ksi8560.c
new file mode 100644
index 000000000..1b6326a4b
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/ksi8560.c
@@ -0,0 +1,184 @@
+/*
+ * Board setup routines for the Emerson KSI8560
+ *
+ * Author: Alexandr Smirnov <asmirnov@ru.mvista.com>
+ *
+ * Based on mpc85xx_ads.c maintained by Kumar Gala
+ *
+ * 2008 (c) MontaVista, Software, Inc. This file is licensed under
+ * the terms of the GNU General Public License version 2. This program
+ * is licensed "as is" without any warranty of any kind, whether express
+ * or implied.
+ *
+ */
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/kdev_t.h>
+#include <linux/delay.h>
+#include <linux/seq_file.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+
+#include <asm/time.h>
+#include <asm/machdep.h>
+#include <asm/pci-bridge.h>
+#include <asm/mpic.h>
+#include <mm/mmu_decl.h>
+#include <asm/udbg.h>
+
+#include <sysdev/fsl_soc.h>
+#include <sysdev/fsl_pci.h>
+
+#include <asm/cpm2.h>
+#include <sysdev/cpm2_pic.h>
+
+#include "mpc85xx.h"
+
+#define KSI8560_CPLD_HVR 0x04 /* Hardware Version Register */
+#define KSI8560_CPLD_PVR 0x08 /* PLD Version Register */
+#define KSI8560_CPLD_RCR1 0x30 /* Reset Command Register 1 */
+
+#define KSI8560_CPLD_RCR1_CPUHR 0x80 /* CPU Hard Reset */
+
+static void __iomem *cpld_base = NULL;
+
+/*
+ * Restart hook: request a CPU hard reset through the CPLD when it is mapped,
+ * otherwise log an error. Never returns; spins forever in both cases.
+ */
+static void __noreturn machine_restart(char *cmd)
+{
+	if (!cpld_base)
+		printk(KERN_ERR "Can't find CPLD base, hang forever\n");
+	else
+		out_8(cpld_base + KSI8560_CPLD_RCR1, KSI8560_CPLD_RCR1_CPUHR);
+
+	while (1)
+		;
+}
+
+/*
+ * Allocate the MPIC (allocation failure is fatal via BUG_ON), initialise it,
+ * then set up the CPM2 PIC through mpc85xx_cpm2_pic_init().
+ */
+static void __init ksi8560_pic_init(void)
+{
+ struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN,
+ 0, 256, " OpenPIC ");
+ BUG_ON(mpic == NULL);
+ mpic_init(mpic);
+
+ mpc85xx_cpm2_pic_init();
+}
+
+#ifdef CONFIG_CPM2
+/*
+ * Setup I/O ports
+ */
+/* One CPM2 pin setting; the fields are passed to cpm2_set_pin() in this order. */
+struct cpm_pin {
+ int port, pin, flags;
+};
+
+/*
+ * Pin settings applied by init_ioports(), grouped by the CPM2 peripheral
+ * they serve. Each entry is {port, pin, direction | function flags}.
+ */
+static struct cpm_pin __initdata ksi8560_pins[] = {
+ /* SCC1 */
+ {3, 29, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
+ {3, 30, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY},
+ {3, 31, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+
+ /* SCC2 */
+ {3, 26, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
+ {3, 27, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
+ {3, 28, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+
+ /* FCC1 */
+ {0, 14, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+ {0, 15, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+ {0, 16, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+ {0, 17, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
+ {0, 18, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
+ {0, 19, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
+ {0, 20, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
+ {0, 21, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
+ {0, 26, CPM_PIN_INPUT | CPM_PIN_SECONDARY},
+ {0, 27, CPM_PIN_INPUT | CPM_PIN_SECONDARY},
+ {0, 28, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY},
+ {0, 29, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY},
+ {0, 30, CPM_PIN_INPUT | CPM_PIN_SECONDARY},
+ {0, 31, CPM_PIN_INPUT | CPM_PIN_SECONDARY},
+ {2, 23, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, /* CLK9 */
+ {2, 22, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, /* CLK10 */
+
+};
+
+/*
+ * Apply every entry of ksi8560_pins, then route the clocks: BRG1 to SCC1 and
+ * BRG2 to SCC2 (both RX and TX), CLK9 to FCC1 RX and CLK10 to FCC1 TX.
+ */
+static void __init init_ioports(void)
+{
+	struct cpm_pin *pin;
+	struct cpm_pin *end = ksi8560_pins + ARRAY_SIZE(ksi8560_pins);
+
+	for (pin = ksi8560_pins; pin < end; pin++)
+		cpm2_set_pin(pin->port, pin->pin, pin->flags);
+
+	cpm2_clk_setup(CPM_CLK_SCC1, CPM_BRG1, CPM_CLK_RX);
+	cpm2_clk_setup(CPM_CLK_SCC1, CPM_BRG1, CPM_CLK_TX);
+	cpm2_clk_setup(CPM_CLK_SCC2, CPM_BRG2, CPM_CLK_RX);
+	cpm2_clk_setup(CPM_CLK_SCC2, CPM_BRG2, CPM_CLK_TX);
+	cpm2_clk_setup(CPM_CLK_FCC1, CPM_CLK9, CPM_CLK_RX);
+	cpm2_clk_setup(CPM_CLK_FCC1, CPM_CLK10, CPM_CLK_TX);
+}
+#endif
+
+/*
+ * Board setup_arch hook: map the CPLD registers into cpld_base (a missing
+ * node is only logged), report progress and, with CONFIG_CPM2, reset the
+ * CPM2 and configure its I/O ports.
+ */
+static void __init ksi8560_setup_arch(void)
+{
+	struct device_node *cpld;
+
+	cpld = of_find_compatible_node(NULL, NULL, "emerson,KSI8560-cpld");
+	if (!cpld)
+		printk(KERN_ERR "Can't find CPLD in device tree\n");
+	else
+		cpld_base = of_iomap(cpld, 0);
+
+	/* Called on both paths, so also when cpld is NULL */
+	of_node_put(cpld);
+
+	if (ppc_md.progress != NULL)
+		ppc_md.progress("ksi8560_setup_arch()", 0);
+
+#ifdef CONFIG_CPM2
+	cpm2_reset();
+	init_ioports();
+#endif
+}
+
+/*
+ * Append board details to /proc/cpuinfo output: vendor, board name, hardware
+ * and CPLD revisions (when the CPLD is mapped), PVR, SVR and the PLL field
+ * taken from HID1.
+ */
+static void ksi8560_show_cpuinfo(struct seq_file *m)
+{
+	uint pvid = mfspr(SPRN_PVR);
+	uint svid = mfspr(SPRN_SVR);
+	uint phid1;
+
+	seq_puts(m, "Vendor\t\t: Emerson Network Power\n");
+	seq_puts(m, "Board\t\t: KSI8560\n");
+
+	if (!cpld_base) {
+		seq_puts(m, "Unknown Hardware and CPLD revs\n");
+	} else {
+		seq_printf(m, "Hardware rev\t: %d\n",
+			   in_8(cpld_base + KSI8560_CPLD_HVR));
+		seq_printf(m, "CPLD rev\t: %d\n",
+			   in_8(cpld_base + KSI8560_CPLD_PVR));
+	}
+
+	seq_printf(m, "PVR\t\t: 0x%x\n", pvid);
+	seq_printf(m, "SVR\t\t: 0x%x\n", svid);
+
+	/* Display cpu Pll setting */
+	phid1 = mfspr(SPRN_HID1);
+	seq_printf(m, "PLL setting\t: 0x%x\n", (phid1 >> 24) & 0x3f);
+}
+
+/* Register mpc85xx_common_publish_devices() as a device initcall for the ksi8560 machine. */
+machine_device_initcall(ksi8560, mpc85xx_common_publish_devices);
+
+/*
+ * Machine descriptor for the Emerson KSI8560: board name, device-tree
+ * compatible string and the callbacks defined in this file, including the
+ * CPLD-based restart hook.
+ */
+define_machine(ksi8560) {
+ .name = "KSI8560",
+ .compatible = "emerson,KSI8560",
+ .setup_arch = ksi8560_setup_arch,
+ .init_IRQ = ksi8560_pic_init,
+ .show_cpuinfo = ksi8560_show_cpuinfo,
+ .get_irq = mpic_get_irq,
+ .restart = machine_restart,
+};
diff --git a/arch/powerpc/platforms/85xx/mpc8536_ds.c b/arch/powerpc/platforms/85xx/mpc8536_ds.c
new file mode 100644
index 000000000..e966b2ad8
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/mpc8536_ds.c
@@ -0,0 +1,66 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * MPC8536 DS Board Setup
+ *
+ * Copyright 2008 Freescale Semiconductor, Inc.
+ */
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/kdev_t.h>
+#include <linux/delay.h>
+#include <linux/seq_file.h>
+#include <linux/interrupt.h>
+#include <linux/of.h>
+
+#include <asm/time.h>
+#include <asm/machdep.h>
+#include <asm/pci-bridge.h>
+#include <mm/mmu_decl.h>
+#include <asm/udbg.h>
+#include <asm/mpic.h>
+#include <asm/swiotlb.h>
+
+#include <sysdev/fsl_soc.h>
+#include <sysdev/fsl_pci.h>
+
+#include "mpc85xx.h"
+
+/* Allocate the MPIC (allocation failure is fatal via BUG_ON) and initialise it. */
+void __init mpc8536_ds_pic_init(void)
+{
+	struct mpic *mpic;
+
+	mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN, 0, 256, " OpenPIC ");
+	BUG_ON(!mpic);
+
+	mpic_init(mpic);
+}
+
+/*
+ * Board setup_arch hook: report progress, select the primary PCI controller,
+ * run swiotlb detection and log the board banner.
+ */
+static void __init mpc8536_ds_setup_arch(void)
+{
+	if (ppc_md.progress)
+		ppc_md.progress("mpc8536_ds_setup_arch()", 0);
+
+	fsl_pci_assign_primary();
+
+	swiotlb_detect_4g();
+
+	/* Explicit info level, matching the banners of the sibling boards */
+	pr_info("MPC8536 DS board from Freescale Semiconductor\n");
+}
+
+/* Register mpc85xx_common_publish_devices() as an arch initcall for the mpc8536_ds machine. */
+machine_arch_initcall(mpc8536_ds, mpc85xx_common_publish_devices);
+
+/*
+ * Machine descriptor for the MPC8536 DS: board name, device-tree compatible
+ * string and the callbacks defined in this file. The PCI fixup hooks are
+ * only present when CONFIG_PCI is enabled.
+ */
+define_machine(mpc8536_ds) {
+ .name = "MPC8536 DS",
+ .compatible = "fsl,mpc8536ds",
+ .setup_arch = mpc8536_ds_setup_arch,
+ .init_IRQ = mpc8536_ds_pic_init,
+#ifdef CONFIG_PCI
+ .pcibios_fixup_bus = fsl_pcibios_fixup_bus,
+ .pcibios_fixup_phb = fsl_pcibios_fixup_phb,
+#endif
+ .get_irq = mpic_get_irq,
+ .progress = udbg_progress,
+};
diff --git a/arch/powerpc/platforms/85xx/mpc85xx.h b/arch/powerpc/platforms/85xx/mpc85xx.h
new file mode 100644
index 000000000..c764d7551
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/mpc85xx.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef MPC85xx_H
+#define MPC85xx_H
+extern int mpc85xx_common_publish_devices(void);
+
+#ifdef CONFIG_CPM2
+extern void mpc85xx_cpm2_pic_init(void);
+#else /* !CONFIG_CPM2: empty stub so callers need no #ifdef */
+static inline void __init mpc85xx_cpm2_pic_init(void) {}
+#endif /* CONFIG_CPM2 */
+
+#ifdef CONFIG_QUICC_ENGINE
+extern void mpc85xx_qe_par_io_init(void);
+#else /* !CONFIG_QUICC_ENGINE: empty stub */
+static inline void __init mpc85xx_qe_par_io_init(void) {}
+#endif /* CONFIG_QUICC_ENGINE */
+
+#ifdef CONFIG_PPC_I8259
+void __init mpc85xx_8259_init(void);
+#else /* !CONFIG_PPC_I8259: empty stub */
+static inline void __init mpc85xx_8259_init(void) {}
+#endif /* CONFIG_PPC_I8259 */
+
+#endif /* MPC85xx_H */
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_8259.c b/arch/powerpc/platforms/85xx/mpc85xx_8259.c
new file mode 100644
index 000000000..cb00d596a
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/mpc85xx_8259.c
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * MPC85xx 8259 functions for DS Board Setup
+ *
+ * Author Xianghua Xiao (x.xiao@freescale.com)
+ * Roy Zang <tie-fei.zang@freescale.com>
+ * - Add PCI/PCI Express support
+ * Copyright 2007 Freescale Semiconductor Inc.
+ */
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/of_irq.h>
+#include <linux/of_platform.h>
+
+#include <asm/mpic.h>
+#include <asm/i8259.h>
+
+#include "mpc85xx.h"
+
+static void mpc85xx_8259_cascade(struct irq_desc *desc)
+{
+	struct irq_chip *owner = irq_desc_get_chip(desc);
+	unsigned int virq = i8259_irq();
+
+	if (virq != 0)	/* only a non-zero i8259 irq is forwarded */
+		generic_handle_irq(virq);
+	/* The EOI on the chip that owns desc is issued in every case. */
+	owner->irq_eoi(&desc->irq_data);
+}
+
+/* Find the "chrp,iic" i8259 node, initialise it and chain its cascade IRQ. */
+void __init mpc85xx_8259_init(void)
+{
+	struct device_node *np, *cascade_node = NULL;
+	int cascade_irq;
+
+	/* Breaking out of the iterator keeps the reference held on np. */
+	for_each_node_by_type(np, "interrupt-controller") {
+		if (of_device_is_compatible(np, "chrp,iic")) {
+			cascade_node = np;
+			break;
+		}
+	}
+
+	if (cascade_node == NULL) {
+		pr_debug("i8259: Could not find i8259 PIC\n");
+		return;
+	}
+
+	cascade_irq = irq_of_parse_and_map(cascade_node, 0);
+	if (!cascade_irq) {
+		pr_err("i8259: Failed to map cascade interrupt\n");
+		of_node_put(cascade_node);	/* drop the reference on failure too */
+		return;
+	}
+
+	pr_debug("i8259: cascade mapped to irq %d\n", cascade_irq);
+	i8259_init(cascade_node, 0);
+	of_node_put(cascade_node);
+
+	irq_set_chained_handler(cascade_irq, mpc85xx_8259_cascade);
+}
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_ds.c b/arch/powerpc/platforms/85xx/mpc85xx_ds.c
new file mode 100644
index 000000000..285614832
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/mpc85xx_ds.c
@@ -0,0 +1,98 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * MPC85xx DS Board Setup
+ *
+ * Author Xianghua Xiao (x.xiao@freescale.com)
+ * Roy Zang <tie-fei.zang@freescale.com>
+ * - Add PCI/PCI Express support
+ * Copyright 2007 Freescale Semiconductor Inc.
+ */
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/kdev_t.h>
+#include <linux/delay.h>
+#include <linux/seq_file.h>
+#include <linux/interrupt.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
+
+#include <asm/time.h>
+#include <asm/machdep.h>
+#include <asm/pci-bridge.h>
+#include <mm/mmu_decl.h>
+#include <asm/udbg.h>
+#include <asm/mpic.h>
+#include <asm/i8259.h>
+#include <asm/swiotlb.h>
+#include <asm/ppc-pci.h>
+
+#include <sysdev/fsl_soc.h>
+#include <sysdev/fsl_pci.h>
+#include "smp.h"
+
+#include "mpc85xx.h"
+
+/* init_IRQ hook shared by the MPC8544 DS and MPC8572 DS descriptors below. */
+static void __init mpc85xx_ds_pic_init(void)
+{
+	int mpic_flags = MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU;
+	struct mpic *pic;
+
+	/* Device trees marked "fsl,MPC8572DS-CAMP" also get MPIC_NO_RESET. */
+	if (of_machine_is_compatible("fsl,MPC8572DS-CAMP"))
+		mpic_flags |= MPIC_NO_RESET;
+
+	pic = mpic_alloc(NULL, 0, mpic_flags, 0, 256, " OpenPIC ");
+	if (!WARN_ON(!pic)) {
+		/* MPIC first, then the i8259 lookup and cascade setup. */
+		mpic_init(pic);
+		mpc85xx_8259_init();
+	}
+}
+
+/*
+ * Setup the architecture: .setup_arch hook for both DS machines below.
+ */
+static void __init mpc85xx_ds_setup_arch(void)
+{
+ if (ppc_md.progress) /* the progress hook is optional */
+ ppc_md.progress("mpc85xx_ds_setup_arch()", 0);
+
+ swiotlb_detect_4g();
+ fsl_pci_assign_primary();
+ uli_init();
+ mpc85xx_smp_init();
+
+ pr_info("MPC85xx DS board from Freescale Semiconductor\n");
+}
+
+machine_arch_initcall(mpc8544_ds, mpc85xx_common_publish_devices);
+machine_arch_initcall(mpc8572_ds, mpc85xx_common_publish_devices);
+
+define_machine(mpc8544_ds) { /* machine description for the MPC8544 DS */
+ .name = "MPC8544 DS",
+ .compatible = "MPC8544DS",
+ .setup_arch = mpc85xx_ds_setup_arch, /* shared with mpc8572_ds */
+ .init_IRQ = mpc85xx_ds_pic_init, /* shared with mpc8572_ds */
+#ifdef CONFIG_PCI
+ .pcibios_fixup_bus = fsl_pcibios_fixup_bus,
+ .pcibios_fixup_phb = fsl_pcibios_fixup_phb,
+#endif /* CONFIG_PCI */
+ .get_irq = mpic_get_irq,
+ .progress = udbg_progress,
+};
+
+define_machine(mpc8572_ds) { /* machine description for the MPC8572 DS */
+ .name = "MPC8572 DS",
+ .compatible = "fsl,MPC8572DS",
+ .setup_arch = mpc85xx_ds_setup_arch, /* shared with mpc8544_ds */
+ .init_IRQ = mpc85xx_ds_pic_init, /* shared with mpc8544_ds */
+#ifdef CONFIG_PCI
+ .pcibios_fixup_bus = fsl_pcibios_fixup_bus,
+ .pcibios_fixup_phb = fsl_pcibios_fixup_phb,
+#endif /* CONFIG_PCI */
+ .get_irq = mpic_get_irq,
+ .progress = udbg_progress,
+};
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_mds.c b/arch/powerpc/platforms/85xx/mpc85xx_mds.c
new file mode 100644
index 000000000..c19490cf6
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/mpc85xx_mds.c
@@ -0,0 +1,372 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2006-2010, 2012-2013 Freescale Semiconductor, Inc.
+ * All rights reserved.
+ *
+ * Author: Andy Fleming <afleming@freescale.com>
+ *
+ * Based on 83xx/mpc8360e_pb.c by:
+ * Li Yang <LeoLi@freescale.com>
+ * Yin Olivia <Hong-hua.Yin@freescale.com>
+ *
+ * Description:
+ * MPC85xx MDS board specific routines.
+ */
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/reboot.h>
+#include <linux/pci.h>
+#include <linux/kdev_t.h>
+#include <linux/major.h>
+#include <linux/console.h>
+#include <linux/delay.h>
+#include <linux/seq_file.h>
+#include <linux/initrd.h>
+#include <linux/fsl_devices.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/phy.h>
+#include <linux/memblock.h>
+#include <linux/fsl/guts.h>
+
+#include <linux/atomic.h>
+#include <asm/time.h>
+#include <asm/io.h>
+#include <asm/machdep.h>
+#include <asm/pci-bridge.h>
+#include <asm/irq.h>
+#include <mm/mmu_decl.h>
+#include <asm/udbg.h>
+#include <sysdev/fsl_soc.h>
+#include <sysdev/fsl_pci.h>
+#include <soc/fsl/qe/qe.h>
+#include <asm/mpic.h>
+#include <asm/swiotlb.h>
+#include "smp.h"
+
+#include "mpc85xx.h"
+
+#if IS_BUILTIN(CONFIG_PHYLIB)
+
+#define MV88E1111_SCR		0x10
+#define MV88E1111_SCR_125CLK	0x0010
+/*
+ * PHY fixup: workaround for the 125 CLK Toggle.
+ *
+ * Clears MV88E1111_SCR_125CLK in the SCR, writes BMCR_RESET to MII_BMCR,
+ * then sets 0x0008 in the SCR.  Returns 0 on success or the result of the
+ * first phy_read()/phy_write() that failed.
+ */
+static int mpc8568_fixup_125_clock(struct phy_device *phydev)
+{
+	int val, ret;
+
+	val = phy_read(phydev, MV88E1111_SCR);
+	if (val < 0)
+		return val;
+
+	ret = phy_write(phydev, MV88E1111_SCR, val & ~MV88E1111_SCR_125CLK);
+	if (ret)
+		return ret;
+
+	ret = phy_write(phydev, MII_BMCR, BMCR_RESET);
+	if (ret)
+		return ret;
+
+	/* The SCR is read again after the reset has been written. */
+	val = phy_read(phydev, MV88E1111_SCR);
+	if (val < 0)
+		return val;
+
+	return phy_write(phydev, MV88E1111_SCR, val | 0x0008);
+}
+
+static int mpc8568_mds_phy_fixups(struct phy_device *phydev)
+{
+ int temp;
+ int err;
+ /* PHY fixup: returns 0, or the result of the first failing PHY access. */
+ /* Errata: write 0x0006 to register 29, then update register 30 */
+ err = phy_write(phydev,29, 0x0006);
+
+ if (err)
+ return err;
+
+ temp = phy_read(phydev, 30);
+
+ if (temp < 0)
+ return temp;
+ /* Register 30: clear 0x8000 and set 0x4000. */
+ temp = (temp & (~0x8000)) | 0x4000;
+ err = phy_write(phydev,30, temp);
+
+ if (err)
+ return err;
+ /* Write 0x000a to register 29, then clear 0x0020 in register 30. */
+ err = phy_write(phydev,29, 0x000a);
+
+ if (err)
+ return err;
+
+ temp = phy_read(phydev, 30);
+
+ if (temp < 0)
+ return temp;
+ /* NOTE(review): register 30 is read a second time; confirm if intended. */
+ temp = phy_read(phydev, 30);
+
+ if (temp < 0)
+ return temp;
+
+ temp &= ~0x0020;
+
+ err = phy_write(phydev,30,temp);
+
+ if (err)
+ return err;
+
+ /* Disable automatic MDI/MDIX selection */
+ temp = phy_read(phydev, 16);
+
+ if (temp < 0)
+ return temp;
+ /* Register 16: clear 0x0060. */
+ temp &= ~0x0060;
+ err = phy_write(phydev,16,temp);
+
+ return err;
+}
+
+#endif
+
+/* ************************************************************************
+ *
+ * Setup the architecture
+ *
+ */
+#ifdef CONFIG_QUICC_ENGINE
+/* Toggle the per-board BCSR bits that reset or re-enable the UCC PHYs. */
+static void __init mpc85xx_mds_reset_ucc_phys(void)
+{
+	struct device_node *np;
+	u8 __iomem *bcsr_regs;	/* mapping lives only for this call */
+
+	/* Map BCSR area */
+	np = of_find_node_by_name(NULL, "bcsr");
+	if (!np)
+		return;
+
+	bcsr_regs = of_iomap(np, 0);
+	of_node_put(np);
+	if (!bcsr_regs)
+		return;
+
+	if (machine_is(mpc8568_mds)) {
+#define BCSR_UCC1_GETH_EN	(0x1 << 7)
+#define BCSR_UCC2_GETH_EN	(0x1 << 7)
+#define BCSR_UCC1_MODE_MSK	(0x3 << 4)
+#define BCSR_UCC2_MODE_MSK	(0x3 << 0)
+
+		/* Turn off UCC1 & UCC2 */
+		clrbits8(&bcsr_regs[8], BCSR_UCC1_GETH_EN);
+		clrbits8(&bcsr_regs[9], BCSR_UCC2_GETH_EN);
+
+		/* Mode is RGMII, all bits clear */
+		clrbits8(&bcsr_regs[11], BCSR_UCC1_MODE_MSK | BCSR_UCC2_MODE_MSK);
+
+		/* Turn UCC1 & UCC2 on */
+		setbits8(&bcsr_regs[8], BCSR_UCC1_GETH_EN);
+		setbits8(&bcsr_regs[9], BCSR_UCC2_GETH_EN);
+	} else if (machine_is(mpc8569_mds)) {
+#define BCSR7_UCC12_GETHnRST	(0x1 << 2)
+#define BCSR8_UEM_MARVELL_RST	(0x1 << 1)
+#define BCSR_UCC_RGMII		(0x1 << 6)
+#define BCSR_UCC_RTBI		(0x1 << 5)
+		/*
+		 * U-Boot mangles interrupt polarity for Marvell PHYs,
+		 * so reset built-in and UEM Marvell PHYs, this puts
+		 * the PHYs into their normal state.
+		 */
+		clrbits8(&bcsr_regs[7], BCSR7_UCC12_GETHnRST);
+		setbits8(&bcsr_regs[8], BCSR8_UEM_MARVELL_RST);
+
+		setbits8(&bcsr_regs[7], BCSR7_UCC12_GETHnRST);
+		clrbits8(&bcsr_regs[8], BCSR8_UEM_MARVELL_RST);
+
+		for_each_compatible_node(np, "network", "ucc_geth") {
+			const unsigned int *prop;
+			int ucc_num;
+
+			prop = of_get_property(np, "cell-index", NULL);
+			if (prop == NULL)
+				continue;
+
+			ucc_num = *prop - 1;	/* cell-index minus one picks the BCSR */
+
+			prop = of_get_property(np, "phy-connection-type", NULL);
+			if (prop == NULL)
+				continue;
+
+			if (strcmp("rtbi", (const char *)prop) == 0)
+				clrsetbits_8(&bcsr_regs[7 + ucc_num],
+					     BCSR_UCC_RGMII, BCSR_UCC_RTBI);
+		}
+	} else if (machine_is(p1021_mds)) {
+#define BCSR11_ENET_MICRST	(0x1 << 5)
+		/* Reset Micrel PHY */
+		clrbits8(&bcsr_regs[11], BCSR11_ENET_MICRST);
+		setbits8(&bcsr_regs[11], BCSR11_ENET_MICRST);
+	}
+
+	iounmap(bcsr_regs);
+}
+
+/* QE setup: par_io init, UCC PHY reset, then P1021-only QE pin muxing. */
+static void __init mpc85xx_mds_qe_init(void)
+{
+	struct ccsr_guts __iomem *guts;
+	struct device_node *np;
+
+	mpc85xx_qe_par_io_init();
+	mpc85xx_mds_reset_ucc_phys();
+
+	/* Only the P1021 MDS needs the pin-mux setup below. */
+	if (!machine_is(p1021_mds))
+		return;
+
+	np = of_find_node_by_name(NULL, "global-utilities");
+	if (!np)
+		return;
+
+	guts = of_iomap(np, 0);
+	of_node_put(np);	/* the node is not used after mapping */
+	if (!guts) {
+		pr_err("mpc85xx-mds: could not map global utilities register\n");
+		return;
+	}
+
+	/*
+	 * P1021 has pins muxed for QE and other functions. To enable QE UEC
+	 * mode, we need to set bit QE0 for UCC1 in Eth mode, QE0 and QE3 for
+	 * UCC5 in Eth mode, QE9 and QE12 for QE MII management signals in
+	 * PMUXCR register.
+	 */
+	setbits32(&guts->pmuxcr, MPC85xx_PMUXCR_QE(0) | MPC85xx_PMUXCR_QE(3) |
+				 MPC85xx_PMUXCR_QE(9) | MPC85xx_PMUXCR_QE(12));
+	iounmap(guts);
+}
+
+#else /* !CONFIG_QUICC_ENGINE */
+static void __init mpc85xx_mds_qe_init(void) { } /* empty stub */
+#endif /* CONFIG_QUICC_ENGINE */
+
+static void __init mpc85xx_mds_setup_arch(void)
+{ /* .setup_arch hook shared by the three MDS machine descriptors below */
+ if (ppc_md.progress) /* the progress hook is optional */
+ ppc_md.progress("mpc85xx_mds_setup_arch()", 0);
+
+ mpc85xx_smp_init();
+
+ mpc85xx_mds_qe_init(); /* empty stub without CONFIG_QUICC_ENGINE */
+
+ fsl_pci_assign_primary();
+
+ swiotlb_detect_4g();
+}
+
+#if IS_BUILTIN(CONFIG_PHYLIB)
+
+/* Register the PHY fixups above for ids "<mdio base>:01" and "<mdio base>:07". */
+static int __init board_fixups(void)
+{
+	static const char * const compstrs[] = {"fsl,gianfar-mdio", "fsl,ucc-mdio"};
+	struct device_node *mdio;
+	struct resource res;
+	char phy_id[20];
+	int i, err;
+
+	for (i = 0; i < ARRAY_SIZE(compstrs); i++) {
+		mdio = of_find_compatible_node(NULL, NULL, compstrs[i]);
+		if (!mdio)
+			continue;	/* no MDIO node with this compatible */
+		err = of_address_to_resource(mdio, 0, &res);
+		of_node_put(mdio);
+		if (err)
+			continue;	/* res was not filled in; no id to build */
+		snprintf(phy_id, sizeof(phy_id), "%llx:%02x",
+			 (unsigned long long)res.start, 1);
+		phy_register_fixup_for_id(phy_id, mpc8568_fixup_125_clock);
+		phy_register_fixup_for_id(phy_id, mpc8568_mds_phy_fixups);
+		/* Register a workaround for errata */
+		snprintf(phy_id, sizeof(phy_id), "%llx:%02x",
+			 (unsigned long long)res.start, 7);
+		phy_register_fixup_for_id(phy_id, mpc8568_mds_phy_fixups);
+	}
+	return 0;
+}
+
+machine_arch_initcall(mpc8568_mds, board_fixups);
+machine_arch_initcall(mpc8569_mds, board_fixups);
+
+#endif
+
+static int __init mpc85xx_publish_devices(void)
+{
+ return mpc85xx_common_publish_devices(); /* forwards to the common helper */
+}
+/* 8568/8569 register the wrapper; p1021_mds registers the helper directly. */
+machine_arch_initcall(mpc8568_mds, mpc85xx_publish_devices);
+machine_arch_initcall(mpc8569_mds, mpc85xx_publish_devices);
+machine_arch_initcall(p1021_mds, mpc85xx_common_publish_devices);
+
+static void __init mpc85xx_mds_pic_init(void)
+{
+	int mpic_flags = MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU;
+	struct mpic *pic;
+
+	pic = mpic_alloc(NULL, 0, mpic_flags, 0, 256, " OpenPIC ");
+	BUG_ON(!pic);	/* a failed MPIC allocation is treated as fatal */
+	mpic_init(pic);
+}
+
+define_machine(mpc8568_mds) { /* machine description for the MPC8568 MDS */
+ .name = "MPC8568 MDS",
+ .compatible = "MPC85xxMDS",
+ .setup_arch = mpc85xx_mds_setup_arch, /* shared by all MDS machines */
+ .init_IRQ = mpc85xx_mds_pic_init, /* shared by all MDS machines */
+ .get_irq = mpic_get_irq,
+ .progress = udbg_progress,
+#ifdef CONFIG_PCI
+ .pcibios_fixup_bus = fsl_pcibios_fixup_bus,
+ .pcibios_fixup_phb = fsl_pcibios_fixup_phb,
+#endif /* CONFIG_PCI */
+};
+
+define_machine(mpc8569_mds) { /* machine description for the MPC8569 MDS */
+ .name = "MPC8569 MDS",
+ .compatible = "fsl,MPC8569EMDS",
+ .setup_arch = mpc85xx_mds_setup_arch, /* shared by all MDS machines */
+ .init_IRQ = mpc85xx_mds_pic_init, /* shared by all MDS machines */
+ .get_irq = mpic_get_irq,
+ .progress = udbg_progress,
+#ifdef CONFIG_PCI
+ .pcibios_fixup_bus = fsl_pcibios_fixup_bus,
+ .pcibios_fixup_phb = fsl_pcibios_fixup_phb,
+#endif /* CONFIG_PCI */
+};
+
+define_machine(p1021_mds) { /* machine description for the P1021 MDS */
+ .name = "P1021 MDS",
+ .compatible = "fsl,P1021MDS",
+ .setup_arch = mpc85xx_mds_setup_arch, /* shared by all MDS machines */
+ .init_IRQ = mpc85xx_mds_pic_init, /* shared by all MDS machines */
+ .get_irq = mpic_get_irq,
+ .progress = udbg_progress,
+#ifdef CONFIG_PCI
+ .pcibios_fixup_bus = fsl_pcibios_fixup_bus,
+ .pcibios_fixup_phb = fsl_pcibios_fixup_phb,
+#endif /* CONFIG_PCI */
+};
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c b/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c
new file mode 100644
index 000000000..f7ac92a8a
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c
@@ -0,0 +1,107 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * MPC85xx PM operators
+ *
+ * Copyright 2015 Freescale Semiconductor Inc.
+ */
+
+#define pr_fmt(fmt) "%s: " fmt, __func__
+
+#include <linux/kernel.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/fsl/guts.h>
+
+#include <asm/io.h>
+#include <asm/fsl_pm.h>
+
+#include "smp.h"
+
+static struct ccsr_guts __iomem *guts;
+
+#ifdef CONFIG_FSL_PMC
+static void mpc85xx_irq_mask(int cpu)
+{
+ /* Empty body: this irq_mask implementation does nothing. */
+}
+
+static void mpc85xx_irq_unmask(int cpu)
+{
+ /* Empty body: this irq_unmask implementation does nothing. */
+}
+
+static void mpc85xx_cpu_die(int cpu)
+{
+ u32 tmp;
+ /* HID0: clear DOZE and SLEEP, set NAP. The cpu argument is not used. */
+ tmp = (mfspr(SPRN_HID0) & ~(HID0_DOZE|HID0_SLEEP)) | HID0_NAP;
+ mtspr(SPRN_HID0, tmp);
+
+ /* Enter NAP mode. */
+ tmp = mfmsr();
+ tmp |= MSR_WE;
+ asm volatile( /* write the MSR with MSR_WE set, between msync and isync */
+ "msync\n"
+ "mtmsr %0\n"
+ "isync\n"
+ :
+ : "r" (tmp));
+}
+
+static void mpc85xx_cpu_up_prepare(int cpu)
+{
+ /* Empty body: this cpu_up_prepare implementation does nothing. */
+}
+#endif
+
+static void mpc85xx_freeze_time_base(bool freeze)
+{
+	const uint32_t tb_bits = CCSR_GUTS_DEVDISR_TB0 | CCSR_GUTS_DEVDISR_TB1;
+
+	if (!freeze)	/* clear both TB bits to unfreeze, set them to freeze */
+		clrbits32(&guts->devdisr, tb_bits);
+	else
+		setbits32(&guts->devdisr, tb_bits);
+
+	/* Read the register back; the returned value is discarded. */
+	in_be32(&guts->devdisr);
+}
+
+static const struct of_device_id mpc85xx_smp_guts_ids[] = { /* used by mpc85xx_setup_pmc() */
+ { .compatible = "fsl,mpc8572-guts", },
+ { .compatible = "fsl,p1020-guts", },
+ { .compatible = "fsl,p1021-guts", },
+ { .compatible = "fsl,p1022-guts", },
+ { .compatible = "fsl,p1023-guts", },
+ { .compatible = "fsl,p2020-guts", },
+ { .compatible = "fsl,bsc9132-guts", },
+ {}, /* empty terminating entry */
+};
+
+static const struct fsl_pm_ops mpc85xx_pm_ops = { /* installed by mpc85xx_setup_pmc() */
+ .freeze_time_base = mpc85xx_freeze_time_base,
+#ifdef CONFIG_FSL_PMC
+ .irq_mask = mpc85xx_irq_mask,
+ .irq_unmask = mpc85xx_irq_unmask,
+ .cpu_die = mpc85xx_cpu_die,
+ .cpu_up_prepare = mpc85xx_cpu_up_prepare,
+#endif /* CONFIG_FSL_PMC */
+};
+
+/* Map the GUTS block of a matching node, if any, and install mpc85xx_pm_ops. */
+int __init mpc85xx_setup_pmc(void)
+{
+	struct device_node *guts_np = of_find_matching_node(NULL, mpc85xx_smp_guts_ids);
+
+	if (!guts_np)
+		return 0;	/* no matching node is not reported as an error */
+
+	guts = of_iomap(guts_np, 0);
+	of_node_put(guts_np);
+	if (!guts) {
+		pr_err("Could not map guts node address\n");
+		return -ENOMEM;
+	}
+	qoriq_pm_ops = &mpc85xx_pm_ops;
+	return 0;
+}
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_rdb.c b/arch/powerpc/platforms/85xx/mpc85xx_rdb.c
new file mode 100644
index 000000000..ec9f60fbe
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/mpc85xx_rdb.c
@@ -0,0 +1,208 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * MPC85xx RDB Board Setup
+ *
+ * Copyright 2009,2012-2013 Freescale Semiconductor Inc.
+ */
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/kdev_t.h>
+#include <linux/delay.h>
+#include <linux/seq_file.h>
+#include <linux/interrupt.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/fsl/guts.h>
+
+#include <asm/time.h>
+#include <asm/machdep.h>
+#include <asm/pci-bridge.h>
+#include <mm/mmu_decl.h>
+#include <asm/udbg.h>
+#include <asm/mpic.h>
+#include <soc/fsl/qe/qe.h>
+
+#include <sysdev/fsl_soc.h>
+#include <sysdev/fsl_pci.h>
+#include "smp.h"
+
+#include "mpc85xx.h"
+
+/* init_IRQ hook shared by every RDB machine descriptor in this file. */
+static void __init mpc85xx_rdb_pic_init(void)
+{
+	int mpic_flags = MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU;
+	struct mpic *pic;
+
+	/* Device trees marked "fsl,MPC85XXRDB-CAMP" also get MPIC_NO_RESET. */
+	if (of_machine_is_compatible("fsl,MPC85XXRDB-CAMP"))
+		mpic_flags |= MPIC_NO_RESET;
+
+	pic = mpic_alloc(NULL, 0, mpic_flags, 0, 256, " OpenPIC ");
+	/* Warn and skip mpic_init() when the allocation failed. */
+	if (!WARN_ON(!pic))
+		mpic_init(pic);
+}
+
+/*
+ * Setup the architecture: .setup_arch hook shared by the RDB machines below.
+ */
+static void __init mpc85xx_rdb_setup_arch(void)
+{
+ if (ppc_md.progress) /* the progress hook is optional */
+ ppc_md.progress("mpc85xx_rdb_setup_arch()", 0);
+
+ mpc85xx_smp_init();
+
+ fsl_pci_assign_primary();
+
+ mpc85xx_qe_par_io_init();
+#if defined(CONFIG_UCC_GETH) || defined(CONFIG_SERIAL_QE)
+ if (machine_is(p1025_rdb)) { /* pin muxing is done for the P1025 RDB only */
+ struct device_node *np;
+
+ struct ccsr_guts __iomem *guts;
+
+ np = of_find_node_by_name(NULL, "global-utilities");
+ if (np) {
+ guts = of_iomap(np, 0);
+ if (!guts) {
+
+ pr_err("mpc85xx-rdb: could not map global utilities register\n");
+
+ } else {
+ /* P1025 has pins muxed for QE and other functions. To
+ * enable QE UEC mode, we need to set bit QE0 for UCC1
+ * in Eth mode, QE0 and QE3 for UCC5 in Eth mode, QE9
+ * and QE12 for QE MII management signals in PMUXCR
+ * register.
+ */
+ setbits32(&guts->pmuxcr, MPC85xx_PMUXCR_QE(0) |
+ MPC85xx_PMUXCR_QE(3) |
+ MPC85xx_PMUXCR_QE(9) |
+ MPC85xx_PMUXCR_QE(12));
+ iounmap(guts); /* the mapping is only needed for the write above */
+ }
+ of_node_put(np);
+ }
+
+ }
+#endif /* CONFIG_UCC_GETH || CONFIG_SERIAL_QE */
+
+ pr_info("MPC85xx RDB board from Freescale Semiconductor\n");
+}
+
+machine_arch_initcall(p1020_mbg_pc, mpc85xx_common_publish_devices);
+machine_arch_initcall(p1020_rdb, mpc85xx_common_publish_devices);
+machine_arch_initcall(p1020_rdb_pc, mpc85xx_common_publish_devices);
+machine_arch_initcall(p1020_rdb_pd, mpc85xx_common_publish_devices);
+machine_arch_initcall(p1020_utm_pc, mpc85xx_common_publish_devices);
+machine_arch_initcall(p1021_rdb_pc, mpc85xx_common_publish_devices);
+machine_arch_initcall(p1025_rdb, mpc85xx_common_publish_devices);
+machine_arch_initcall(p1024_rdb, mpc85xx_common_publish_devices);
+
+define_machine(p1020_rdb) { /* machine description for the P1020 RDB */
+ .name = "P1020 RDB",
+ .compatible = "fsl,P1020RDB",
+ .setup_arch = mpc85xx_rdb_setup_arch, /* shared RDB setup hook */
+ .init_IRQ = mpc85xx_rdb_pic_init, /* shared RDB MPIC init */
+#ifdef CONFIG_PCI
+ .pcibios_fixup_bus = fsl_pcibios_fixup_bus,
+ .pcibios_fixup_phb = fsl_pcibios_fixup_phb,
+#endif /* CONFIG_PCI */
+ .get_irq = mpic_get_irq,
+ .progress = udbg_progress,
+};
+
+define_machine(p1021_rdb_pc) { /* machine description for the P1021 RDB-PC */
+ .name = "P1021 RDB-PC",
+ .compatible = "fsl,P1021RDB-PC",
+ .setup_arch = mpc85xx_rdb_setup_arch, /* shared RDB setup hook */
+ .init_IRQ = mpc85xx_rdb_pic_init, /* shared RDB MPIC init */
+#ifdef CONFIG_PCI
+ .pcibios_fixup_bus = fsl_pcibios_fixup_bus,
+ .pcibios_fixup_phb = fsl_pcibios_fixup_phb,
+#endif /* CONFIG_PCI */
+ .get_irq = mpic_get_irq,
+ .progress = udbg_progress,
+};
+
+define_machine(p1025_rdb) { /* machine description for the P1025 RDB */
+ .name = "P1025 RDB",
+ .compatible = "fsl,P1025RDB",
+ .setup_arch = mpc85xx_rdb_setup_arch, /* has a p1025_rdb-only branch */
+ .init_IRQ = mpc85xx_rdb_pic_init, /* shared RDB MPIC init */
+#ifdef CONFIG_PCI
+ .pcibios_fixup_bus = fsl_pcibios_fixup_bus,
+ .pcibios_fixup_phb = fsl_pcibios_fixup_phb,
+#endif /* CONFIG_PCI */
+ .get_irq = mpic_get_irq,
+ .progress = udbg_progress,
+};
+
+define_machine(p1020_mbg_pc) { /* machine description for the P1020 MBG-PC */
+ .name = "P1020 MBG-PC",
+ .compatible = "fsl,P1020MBG-PC",
+ .setup_arch = mpc85xx_rdb_setup_arch, /* shared RDB setup hook */
+ .init_IRQ = mpc85xx_rdb_pic_init, /* shared RDB MPIC init */
+#ifdef CONFIG_PCI
+ .pcibios_fixup_bus = fsl_pcibios_fixup_bus,
+ .pcibios_fixup_phb = fsl_pcibios_fixup_phb,
+#endif /* CONFIG_PCI */
+ .get_irq = mpic_get_irq,
+ .progress = udbg_progress,
+};
+
+define_machine(p1020_utm_pc) { /* machine description for the P1020 UTM-PC */
+ .name = "P1020 UTM-PC",
+ .compatible = "fsl,P1020UTM-PC",
+ .setup_arch = mpc85xx_rdb_setup_arch, /* shared RDB setup hook */
+ .init_IRQ = mpc85xx_rdb_pic_init, /* shared RDB MPIC init */
+#ifdef CONFIG_PCI
+ .pcibios_fixup_bus = fsl_pcibios_fixup_bus,
+ .pcibios_fixup_phb = fsl_pcibios_fixup_phb,
+#endif /* CONFIG_PCI */
+ .get_irq = mpic_get_irq,
+ .progress = udbg_progress,
+};
+
+define_machine(p1020_rdb_pc) { /* machine description for the P1020RDB-PC */
+ .name = "P1020RDB-PC",
+ .compatible = "fsl,P1020RDB-PC",
+ .setup_arch = mpc85xx_rdb_setup_arch, /* shared RDB setup hook */
+ .init_IRQ = mpc85xx_rdb_pic_init, /* shared RDB MPIC init */
+#ifdef CONFIG_PCI
+ .pcibios_fixup_bus = fsl_pcibios_fixup_bus,
+ .pcibios_fixup_phb = fsl_pcibios_fixup_phb,
+#endif /* CONFIG_PCI */
+ .get_irq = mpic_get_irq,
+ .progress = udbg_progress,
+};
+
+define_machine(p1020_rdb_pd) { /* machine description for the P1020RDB-PD */
+ .name = "P1020RDB-PD",
+ .compatible = "fsl,P1020RDB-PD",
+ .setup_arch = mpc85xx_rdb_setup_arch, /* shared RDB setup hook */
+ .init_IRQ = mpc85xx_rdb_pic_init, /* shared RDB MPIC init */
+#ifdef CONFIG_PCI
+ .pcibios_fixup_bus = fsl_pcibios_fixup_bus,
+ .pcibios_fixup_phb = fsl_pcibios_fixup_phb,
+#endif /* CONFIG_PCI */
+ .get_irq = mpic_get_irq,
+ .progress = udbg_progress,
+};
+
+define_machine(p1024_rdb) { /* machine description for the P1024 RDB */
+ .name = "P1024 RDB",
+ .compatible = "fsl,P1024RDB",
+ .setup_arch = mpc85xx_rdb_setup_arch, /* shared RDB setup hook */
+ .init_IRQ = mpc85xx_rdb_pic_init, /* shared RDB MPIC init */
+#ifdef CONFIG_PCI
+ .pcibios_fixup_bus = fsl_pcibios_fixup_bus,
+ .pcibios_fixup_phb = fsl_pcibios_fixup_phb,
+#endif /* CONFIG_PCI */
+ .get_irq = mpic_get_irq,
+ .progress = udbg_progress,
+};
diff --git a/arch/powerpc/platforms/85xx/mvme2500.c b/arch/powerpc/platforms/85xx/mvme2500.c
new file mode 100644
index 000000000..1b59e45a0
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/mvme2500.c
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Board setup routines for the Emerson/Artesyn MVME2500
+ *
+ * Copyright 2014 Elettra-Sincrotrone Trieste S.C.p.A.
+ *
+ * Based on earlier code by:
+ *
+ * Xianghua Xiao (x.xiao@freescale.com)
+ * Tom Armistead (tom.armistead@emerson.com)
+ * Copyright 2012 Emerson
+ *
+ * Author Alessio Igor Bogani <alessio.bogani@elettra.eu>
+ */
+
+#include <linux/pci.h>
+#include <asm/udbg.h>
+#include <asm/mpic.h>
+#include <sysdev/fsl_soc.h>
+#include <sysdev/fsl_pci.h>
+
+#include "mpc85xx.h"
+
+void __init mvme2500_pic_init(void)
+{
+	int mpic_flags = MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU;
+	struct mpic *pic = mpic_alloc(NULL, 0, mpic_flags, 0, 256, " OpenPIC ");
+
+	BUG_ON(!pic);	/* a failed MPIC allocation is treated as fatal */
+	mpic_init(pic);
+}
+
+/*
+ * Setup the architecture: .setup_arch hook of the MVME2500 machine below.
+ */
+static void __init mvme2500_setup_arch(void)
+{
+ if (ppc_md.progress) /* the progress hook is optional */
+ ppc_md.progress("mvme2500_setup_arch()", 0);
+ fsl_pci_assign_primary();
+ pr_info("MVME2500 board from Artesyn\n");
+}
+
+machine_arch_initcall(mvme2500, mpc85xx_common_publish_devices);
+
+define_machine(mvme2500) { /* machine description for the MVME2500 */
+ .name = "MVME2500",
+ .compatible = "artesyn,MVME2500",
+ .setup_arch = mvme2500_setup_arch, /* defined above in this file */
+ .init_IRQ = mvme2500_pic_init, /* defined above in this file */
+#ifdef CONFIG_PCI
+ .pcibios_fixup_bus = fsl_pcibios_fixup_bus,
+ .pcibios_fixup_phb = fsl_pcibios_fixup_phb,
+#endif /* CONFIG_PCI */
+ .get_irq = mpic_get_irq,
+ .progress = udbg_progress,
+};
diff --git a/arch/powerpc/platforms/85xx/p1010rdb.c b/arch/powerpc/platforms/85xx/p1010rdb.c
new file mode 100644
index 000000000..10d6f1fa3
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/p1010rdb.c
@@ -0,0 +1,77 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * P1010RDB Board Setup
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ */
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/of.h>
+
+#include <asm/time.h>
+#include <asm/machdep.h>
+#include <asm/pci-bridge.h>
+#include <mm/mmu_decl.h>
+#include <asm/udbg.h>
+#include <asm/mpic.h>
+
+#include <sysdev/fsl_soc.h>
+#include <sysdev/fsl_pci.h>
+
+#include "mpc85xx.h"
+
+/* init_IRQ hook for the P1010 RDB machine description below. */
+void __init p1010_rdb_pic_init(void)
+{
+	int mpic_flags = MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU;
+	struct mpic *pic;
+
+	pic = mpic_alloc(NULL, 0, mpic_flags, 0, 256, " OpenPIC ");
+	BUG_ON(!pic);
+	mpic_init(pic);
+}
+
+
+/*
+ * Setup the architecture: .setup_arch hook of the P1010 RDB machine.
+ */
+static void __init p1010_rdb_setup_arch(void)
+{
+	/* Announce this stage through the progress hook, when one is set. */
+	if (ppc_md.progress != NULL)
+		ppc_md.progress("p1010_rdb_setup_arch()", 0);
+
+	fsl_pci_assign_primary();
+	pr_info("P1010 RDB board from Freescale Semiconductor\n");
+}
+
+machine_arch_initcall(p1010_rdb, mpc85xx_common_publish_devices);
+
+/*
+ * Called very early, device-tree isn't unflattened.
+ * Returns 1 when the tree is "fsl,P1010RDB" or "fsl,P1010RDB-PB", else 0.
+ */
+static int __init p1010_rdb_probe(void)
+{
+	const int is_p1010_rdb = of_machine_is_compatible("fsl,P1010RDB") ||
+				 of_machine_is_compatible("fsl,P1010RDB-PB");
+
+	return is_p1010_rdb;
+}
+
+define_machine(p1010_rdb) { /* machine description for the P1010 RDB */
+ .name = "P1010 RDB",
+ .probe = p1010_rdb_probe, /* accepts "fsl,P1010RDB" and "fsl,P1010RDB-PB" */
+ .setup_arch = p1010_rdb_setup_arch,
+ .init_IRQ = p1010_rdb_pic_init,
+#ifdef CONFIG_PCI
+ .pcibios_fixup_bus = fsl_pcibios_fixup_bus,
+ .pcibios_fixup_phb = fsl_pcibios_fixup_phb,
+#endif /* CONFIG_PCI */
+ .get_irq = mpic_get_irq,
+ .progress = udbg_progress,
+};
diff --git a/arch/powerpc/platforms/85xx/p1022_ds.c b/arch/powerpc/platforms/85xx/p1022_ds.c
new file mode 100644
index 000000000..0dd786a06
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/p1022_ds.c
@@ -0,0 +1,563 @@
+/*
+ * P1022DS board specific routines
+ *
+ * Authors: Travis Wheatley <travis.wheatley@freescale.com>
+ * Dave Liu <daveliu@freescale.com>
+ * Timur Tabi <timur@freescale.com>
+ *
+ * Copyright 2010 Freescale Semiconductor, Inc.
+ *
+ * This file is taken from the Freescale P1022DS BSP, with modifications:
+ * 1) No AMP support
+ * 2) No PCI endpoint support
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2. This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+#include <linux/fsl/guts.h>
+#include <linux/pci.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <asm/div64.h>
+#include <asm/mpic.h>
+#include <asm/swiotlb.h>
+
+#include <sysdev/fsl_soc.h>
+#include <sysdev/fsl_pci.h>
+#include <asm/udbg.h>
+#include <asm/fsl_lbc.h>
+#include "smp.h"
+
+#include "mpc85xx.h"
+
+#if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE)
+
+#define PMUXCR_ELBCDIU_MASK 0xc0000000 /* eLBC/DIU mux field of the GUTS PMUXCR register */
+#define PMUXCR_ELBCDIU_NOR16 0x80000000
+#define PMUXCR_ELBCDIU_DIU 0x40000000 /* field value that selects the DIU */
+
+/*
+ * Board-specific initialization of the DIU. This code should probably be
+ * executed when the DIU is opened, rather than in arch code, but the DIU
+ * driver does not have a mechanism for this (yet).
+ *
+ * This is especially problematic on the P1022DS because the local bus (eLBC)
+ * and the DIU video signals share the same pins, which means that enabling the
+ * DIU will disable access to NOR flash.
+ */
+
+/* DIU Pixel Clock bits of the CLKDVDR Global Utilities register */
+#define CLKDVDR_PXCKEN 0x80000000 /* set to enable the pixel clock */
+#define CLKDVDR_PXCKINV 0x10000000
+#define CLKDVDR_PXCKDLY 0x06000000
+#define CLKDVDR_PXCLK_MASK 0x00FF0000 /* divider field; the ratio is written shifted left by 16 */
+
+/*
+ * Some ngPIXIS register definitions.
+ *
+ * PX_CTL is used as a byte offset into the directly mapped PIXIS registers.
+ * PX_BRDCFG0/PX_BRDCFG1 are register indices: in indirect mode the index is
+ * written through the CS0 window and the data is accessed through CS1.
+ */
+#define PX_CTL 3
+#define PX_BRDCFG0 8
+#define PX_BRDCFG1 9
+
+#define PX_BRDCFG0_ELBC_SPI_MASK 0xc0
+#define PX_BRDCFG0_ELBC_SPI_ELBC 0x00
+#define PX_BRDCFG0_ELBC_SPI_NULL 0xc0
+#define PX_BRDCFG0_ELBC_DIU 0x02 /* board mux: route the shared pins to the DIU */
+
+#define PX_BRDCFG1_DVIEN 0x80 /* DVI port enable */
+#define PX_BRDCFG1_DFPEN 0x40 /* DFP (LVDS) port enable */
+#define PX_BRDCFG1_BACKLIGHT 0x20 /* backlight enable */
+#define PX_BRDCFG1_DDCEN 0x10
+
+#define PX_CTL_ALTACC 0x80 /* set in PX_CTL to enable indirect PIXIS mode */
+
+/*
+ * DIU Area Descriptor
+ *
+ * Note that we need to byte-swap the value before it's written to the AD
+ * register. So even though the registers don't look like they're in the same
+ * bit positions as they are on the MPC8610, the same value is written to the
+ * AD register on the MPC8610 and on the P1022.
+ */
+#define AD_BYTE_F 0x10000000
+#define AD_ALPHA_C_MASK 0x0E000000
+#define AD_ALPHA_C_SHIFT 25
+#define AD_BLUE_C_MASK 0x01800000
+#define AD_BLUE_C_SHIFT 23
+#define AD_GREEN_C_MASK 0x00600000
+#define AD_GREEN_C_SHIFT 21
+#define AD_RED_C_MASK 0x00180000
+#define AD_RED_C_SHIFT 19
+#define AD_PALETTE 0x00040000
+#define AD_PIXEL_S_MASK 0x00030000
+#define AD_PIXEL_S_SHIFT 16
+#define AD_COMP_3_MASK 0x0000F000
+#define AD_COMP_3_SHIFT 12
+#define AD_COMP_2_MASK 0x00000F00
+#define AD_COMP_2_SHIFT 8
+#define AD_COMP_1_MASK 0x000000F0
+#define AD_COMP_1_SHIFT 4
+#define AD_COMP_0_MASK 0x0000000F
+#define AD_COMP_0_SHIFT 0
+
+/*
+ * Build a little-endian area descriptor word from its fields.
+ *
+ * Each argument is shifted into the position given by the matching
+ * AD_*_SHIFT constant and OR-ed with AD_BYTE_F.  Every argument is
+ * parenthesized so that compound expressions (e.g. "a | b") are shifted as
+ * a whole instead of being split by operator precedence.  Arguments are not
+ * masked; callers must pass values that fit their field.
+ */
+#define MAKE_AD(alpha, red, blue, green, size, c0, c1, c2, c3) \
+	cpu_to_le32(AD_BYTE_F | ((alpha) << AD_ALPHA_C_SHIFT) | \
+	((blue) << AD_BLUE_C_SHIFT) | ((green) << AD_GREEN_C_SHIFT) | \
+	((red) << AD_RED_C_SHIFT) | ((c3) << AD_COMP_3_SHIFT) | \
+	((c2) << AD_COMP_2_SHIFT) | ((c1) << AD_COMP_1_SHIFT) | \
+	((c0) << AD_COMP_0_SHIFT) | ((size) << AD_PIXEL_S_SHIFT))
+
+struct fsl_law { /* one local access window (LAW) entry as scanned by lbc_br_to_phys() */
+ u32 lawbar; /* masked with LAWBAR_MASK to recover the upper address bits */
+ u32 reserved1;
+ u32 lawar; /* attributes: enable bit and target interface */
+ u32 reserved[5];
+};
+
+#define LAWBAR_MASK 0x00F00000 /* LAWBAR bits holding the upper four address bits */
+#define LAWBAR_SHIFT 12 /* moves the LAWBAR_MASK bits up to address bits 32-35 */
+
+#define LAWAR_EN 0x80000000 /* window enabled */
+#define LAWAR_TGT_MASK 0x01F00000 /* target interface field */
+#define LAW_TRGT_IF_LBC (0x04 << 20) /* target interface value for the localbus */
+
+#define LAWAR_MASK (LAWAR_EN | LAWAR_TGT_MASK)
+#define LAWAR_MATCH (LAWAR_EN | LAW_TRGT_IF_LBC) /* enabled window that targets the localbus */
+
+#define BR_BA 0xFFFF8000 /* base-address bits of a BRx register */
+
+/*
+ * Map a BRx value to a physical address
+ *
+ * The localbus BRx registers only store the lower 32 bits of the address. To
+ * obtain the upper four bits, we need to scan the LAW table. The entry which
+ * maps to the localbus will contain the upper four bits.
+ *
+ * @ecm: mapped base of the LAW node; the LAW table starts at offset 0xc08
+ * @count: number of LAW entries to scan
+ * @br: raw BRx register value
+ *
+ * With 32-bit physical addressing, @ecm and @count are unused and the BRx
+ * base address is returned directly.  With 64-bit physical addressing, the
+ * first enabled LAW that targets the localbus supplies the upper bits; 0 is
+ * returned if no such LAW exists.
+ */
+static phys_addr_t lbc_br_to_phys(const void *ecm, unsigned int count, u32 br)
+{
+#ifndef CONFIG_PHYS_64BIT
+	/*
+	 * If we only have 32-bit addressing, then the BRx address *is* the
+	 * physical address.
+	 */
+	return br & BR_BA;
+#else
+	const struct fsl_law *law = ecm + 0xc08;
+	unsigned int i;
+
+	for (i = 0; i < count; i++) {
+		/* 64-bit so that the shift below cannot lose the upper bits */
+		u64 lawbar = in_be32(&law[i].lawbar);
+		u32 lawar = in_be32(&law[i].lawar);
+
+		if ((lawar & LAWAR_MASK) == LAWAR_MATCH)
+			/* Extract the upper four bits */
+			return (br & BR_BA) |
+			       ((lawbar & LAWBAR_MASK) << LAWBAR_SHIFT);
+	}
+
+	return 0;
+#endif
+}
+
+/**
+ * p1022ds_set_monitor_port: switch the output to a different monitor port
+ *
+ * @port: the port to enable. FSL_DIU_PORT_DVI and FSL_DIU_PORT_LVDS are
+ * handled; any other value is only reported with pr_err().
+ *
+ * Maps the GUTS, eLBC and LAW registers, forces CS0/CS1 into GPCM mode if
+ * they are not already, makes sure the board and chip muxes select the DIU,
+ * and then updates BRDCFG1 through the indirect PIXIS window (register index
+ * written via CS0, data accessed via CS1). Every failure is logged and
+ * abandons the switch. All mappings and node references taken here are
+ * released before returning.
+ */
+static void p1022ds_set_monitor_port(enum fsl_diu_monitor_port port)
+{
+ struct device_node *guts_node;
+ struct device_node *lbc_node = NULL;
+ struct device_node *law_node = NULL;
+ struct ccsr_guts __iomem *guts;
+ struct fsl_lbc_regs *lbc = NULL;
+ void *ecm = NULL;
+ u8 __iomem *lbc_lcs0_ba = NULL;
+ u8 __iomem *lbc_lcs1_ba = NULL;
+ phys_addr_t cs0_addr, cs1_addr;
+ u32 br0, or0, br1, or1;
+ const __be32 *iprop;
+ unsigned int num_laws;
+ u8 b;
+
+ /* Map the global utilities registers. */
+ guts_node = of_find_compatible_node(NULL, NULL, "fsl,p1022-guts");
+ if (!guts_node) {
+ pr_err("p1022ds: missing global utilities device node\n");
+ return;
+ }
+
+ guts = of_iomap(guts_node, 0);
+ if (!guts) {
+ pr_err("p1022ds: could not map global utilities device\n");
+ goto exit;
+ }
+
+ lbc_node = of_find_compatible_node(NULL, NULL, "fsl,p1022-elbc");
+ if (!lbc_node) {
+ pr_err("p1022ds: missing localbus node\n");
+ goto exit;
+ }
+
+ lbc = of_iomap(lbc_node, 0);
+ if (!lbc) {
+ pr_err("p1022ds: could not map localbus node\n");
+ goto exit;
+ }
+
+ law_node = of_find_compatible_node(NULL, NULL, "fsl,ecm-law");
+ if (!law_node) {
+ pr_err("p1022ds: missing local access window node\n");
+ goto exit;
+ }
+
+ ecm = of_iomap(law_node, 0);
+ if (!ecm) {
+ pr_err("p1022ds: could not map local access window node\n");
+ goto exit;
+ }
+
+ iprop = of_get_property(law_node, "fsl,num-laws", NULL);
+ if (!iprop) {
+ pr_err("p1022ds: LAW node is missing fsl,num-laws property\n");
+ goto exit;
+ }
+ num_laws = be32_to_cpup(iprop);
+
+ /*
+ * Indirect mode requires both BR0 and BR1 to be set to "GPCM",
+ * otherwise writes to these addresses won't actually appear on the
+ * local bus, and so the PIXIS won't see them.
+ *
+ * In FCM mode, writes go to the NAND controller, which does not pass
+ * them to the localbus directly. So we force BR0 and BR1 into GPCM
+ * mode, since we don't care about what's behind the localbus any
+ * more.
+ */
+ br0 = in_be32(&lbc->bank[0].br);
+ br1 = in_be32(&lbc->bank[1].br);
+ or0 = in_be32(&lbc->bank[0].or);
+ or1 = in_be32(&lbc->bank[1].or);
+
+ /* Make sure CS0 and CS1 are programmed */
+ if (!(br0 & BR_V) || !(br1 & BR_V)) {
+ pr_err("p1022ds: CS0 and/or CS1 is not programmed\n");
+ goto exit;
+ }
+
+ /*
+ * Use the existing BRx/ORx values if it's already GPCM. Otherwise,
+ * force the values to simple 32KB GPCM windows with the most
+ * conservative timing.
+ */
+ if ((br0 & BR_MSEL) != BR_MS_GPCM) {
+ br0 = (br0 & BR_BA) | BR_V;
+ or0 = 0xFFFF8000 | 0xFF7;
+ out_be32(&lbc->bank[0].br, br0);
+ out_be32(&lbc->bank[0].or, or0);
+ }
+ if ((br1 & BR_MSEL) != BR_MS_GPCM) {
+ br1 = (br1 & BR_BA) | BR_V;
+ or1 = 0xFFFF8000 | 0xFF7;
+ out_be32(&lbc->bank[1].br, br1);
+ out_be32(&lbc->bank[1].or, or1);
+ }
+
+ cs0_addr = lbc_br_to_phys(ecm, num_laws, br0); /* 0 means no address could be determined */
+ if (!cs0_addr) {
+ pr_err("p1022ds: could not determine physical address for CS0"
+ " (BR0=%08x)\n", br0);
+ goto exit;
+ }
+ cs1_addr = lbc_br_to_phys(ecm, num_laws, br1);
+ if (!cs1_addr) {
+ pr_err("p1022ds: could not determine physical address for CS1"
+ " (BR1=%08x)\n", br1);
+ goto exit;
+ }
+
+ lbc_lcs0_ba = ioremap(cs0_addr, 1); /* single byte: PIXIS register index */
+ if (!lbc_lcs0_ba) {
+ pr_err("p1022ds: could not ioremap CS0 address %llx\n",
+ (unsigned long long)cs0_addr);
+ goto exit;
+ }
+ lbc_lcs1_ba = ioremap(cs1_addr, 1); /* single byte: PIXIS register data */
+ if (!lbc_lcs1_ba) {
+ pr_err("p1022ds: could not ioremap CS1 address %llx\n",
+ (unsigned long long)cs1_addr);
+ goto exit;
+ }
+
+ /* Make sure we're in indirect mode first. */
+ if ((in_be32(&guts->pmuxcr) & PMUXCR_ELBCDIU_MASK) !=
+ PMUXCR_ELBCDIU_DIU) {
+ struct device_node *pixis_node;
+ void __iomem *pixis;
+
+ pixis_node =
+ of_find_compatible_node(NULL, NULL, "fsl,p1022ds-fpga");
+ if (!pixis_node) {
+ pr_err("p1022ds: missing pixis node\n");
+ goto exit;
+ }
+
+ pixis = of_iomap(pixis_node, 0);
+ of_node_put(pixis_node);
+ if (!pixis) {
+ pr_err("p1022ds: could not map pixis registers\n");
+ goto exit;
+ }
+
+ /* Enable indirect PIXIS mode. */
+ setbits8(pixis + PX_CTL, PX_CTL_ALTACC);
+ iounmap(pixis);
+
+ /* Switch the board mux to the DIU */
+ out_8(lbc_lcs0_ba, PX_BRDCFG0); /* BRDCFG0 */
+ b = in_8(lbc_lcs1_ba);
+ b |= PX_BRDCFG0_ELBC_DIU;
+ out_8(lbc_lcs1_ba, b);
+
+ /* Set the chip mux to DIU mode. */
+ clrsetbits_be32(&guts->pmuxcr, PMUXCR_ELBCDIU_MASK,
+ PMUXCR_ELBCDIU_DIU);
+ in_be32(&guts->pmuxcr); /* read back; the value itself is discarded */
+ }
+
+
+ switch (port) {
+ case FSL_DIU_PORT_DVI:
+ /* Enable the DVI port, disable the DFP and the backlight */
+ out_8(lbc_lcs0_ba, PX_BRDCFG1);
+ b = in_8(lbc_lcs1_ba);
+ b &= ~(PX_BRDCFG1_DFPEN | PX_BRDCFG1_BACKLIGHT);
+ b |= PX_BRDCFG1_DVIEN;
+ out_8(lbc_lcs1_ba, b);
+ break;
+ case FSL_DIU_PORT_LVDS:
+ /*
+ * LVDS also needs backlight enabled, otherwise the display
+ * will be blank.
+ */
+ /* Enable the DFP port and the backlight, disable the DVI */
+ out_8(lbc_lcs0_ba, PX_BRDCFG1);
+ b = in_8(lbc_lcs1_ba);
+ b &= ~PX_BRDCFG1_DVIEN;
+ b |= PX_BRDCFG1_DFPEN | PX_BRDCFG1_BACKLIGHT;
+ out_8(lbc_lcs1_ba, b);
+ break;
+ default:
+ pr_err("p1022ds: unsupported monitor port %i\n", port);
+ }
+
+exit: /* common cleanup: unmap whatever was mapped, then drop the node references */
+ if (lbc_lcs1_ba)
+ iounmap(lbc_lcs1_ba);
+ if (lbc_lcs0_ba)
+ iounmap(lbc_lcs0_ba);
+ if (lbc)
+ iounmap(lbc);
+ if (ecm)
+ iounmap(ecm);
+ if (guts)
+ iounmap(guts);
+
+ of_node_put(law_node);
+ of_node_put(lbc_node);
+ of_node_put(guts_node);
+}
+
+/**
+ * p1022ds_set_pixel_clock: program the DIU's clock
+ *
+ * @pixclock: the wavelength, in picoseconds, of the clock
+ *
+ * Converts @pixclock to a frequency, derives the ratio of the platform clock
+ * to the pixel clock (clamped to 2-255) and programs it into the CLKDVDR
+ * register of the global utilities block.  A @pixclock of zero is rejected
+ * because it cannot be converted to a frequency.  Failures are logged and
+ * leave the register untouched.
+ */
+void p1022ds_set_pixel_clock(unsigned int pixclock)
+{
+	struct device_node *guts_np = NULL;
+	struct ccsr_guts __iomem *guts;
+	unsigned long freq;
+	u64 temp;
+	u32 pxclk;
+
+	/* Reject a zero period up front: do_div() below would divide by it */
+	if (!pixclock) {
+		pr_err("p1022ds: invalid pixel clock period of 0 ps\n");
+		return;
+	}
+
+	/* Map the global utilities registers. */
+	guts_np = of_find_compatible_node(NULL, NULL, "fsl,p1022-guts");
+	if (!guts_np) {
+		pr_err("p1022ds: missing global utilities device node\n");
+		return;
+	}
+
+	guts = of_iomap(guts_np, 0);
+	of_node_put(guts_np);
+	if (!guts) {
+		pr_err("p1022ds: could not map global utilities device\n");
+		return;
+	}
+
+	/* Convert pixclock from a wavelength to a frequency */
+	temp = 1000000000000ULL;
+	do_div(temp, pixclock);
+	freq = temp;
+
+	/*
+	 * 'pxclk' is the ratio of the platform clock to the pixel clock.
+	 * This number is programmed into the CLKDVDR register, and the valid
+	 * range of values is 2-255.
+	 */
+	pxclk = DIV_ROUND_CLOSEST(fsl_get_sys_freq(), freq);
+	pxclk = clamp_t(u32, pxclk, 2, 255);
+
+	/*
+	 * Disable the pixel clock and clear the delay and divider fields.
+	 * NOTE(review): CLKDVDR_PXCKINV is not part of this mask, so an
+	 * already-set invert bit is left as it is - confirm that is intended.
+	 */
+	clrbits32(&guts->clkdvdr,
+		  CLKDVDR_PXCKEN | CLKDVDR_PXCKDLY | CLKDVDR_PXCLK_MASK);
+
+	/* Enable the clock and set the pxclk */
+	setbits32(&guts->clkdvdr, CLKDVDR_PXCKEN | (pxclk << 16));
+
+	iounmap(guts);
+}
+
+/**
+ * p1022ds_valid_monitor_port: set the monitor port for sysfs
+ *
+ * @port: the requested monitor port
+ *
+ * Returns @port unchanged when it is DVI or single-link LVDS, and falls
+ * back to DVI for every other value.
+ */
+enum fsl_diu_monitor_port
+p1022ds_valid_monitor_port(enum fsl_diu_monitor_port port)
+{
+	if (port == FSL_DIU_PORT_DVI || port == FSL_DIU_PORT_LVDS)
+		return port;
+
+	/* Dual-link LVDS is not supported */
+	return FSL_DIU_PORT_DVI;
+}
+
+#endif
+
+/*
+ * Allocate the MPIC with the MPIC_BIG_ENDIAN and MPIC_SINGLE_DEST_CPU flags
+ * and initialise it.  A failed allocation is treated as fatal (BUG).
+ */
+void __init p1022_ds_pic_init(void)
+{
+	const unsigned int mpic_flags = MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU;
+	struct mpic *pic;
+
+	pic = mpic_alloc(NULL, 0, mpic_flags, 0, 256, " OpenPIC ");
+	BUG_ON(!pic);
+	mpic_init(pic);
+}
+
+#if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE)
+
+/* TRUE if there is a "video=fslfb" command-line parameter. */
+static bool fslfb;
+
+/*
+ * Search for a "video=fslfb" command-line parameter, and set 'fslfb' to
+ * true if we find it.
+ *
+ * We need to use early_param() instead of __setup() because the normal
+ * __setup() gets called too late. However, early_param() gets called very
+ * early, before the device tree is unflattened, so all we can do now is set a
+ * global variable. Later on, p1022_ds_setup_arch() will use that variable
+ * to determine if we need to update the device tree.
+ *
+ * The match is on the "fslfb:" prefix, so the value must carry an option
+ * string after the driver name. Always returns 0.
+ */
+static int __init early_video_setup(char *options)
+{
+	/* A bare "video" with no "=value" arrives as NULL: nothing to match */
+	if (!options)
+		return 0;
+
+	fslfb = (strncmp(options, "fslfb:", 6) == 0);
+
+	return 0;
+}
+early_param("video", early_video_setup);
+
+#endif
+
+/*
+ * Setup the architecture
+ *
+ * Installs the board's DIU callbacks (when the DIU framebuffer driver is
+ * configured), disables the NOR/NAND flash nodes if "video=fslfb:..." was
+ * given on the command line, and then runs the common SMP, PCI and swiotlb
+ * setup.
+ */
+static void __init p1022_ds_setup_arch(void)
+{
+	if (ppc_md.progress)
+		ppc_md.progress("p1022_ds_setup_arch()", 0);
+
+#if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE)
+	diu_ops.set_monitor_port	= p1022ds_set_monitor_port;
+	diu_ops.set_pixel_clock		= p1022ds_set_pixel_clock;
+	diu_ops.valid_monitor_port	= p1022ds_valid_monitor_port;
+
+	/*
+	 * Disable the NOR and NAND flash nodes if there is video=fslfb...
+	 * command-line parameter.  When the DIU is active, the localbus is
+	 * unavailable, so we have to disable these nodes before the MTD
+	 * driver loads.
+	 */
+	if (fslfb) {
+		struct device_node *np =
+			of_find_compatible_node(NULL, NULL, "fsl,p1022-elbc");
+
+		if (np) {
+			struct device_node *np2;
+
+			/*
+			 * of_find_compatible_node() drops a reference on the
+			 * node it starts from, so take an extra one before
+			 * each search to keep 'np' alive.
+			 */
+			of_node_get(np);
+			np2 = of_find_compatible_node(np, NULL, "cfi-flash");
+			if (np2) {
+				static struct property nor_status = {
+					.name = "status",
+					.value = "disabled",
+					.length = sizeof("disabled"),
+				};
+
+				/*
+				 * of_update_property() is called before
+				 * kmalloc() is available, so the 'new' object
+				 * should be allocated in the global area.
+				 * The easiest way to do that is to
+				 * allocate one static local variable for each
+				 * call to this function.
+				 */
+				pr_info("p1022ds: disabling %pOF node\n",
+					np2);
+				of_update_property(np2, &nor_status);
+				of_node_put(np2);
+			}
+
+			of_node_get(np);
+			np2 = of_find_compatible_node(np, NULL,
+						      "fsl,elbc-fcm-nand");
+			if (np2) {
+				static struct property nand_status = {
+					.name = "status",
+					.value = "disabled",
+					.length = sizeof("disabled"),
+				};
+
+				pr_info("p1022ds: disabling %pOF node\n",
+					np2);
+				of_update_property(np2, &nand_status);
+				of_node_put(np2);
+			}
+
+			/* Drop the reference from the initial lookup */
+			of_node_put(np);
+		}
+
+	}
+
+#endif
+
+	mpc85xx_smp_init();
+
+	fsl_pci_assign_primary();
+
+	swiotlb_detect_4g();
+
+	pr_info("Freescale P1022 DS reference board\n");
+}
+
+machine_arch_initcall(p1022_ds, mpc85xx_common_publish_devices); /* arch initcall registered for this machine */
+
+define_machine(p1022_ds) { /* machine description for the P1022 DS board */
+ .name = "P1022 DS",
+ .compatible = "fsl,p1022ds", /* compatible string this description is matched on */
+ .setup_arch = p1022_ds_setup_arch,
+ .init_IRQ = p1022_ds_pic_init, /* allocates and initialises the MPIC */
+#ifdef CONFIG_PCI /* PCI fixup hooks are only installed when PCI is configured */
+ .pcibios_fixup_bus = fsl_pcibios_fixup_bus,
+ .pcibios_fixup_phb = fsl_pcibios_fixup_phb,
+#endif
+ .get_irq = mpic_get_irq,
+ .progress = udbg_progress,
+};
diff --git a/arch/powerpc/platforms/85xx/p1022_rdk.c b/arch/powerpc/platforms/85xx/p1022_rdk.c
new file mode 100644
index 000000000..25ab6e9c1
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/p1022_rdk.c
@@ -0,0 +1,143 @@
+/*
+ * P1022 RDK board specific routines
+ *
+ * Copyright 2012 Freescale Semiconductor, Inc.
+ *
+ * Author: Timur Tabi <timur@freescale.com>
+ *
+ * Based on p1022_ds.c
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2. This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+#include <linux/fsl/guts.h>
+#include <linux/pci.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <asm/div64.h>
+#include <asm/mpic.h>
+#include <asm/swiotlb.h>
+
+#include <sysdev/fsl_soc.h>
+#include <sysdev/fsl_pci.h>
+#include <asm/udbg.h>
+#include "smp.h"
+
+#include "mpc85xx.h"
+
+#if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE)
+
+/* DIU Pixel Clock bits of the CLKDVDR Global Utilities register */
+#define CLKDVDR_PXCKEN		0x80000000
+#define CLKDVDR_PXCKINV		0x10000000
+#define CLKDVDR_PXCKDLY		0x06000000
+#define CLKDVDR_PXCLK_MASK	0x00FF0000
+
+/**
+ * p1022rdk_set_pixel_clock: program the DIU's clock
+ *
+ * @pixclock: the wavelength, in picoseconds, of the clock
+ *
+ * Converts @pixclock to a frequency, derives the ratio of the platform clock
+ * to the pixel clock (clamped to 2-255) and programs it into the CLKDVDR
+ * register of the global utilities block.  A @pixclock of zero is rejected
+ * because it cannot be converted to a frequency.  Failures are logged and
+ * leave the register untouched.
+ */
+void p1022rdk_set_pixel_clock(unsigned int pixclock)
+{
+	struct device_node *guts_np = NULL;
+	struct ccsr_guts __iomem *guts;
+	unsigned long freq;
+	u64 temp;
+	u32 pxclk;
+
+	/* Reject a zero period up front: do_div() below would divide by it */
+	if (!pixclock) {
+		pr_err("p1022rdk: invalid pixel clock period of 0 ps\n");
+		return;
+	}
+
+	/* Map the global utilities registers. */
+	guts_np = of_find_compatible_node(NULL, NULL, "fsl,p1022-guts");
+	if (!guts_np) {
+		pr_err("p1022rdk: missing global utilities device node\n");
+		return;
+	}
+
+	guts = of_iomap(guts_np, 0);
+	of_node_put(guts_np);
+	if (!guts) {
+		pr_err("p1022rdk: could not map global utilities device\n");
+		return;
+	}
+
+	/* Convert pixclock from a wavelength to a frequency */
+	temp = 1000000000000ULL;
+	do_div(temp, pixclock);
+	freq = temp;
+
+	/*
+	 * 'pxclk' is the ratio of the platform clock to the pixel clock.
+	 * This number is programmed into the CLKDVDR register, and the valid
+	 * range of values is 2-255.
+	 */
+	pxclk = DIV_ROUND_CLOSEST(fsl_get_sys_freq(), freq);
+	pxclk = clamp_t(u32, pxclk, 2, 255);
+
+	/*
+	 * Disable the pixel clock and clear the delay and divider fields.
+	 * NOTE(review): CLKDVDR_PXCKINV is not part of this mask, so an
+	 * already-set invert bit is left as it is - confirm that is intended.
+	 */
+	clrbits32(&guts->clkdvdr,
+		  CLKDVDR_PXCKEN | CLKDVDR_PXCKDLY | CLKDVDR_PXCLK_MASK);
+
+	/* Enable the clock and set the pxclk */
+	setbits32(&guts->clkdvdr, CLKDVDR_PXCKEN | (pxclk << 16));
+
+	iounmap(guts);
+}
+
+/**
+ * p1022rdk_valid_monitor_port: set the monitor port for sysfs
+ *
+ * @port: the requested monitor port (ignored)
+ *
+ * Always returns FSL_DIU_PORT_DVI, whatever port was requested.
+ */
+enum fsl_diu_monitor_port
+p1022rdk_valid_monitor_port(enum fsl_diu_monitor_port port)
+{
+	const enum fsl_diu_monitor_port only_port = FSL_DIU_PORT_DVI;
+
+	return only_port;
+}
+
+#endif
+
+/*
+ * Allocate the MPIC with the MPIC_BIG_ENDIAN and MPIC_SINGLE_DEST_CPU flags
+ * and initialise it.  A failed allocation is treated as fatal (BUG).
+ */
+void __init p1022_rdk_pic_init(void)
+{
+	const unsigned int mpic_flags = MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU;
+	struct mpic *pic;
+
+	pic = mpic_alloc(NULL, 0, mpic_flags, 0, 256, " OpenPIC ");
+	BUG_ON(!pic);
+	mpic_init(pic);
+}
+
+/*
+ * Setup the architecture
+ *
+ * Reports progress when a progress hook is installed, installs the RDK's DIU
+ * callbacks when the DIU framebuffer driver is configured (built in or as a
+ * module), and then runs the common SMP, PCI and swiotlb setup calls before
+ * logging the board banner.
+ */
+static void __init p1022_rdk_setup_arch(void)
+{
+ if (ppc_md.progress)
+ ppc_md.progress("p1022_rdk_setup_arch()", 0);
+
+#if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE)
+ diu_ops.set_pixel_clock = p1022rdk_set_pixel_clock;
+ diu_ops.valid_monitor_port = p1022rdk_valid_monitor_port; /* always reports DVI */
+#endif
+
+ mpc85xx_smp_init();
+
+ fsl_pci_assign_primary();
+
+ swiotlb_detect_4g();
+
+ pr_info("Freescale / iVeia P1022 RDK reference board\n");
+}
+
+machine_arch_initcall(p1022_rdk, mpc85xx_common_publish_devices); /* arch initcall registered for this machine */
+
+define_machine(p1022_rdk) { /* machine description for the P1022 RDK board */
+ .name = "P1022 RDK",
+ .compatible = "fsl,p1022rdk", /* compatible string this description is matched on */
+ .setup_arch = p1022_rdk_setup_arch,
+ .init_IRQ = p1022_rdk_pic_init, /* allocates and initialises the MPIC */
+#ifdef CONFIG_PCI /* PCI fixup hooks are only installed when PCI is configured */
+ .pcibios_fixup_bus = fsl_pcibios_fixup_bus,
+ .pcibios_fixup_phb = fsl_pcibios_fixup_phb,
+#endif
+ .get_irq = mpic_get_irq,
+ .progress = udbg_progress,
+};
diff --git a/arch/powerpc/platforms/85xx/p1023_rdb.c b/arch/powerpc/platforms/85xx/p1023_rdb.c
new file mode 100644
index 000000000..e4fa8731f
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/p1023_rdb.c
@@ -0,0 +1,107 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2010-2011, 2013 Freescale Semiconductor, Inc.
+ *
+ * Author: Roy Zang <tie-fei.zang@freescale.com>
+ *
+ * Description:
+ * P1023 RDB Board Setup
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/pci.h>
+#include <linux/delay.h>
+#include <linux/module.h>
+#include <linux/fsl_devices.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+
+#include <asm/time.h>
+#include <asm/machdep.h>
+#include <asm/pci-bridge.h>
+#include <mm/mmu_decl.h>
+#include <asm/udbg.h>
+#include <asm/mpic.h>
+#include "smp.h"
+
+#include <sysdev/fsl_soc.h>
+#include <sysdev/fsl_pci.h>
+
+#include "mpc85xx.h"
+
+#define BCSR15_I2C_BUS0_SEG_CLR		0x07
+#define BCSR15_I2C_BUS0_SEG2		0x02
+
+/* ************************************************************************
+ *
+ * Setup the architecture
+ *
+ * If a "bcsr" node exists, its registers are mapped temporarily so that,
+ * with CONFIG_RTC_CLASS, i2c bus 0 can be switched to segment #2 (the RTC
+ * chip).  A failure to map the BCSR is logged but does not stop the rest of
+ * the setup: SMP and PCI initialisation do not depend on it.
+ */
+static void __init p1023_rdb_setup_arch(void)
+{
+	struct device_node *np;
+
+	if (ppc_md.progress)
+		ppc_md.progress("p1023_rdb_setup_arch()", 0);
+
+	/* Map BCSR area */
+	np = of_find_node_by_name(NULL, "bcsr");
+	if (np != NULL) {
+		u8 __iomem *bcsr_regs = of_iomap(np, 0);
+
+		of_node_put(np);
+
+		if (!bcsr_regs) {
+			printk(KERN_ERR
+			       "BCSR: Failed to map bcsr register space\n");
+		} else {
+/*
+ * Note: Accessing exclusively i2c devices.
+ *
+ * The i2c controller selects initially ID EEPROM in the u-boot;
+ * but if menu configuration selects RTC support in the kernel,
+ * the i2c controller switches to select RTC chip in the kernel.
+ */
+#ifdef CONFIG_RTC_CLASS
+			/* Enable RTC chip on the segment #2 of i2c */
+			clrbits8(&bcsr_regs[15], BCSR15_I2C_BUS0_SEG_CLR);
+			setbits8(&bcsr_regs[15], BCSR15_I2C_BUS0_SEG2);
+#endif
+
+			iounmap(bcsr_regs);
+		}
+	}
+
+	mpc85xx_smp_init();
+
+	fsl_pci_assign_primary();
+}
+
+machine_arch_initcall(p1023_rdb, mpc85xx_common_publish_devices);
+
+/*
+ * Allocate the MPIC with the MPIC_BIG_ENDIAN and MPIC_SINGLE_DEST_CPU flags
+ * and initialise it.  A failed allocation is treated as fatal (BUG).
+ */
+static void __init p1023_rdb_pic_init(void)
+{
+	const unsigned int mpic_flags = MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU;
+	struct mpic *pic;
+
+	pic = mpic_alloc(NULL, 0, mpic_flags, 0, 256, " OpenPIC ");
+	BUG_ON(!pic);
+
+	mpic_init(pic);
+}
+
+define_machine(p1023_rdb) { /* machine description for the P1023 RDB board */
+ .name = "P1023 RDB",
+ .compatible = "fsl,P1023RDB", /* compatible string this description is matched on */
+ .setup_arch = p1023_rdb_setup_arch,
+ .init_IRQ = p1023_rdb_pic_init, /* allocates and initialises the MPIC */
+ .get_irq = mpic_get_irq,
+ .progress = udbg_progress,
+#ifdef CONFIG_PCI /* PCI fixup hooks are only installed when PCI is configured */
+ .pcibios_fixup_bus = fsl_pcibios_fixup_bus,
+ .pcibios_fixup_phb = fsl_pcibios_fixup_phb,
+#endif
+};
diff --git a/arch/powerpc/platforms/85xx/p2020.c b/arch/powerpc/platforms/85xx/p2020.c
new file mode 100644
index 000000000..0e4d71514
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/p2020.c
@@ -0,0 +1,81 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Freescale P2020 board Setup
+ *
+ * Copyright 2007,2009,2012-2013 Freescale Semiconductor Inc.
+ * Copyright 2022-2023 Pali Rohár <pali@kernel.org>
+ */
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/of.h>
+
+#include <asm/machdep.h>
+#include <asm/udbg.h>
+#include <asm/mpic.h>
+#include <asm/swiotlb.h>
+#include <asm/ppc-pci.h>
+
+#include <sysdev/fsl_pci.h>
+
+#include "smp.h"
+#include "mpc85xx.h"
+
+/*
+ * Allocate and initialise the MPIC (MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU),
+ * then call mpc85xx_8259_init().  If the allocation fails, a warning is
+ * emitted and no interrupt controller is set up.
+ */
+static void __init p2020_pic_init(void)
+{
+	struct mpic *pic;
+
+	pic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU,
+			 0, 256, " OpenPIC ");
+	if (WARN_ON(!pic))
+		return;
+
+	mpic_init(pic);
+	mpc85xx_8259_init();
+}
+
+/*
+ * Setup the architecture
+ *
+ * Runs the board-independent setup helpers in a fixed order: swiotlb
+ * detection, primary PCI assignment, ULI init, SMP init and QE par_io init.
+ * Unlike most sibling boards, no progress message is emitted here.
+ */
+static void __init p2020_setup_arch(void)
+{
+ swiotlb_detect_4g();
+ fsl_pci_assign_primary();
+ uli_init();
+ mpc85xx_smp_init();
+ mpc85xx_qe_par_io_init();
+}
+
+/*
+ * Machine probe hook.
+ *
+ * There is no common compatible string for all P2020 boards.
+ * The only common thing is "PowerPC,P2020@0" cpu node.
+ * So check for P2020 board via this cpu node.
+ *
+ * Returns 1 when that cpu node exists and 0 otherwise.
+ *
+ * NOTE(review): the previous header said this runs before the device tree
+ * is unflattened, yet the lookup goes through of_find_node_by_path() -
+ * confirm which is accurate.
+ */
+static int __init p2020_probe(void)
+{
+	struct device_node *cpu_np;
+	bool found;
+
+	cpu_np = of_find_node_by_path("/cpus/PowerPC,P2020@0");
+	found = (cpu_np != NULL);
+
+	/* Only the node's existence matters; release the reference */
+	of_node_put(cpu_np);
+
+	return found;
+}
+
+machine_arch_initcall(p2020, mpc85xx_common_publish_devices); /* arch initcall registered for this machine */
+
+define_machine(p2020) { /* generic machine description for P2020-based boards */
+ .name = "Freescale P2020",
+ .probe = p2020_probe, /* matches on the "PowerPC,P2020@0" cpu node, not a compatible string */
+ .setup_arch = p2020_setup_arch,
+ .init_IRQ = p2020_pic_init, /* MPIC setup followed by mpc85xx_8259_init() */
+#ifdef CONFIG_PCI /* PCI fixup hooks are only installed when PCI is configured */
+ .pcibios_fixup_bus = fsl_pcibios_fixup_bus,
+ .pcibios_fixup_phb = fsl_pcibios_fixup_phb,
+#endif
+ .get_irq = mpic_get_irq,
+ .progress = udbg_progress,
+};
diff --git a/arch/powerpc/platforms/85xx/ppa8548.c b/arch/powerpc/platforms/85xx/ppa8548.c
new file mode 100644
index 000000000..acd19c52a
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/ppa8548.c
@@ -0,0 +1,83 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * ppa8548 setup and early boot code.
+ *
+ * Copyright 2009 Prodrive B.V..
+ *
+ * By Stef van Os (see MAINTAINERS for contact information)
+ *
+ * Based on the SBC8548 support - Copyright 2007 Wind River Systems Inc.
+ * Based on the MPC8548CDS support - Copyright 2005 Freescale Inc.
+ */
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/reboot.h>
+#include <linux/seq_file.h>
+#include <linux/of_fdt.h>
+#include <linux/of_platform.h>
+
+#include <asm/machdep.h>
+#include <asm/udbg.h>
+#include <asm/mpic.h>
+
+#include <sysdev/fsl_soc.h>
+
+/*
+ * Allocate the MPIC with the MPIC_BIG_ENDIAN flag and initialise it.  A
+ * failed allocation is treated as fatal (BUG).
+ */
+static void __init ppa8548_pic_init(void)
+{
+	struct mpic *pic;
+
+	pic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN, 0, 256, " OpenPIC ");
+	BUG_ON(!pic);
+	mpic_init(pic);
+}
+
+/*
+ * Setup the architecture
+ *
+ * The only work done here is to report progress, and only when a progress
+ * hook is installed.
+ */
+static void __init ppa8548_setup_arch(void)
+{
+ if (ppc_md.progress)
+ ppc_md.progress("ppa8548_setup_arch()", 0);
+}
+
+/*
+ * Emit the board-specific cpuinfo lines into @m: the vendor name, the raw
+ * SVR value, and bits 24-29 of HID1 labelled as the PLL setting.
+ */
+static void ppa8548_show_cpuinfo(struct seq_file *m)
+{
+	const uint32_t svr = mfspr(SPRN_SVR);
+	const uint32_t hid1 = mfspr(SPRN_HID1);
+
+	seq_puts(m, "Vendor\t\t: Prodrive B.V.\n");
+	seq_printf(m, "SVR\t\t: 0x%x\n", svr);
+
+	/* Display cpu Pll setting */
+	seq_printf(m, "PLL setting\t: 0x%x\n", (hid1 >> 24) & 0x3f);
+}
+
+static const struct of_device_id of_bus_ids[] __initconst = { /* match table handed to of_platform_bus_probe() below */
+ { .name = "soc", },
+ { .type = "soc", },
+ { .compatible = "simple-bus", },
+ { .compatible = "gianfar", },
+ { .compatible = "fsl,srio", },
+ {}, /* sentinel */
+};
+
+static int __init declare_of_platform_devices(void) /* device initcall; always returns 0 */
+{
+ of_platform_bus_probe(NULL, of_bus_ids, NULL); /* return value is not checked */
+
+ return 0;
+}
+machine_device_initcall(ppa8548, declare_of_platform_devices);
+
+define_machine(ppa8548) { /* machine description for the Prodrive ppa8548 board */
+ .name = "ppa8548",
+ .compatible = "ppa8548", /* compatible string this description is matched on */
+ .setup_arch = ppa8548_setup_arch,
+ .init_IRQ = ppa8548_pic_init, /* allocates and initialises the MPIC */
+ .show_cpuinfo = ppa8548_show_cpuinfo, /* vendor, SVR and PLL setting lines */
+ .get_irq = mpic_get_irq,
+ .progress = udbg_progress,
+};
diff --git a/arch/powerpc/platforms/85xx/qemu_e500.c b/arch/powerpc/platforms/85xx/qemu_e500.c
new file mode 100644
index 000000000..3cd2f3bd4
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/qemu_e500.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Paravirt target for a generic QEMU e500 machine
+ *
+ * This is intended to be a flexible device-tree-driven platform, not fixed
+ * to a particular piece of hardware or a particular spec of virtual hardware,
+ * beyond the assumption of an e500-family CPU. Some things are still hardcoded
+ * here, such as MPIC, but this is a limitation of the current code rather than
+ * an interface contract with QEMU.
+ *
+ * Copyright 2012 Freescale Semiconductor Inc.
+ */
+
+#include <linux/kernel.h>
+#include <linux/of.h>
+#include <linux/of_fdt.h>
+#include <linux/pgtable.h>
+#include <asm/machdep.h>
+#include <asm/time.h>
+#include <asm/udbg.h>
+#include <asm/mpic.h>
+#include <asm/swiotlb.h>
+#include <sysdev/fsl_soc.h>
+#include <sysdev/fsl_pci.h>
+#include "smp.h"
+#include "mpc85xx.h"
+
+/*
+ * Allocate the MPIC with the MPIC_BIG_ENDIAN, MPIC_SINGLE_DEST_CPU and
+ * MPIC_ENABLE_COREINT flags and initialise it.  A failed allocation is
+ * treated as fatal (BUG).
+ */
+static void __init qemu_e500_pic_init(void)
+{
+	struct mpic *pic = mpic_alloc(NULL, 0,
+				      MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU |
+				      MPIC_ENABLE_COREINT,
+				      0, 256, " OpenPIC ");
+
+	BUG_ON(!pic);
+	mpic_init(pic);
+}
+
+/*
+ * Setup the architecture: report progress, then run the common PCI, swiotlb
+ * and SMP setup calls.
+ *
+ * The progress hook is checked before it is called, matching the other 85xx
+ * board files, so a machine description without one cannot cause a NULL
+ * call here.
+ */
+static void __init qemu_e500_setup_arch(void)
+{
+	if (ppc_md.progress)
+		ppc_md.progress("qemu_e500_setup_arch()", 0);
+
+	fsl_pci_assign_primary();
+	swiotlb_detect_4g();
+	mpc85xx_smp_init();
+}
+
+machine_arch_initcall(qemu_e500, mpc85xx_common_publish_devices); /* arch initcall registered for this machine */
+
+define_machine(qemu_e500) { /* machine description for the generic QEMU e500 target */
+ .name = "QEMU e500",
+ .compatible = "fsl,qemu-e500", /* compatible string this description is matched on */
+ .setup_arch = qemu_e500_setup_arch,
+ .init_IRQ = qemu_e500_pic_init, /* allocates the MPIC with MPIC_ENABLE_COREINT */
+#ifdef CONFIG_PCI /* PCI fixup hooks are only installed when PCI is configured */
+ .pcibios_fixup_bus = fsl_pcibios_fixup_bus,
+ .pcibios_fixup_phb = fsl_pcibios_fixup_phb,
+#endif
+ .get_irq = mpic_get_coreint_irq, /* coreint variant, pairing with the MPIC flag above */
+ .progress = udbg_progress,
+ .power_save = e500_idle,
+};
diff --git a/arch/powerpc/platforms/85xx/sgy_cts1000.c b/arch/powerpc/platforms/85xx/sgy_cts1000.c
new file mode 100644
index 000000000..751395cbf
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/sgy_cts1000.c
@@ -0,0 +1,156 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Servergy CTS-1000 Setup
+ *
+ * Maintained by Ben Collins <ben.c@servergy.com>
+ *
+ * Copyright 2012 by Servergy, Inc.
+ */
+
+#define pr_fmt(fmt) "gpio-halt: " fmt
+
+#include <linux/err.h>
+#include <linux/platform_device.h>
+#include <linux/device.h>
+#include <linux/gpio/consumer.h>
+#include <linux/module.h>
+#include <linux/of_irq.h>
+#include <linux/workqueue.h>
+#include <linux/reboot.h>
+#include <linux/interrupt.h>
+
+#include <asm/machdep.h>
+
+static struct gpio_desc *halt_gpio; /* GPIO driven to 1 by gpio_halt_cb(); NULL while not requested */
+static int halt_irq; /* power-button IRQ mapped in __gpio_halt_probe() */
+
+static const struct of_device_id child_match[] = { /* identifies the halt node looked up in gpio_halt_probe() */
+ {
+ .compatible = "sgy,gpio-halt",
+ },
+ {}, /* sentinel */
+};
+
+static void gpio_halt_wfn(struct work_struct *work) /* work handler scheduled from the power-button IRQ */
+{
+ /* Likely won't return */
+ orderly_poweroff(true);
+}
+static DECLARE_WORK(gpio_halt_wq, gpio_halt_wfn);
+
+static void __noreturn gpio_halt_cb(void) /* installed as ppc_md.halt and pm_power_off by __gpio_halt_probe() */
+{
+ pr_info("triggering GPIO.\n");
+
+ /* Probably won't return: setting the halt GPIO is expected to cut power */
+ gpiod_set_value(halt_gpio, 1);
+
+ panic("Halt failed\n"); /* only reached if the GPIO did not halt the system */
+}
+
+/*
+ * This IRQ means someone pressed the power button and it is waiting for us
+ * to handle the shutdown/poweroff.
+ *
+ * @irq: the interrupt number (unused)
+ * @__data: the platform device passed to request_irq()
+ *
+ * The shutdown itself is deferred to the gpio_halt_wq work item; the handler
+ * only logs the event and schedules that work.  Always returns IRQ_HANDLED.
+ */
+static irqreturn_t gpio_halt_irq(int irq, void *__data)
+{
+	struct platform_device *pdev = __data;
+
+	dev_info(&pdev->dev, "scheduling shutdown due to power button IRQ\n");
+	schedule_work(&gpio_halt_wq);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Claim the halt GPIO and the power-button IRQ described by @halt_node and
+ * install gpio_halt_cb() as the system halt/power-off handler.
+ *
+ * @pdev: the platform device being probed (used for logging and as the IRQ
+ *        cookie)
+ * @halt_node: the device-tree node matched against child_match
+ *
+ * Returns 0 on success or a negative errno.  On failure the GPIO is released
+ * again and halt_gpio is reset to NULL.
+ */
+static int __gpio_halt_probe(struct platform_device *pdev,
+			     struct device_node *halt_node)
+{
+	int err;
+
+	halt_gpio = fwnode_gpiod_get_index(of_fwnode_handle(halt_node),
+					   NULL, 0, GPIOD_OUT_LOW, "gpio-halt");
+	err = PTR_ERR_OR_ZERO(halt_gpio);
+	if (err) {
+		dev_err(&pdev->dev, "failed to request halt GPIO: %d\n", err);
+		return err;
+	}
+
+	/* Now get the IRQ which tells us when the power button is hit */
+	halt_irq = irq_of_parse_and_map(halt_node, 0);
+	if (!halt_irq) {
+		/* 0 means the interrupt could not be parsed or mapped */
+		dev_err(&pdev->dev, "failed to map power button IRQ\n");
+		err = -EINVAL;
+		goto err_put_gpio;
+	}
+
+	err = request_irq(halt_irq, gpio_halt_irq,
+			  IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING,
+			  "gpio-halt", pdev);
+	if (err) {
+		dev_err(&pdev->dev, "failed to request IRQ %d: %d\n",
+			halt_irq, err);
+		goto err_put_gpio;
+	}
+
+	/* Register our halt function */
+	ppc_md.halt = gpio_halt_cb;
+	pm_power_off = gpio_halt_cb;
+
+	dev_info(&pdev->dev, "registered halt GPIO, irq: %d\n", halt_irq);
+
+	return 0;
+
+err_put_gpio:
+	gpiod_put(halt_gpio);
+	halt_gpio = NULL;
+	return err;
+}
+
+/*
+ * Platform probe: look for a node matching child_match, starting from the
+ * device's own node, and hand it to __gpio_halt_probe().
+ *
+ * Returns -ENODEV when the device has no device-tree node or no matching
+ * node is found, otherwise the result of __gpio_halt_probe().
+ */
+static int gpio_halt_probe(struct platform_device *pdev)
+{
+	struct device_node *halt_node;
+	int ret;
+
+	if (!pdev->dev.of_node)
+		return -ENODEV;
+
+	/*
+	 * of_find_matching_node() drops a reference on the node it starts
+	 * from.  That reference belongs to the device, not to us, so take
+	 * one of our own first to keep the refcount balanced.
+	 */
+	of_node_get(pdev->dev.of_node);
+
+	/* If there's no matching child, this isn't really an error */
+	halt_node = of_find_matching_node(pdev->dev.of_node, child_match);
+	if (!halt_node)
+		return -ENODEV;
+
+	ret = __gpio_halt_probe(pdev, halt_node);
+	of_node_put(halt_node);
+
+	return ret;
+}
+
+static int gpio_halt_remove(struct platform_device *pdev) /* undoes __gpio_halt_probe(); always returns 0 */
+{
+ free_irq(halt_irq, pdev); /* pdev is the cookie that was passed to request_irq() */
+ cancel_work_sync(&gpio_halt_wq); /* IRQ is freed first, so no new work can be queued from it */
+
+ ppc_md.halt = NULL; /* cleared unconditionally, whatever the hooks held before probe */
+ pm_power_off = NULL;
+
+ gpiod_put(halt_gpio);
+ halt_gpio = NULL;
+
+ return 0;
+}
+
+static const struct of_device_id gpio_halt_match[] = {
+ /* We match on the gpio bus itself and scan the children since they
+ * won't be matched against us. We know the bus won't match until it
+ * has been registered too. */
+ {
+ .compatible = "fsl,qoriq-gpio",
+ },
+ {}, /* sentinel */
+};
+MODULE_DEVICE_TABLE(of, gpio_halt_match);
+
+static struct platform_driver gpio_halt_driver = { /* binds to the GPIO controller node, not to the halt node itself */
+ .driver = {
+ .name = "gpio-halt",
+ .of_match_table = gpio_halt_match,
+ },
+ .probe = gpio_halt_probe,
+ .remove = gpio_halt_remove,
+};
+
+module_platform_driver(gpio_halt_driver);
+
+MODULE_DESCRIPTION("Driver to support GPIO triggered system halt for Servergy CTS-1000 Systems.");
+MODULE_VERSION("1.0");
+MODULE_AUTHOR("Ben Collins <ben.c@servergy.com>");
+MODULE_LICENSE("GPL");
diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c
new file mode 100644
index 000000000..40aa58206
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/smp.c
@@ -0,0 +1,519 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Author: Andy Fleming <afleming@freescale.com>
+ * Kumar Gala <galak@kernel.crashing.org>
+ *
+ * Copyright 2006-2008, 2011-2012, 2015 Freescale Semiconductor Inc.
+ */
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/sched/hotplug.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/of.h>
+#include <linux/kexec.h>
+#include <linux/highmem.h>
+#include <linux/cpu.h>
+#include <linux/fsl/guts.h>
+#include <linux/pgtable.h>
+
+#include <asm/machdep.h>
+#include <asm/page.h>
+#include <asm/mpic.h>
+#include <asm/cacheflush.h>
+#include <asm/dbell.h>
+#include <asm/code-patching.h>
+#include <asm/cputhreads.h>
+#include <asm/fsl_pm.h>
+
+#include <sysdev/fsl_soc.h>
+#include <sysdev/mpic.h>
+#include "smp.h"
+
+/*
+ * Layout of the spin table a parked secondary core polls (accessed with
+ * big-endian accessors in this file).
+ *
+ * addr_h/addr_l: entry address the core should jump to; this file treats
+ *                addr_l == 1 as "core is spinning and waiting".
+ * pir:           written with the hardware CPU id before release.
+ * r3_h/r3_l and reserved are not touched by the code in this file.
+ */
+struct epapr_spin_table {
+ u32 addr_h;
+ u32 addr_l;
+ u32 r3_h;
+ u32 r3_l;
+ u32 reserved;
+ u32 pir;
+};
+
+/* Timebase value handed from mpc85xx_give_timebase() to mpc85xx_take_timebase() */
+static u64 timebase;
+/* Set by the taker to ask for the timebase, cleared by the giver */
+static int tb_req;
+/* Set by the giver once 'timebase' is valid, cleared by the taker when done */
+static int tb_valid;
+
+/*
+ * Giving side of the timebase hand-off. With interrupts hard-disabled:
+ * wait for a request (tb_req), freeze the timebase through qoriq_pm_ops,
+ * publish the frozen value in 'timebase', raise tb_valid, and only
+ * unfreeze once the taking CPU has cleared tb_valid again.
+ */
+static void mpc85xx_give_timebase(void)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ hard_irq_disable();
+
+ /* Wait until mpc85xx_take_timebase() raises the request flag */
+ while (!tb_req)
+ barrier();
+ tb_req = 0;
+
+ qoriq_pm_ops->freeze_time_base(true);
+#ifdef CONFIG_PPC64
+ /*
+ * e5500/e6500 have a workaround for erratum A-006958 in place
+ * that will reread the timebase until TBL is non-zero.
+ * That would be a bad thing when the timebase is frozen.
+ *
+ * Thus, we read it manually, and instead of checking that
+ * TBL is non-zero, we ensure that TB does not change. We don't
+ * do that for the main mftb implementation, because it requires
+ * a scratch register
+ */
+ {
+ u64 prev;
+
+ asm volatile("mfspr %0, %1" : "=r" (timebase) :
+ "i" (SPRN_TBRL));
+
+ /* Re-read until two consecutive reads agree */
+ do {
+ prev = timebase;
+ asm volatile("mfspr %0, %1" : "=r" (timebase) :
+ "i" (SPRN_TBRL));
+ } while (prev != timebase);
+ }
+#else
+ timebase = get_tb();
+#endif
+ /* Make 'timebase' visible before announcing it through tb_valid */
+ mb();
+ tb_valid = 1;
+
+ /* The taker clears tb_valid after programming its own timebase */
+ while (tb_valid)
+ barrier();
+
+ qoriq_pm_ops->freeze_time_base(false);
+
+ local_irq_restore(flags);
+}
+
+/*
+ * Taking side of the timebase hand-off: request the value, wait until the
+ * giver marks it valid, load it into this CPU's timebase and acknowledge
+ * by clearing tb_valid.
+ */
+static void mpc85xx_take_timebase(void)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ hard_irq_disable();
+
+ tb_req = 1;
+ while (!tb_valid)
+ barrier();
+
+ /* Split the 64-bit value into upper and lower 32-bit halves */
+ set_tb(timebase >> 32, timebase & 0xffffffff);
+ isync();
+ /* Tell the giver it may unfreeze the timebase */
+ tb_valid = 0;
+
+ local_irq_restore(flags);
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+/*
+ * Runs on the CPU that is going offline: masks its interrupts, stops the
+ * timer, marks the CPU dead, flushes its caches and asks qoriq_pm_ops to
+ * put the core down. Does not return.
+ */
+static void smp_85xx_cpu_offline_self(void)
+{
+ unsigned int cpu = smp_processor_id();
+
+ local_irq_disable();
+ hard_irq_disable();
+ /* mask all irqs to prevent cpu wakeup */
+ qoriq_pm_ops->irq_mask(cpu);
+
+ idle_task_exit();
+
+ /* Clear the timer control register and write TSR back to itself */
+ mtspr(SPRN_TCR, 0);
+ mtspr(SPRN_TSR, mfspr(SPRN_TSR));
+
+ /* qoriq_cpu_kill() polls is_cpu_dead() for this state */
+ generic_set_cpu_dead(cpu);
+
+ cur_cpu_spec->cpu_down_flush();
+
+ qoriq_pm_ops->cpu_die(cpu);
+
+ /* Spin in case cpu_die() returns */
+ while (1)
+ ;
+}
+
+/*
+ * Wait for @cpu to report itself dead, checking up to 500 times with a
+ * 20 ms sleep between checks. On 64-bit the paca cpu_start flag is
+ * cleared once the CPU is dead. Logs an error if the CPU never dies.
+ */
+static void qoriq_cpu_kill(unsigned int cpu)
+{
+	int tries_left = 500;
+
+	while (tries_left-- > 0) {
+		if (!is_cpu_dead(cpu)) {
+			msleep(20);
+			continue;
+		}
+#ifdef CONFIG_PPC64
+		paca_ptrs[cpu]->cpu_start = 0;
+#endif
+		return;
+	}
+
+	pr_err("CPU%d didn't die...\n", cpu);
+}
+#endif
+
+/*
+ * Older boot programs use a cache-inhibited spin table. To stay compatible
+ * with them the data cache range covering the table is flushed before the
+ * table is read (to drop stale data) and again after it is written (to push
+ * the new contents out).
+ */
+static inline void flush_spin_table(void *spin_table)
+{
+	ulong first = (ulong)spin_table;
+	ulong last = first + sizeof(struct epapr_spin_table);
+
+	flush_dcache_range(first, last);
+}
+
+/* Flush the spin table from the data cache, then return its addr_l word. */
+static inline u32 read_spin_table_addr_l(void *spin_table)
+{
+	struct epapr_spin_table *table = spin_table;
+	ulong base = (ulong)spin_table;
+
+	flush_dcache_range(base, base + sizeof(struct epapr_spin_table));
+
+	return in_be32(&table->addr_l);
+}
+
+#ifdef CONFIG_PPC64
+/*
+ * Cross-call helper: start the hardware thread of the logical CPU whose
+ * number @info points to, entering at fsl_secondary_thread_init.
+ */
+static void wake_hw_thread(void *info)
+{
+	void fsl_secondary_thread_init(void);
+	const int *target_cpu = info;
+	unsigned long entry = ppc_function_entry(fsl_secondary_thread_init);
+
+	book3e_start_thread(cpu_thread_in_core(*target_cpu), entry);
+}
+#endif
+
+/*
+ * Release logical CPU @cpu from its spin table.
+ *
+ * The spin table address comes from the CPU node's "cpu-release-addr"
+ * property. If the core is not currently spinning (addr_l != 1) it is reset
+ * through the MPIC first. The hardware CPU id and the kernel entry point
+ * are then written to the table.
+ *
+ * Returns 0 on success, -ENOENT when the property is missing, -ENOMEM when
+ * the table cannot be mapped, or -EAGAIN when the core does not come back
+ * from reset in time.
+ */
+static int smp_85xx_start_cpu(int cpu)
+{
+	int ret = 0;
+	struct device_node *np;
+	const u64 *cpu_rel_addr;
+	u64 rel_addr;
+	unsigned long flags;
+	int ioremappable;
+	int hw_cpu = get_hard_smp_processor_id(cpu);
+	struct epapr_spin_table __iomem *spin_table;
+
+	np = of_get_cpu_node(cpu, NULL);
+	cpu_rel_addr = of_get_property(np, "cpu-release-addr", NULL);
+	if (!cpu_rel_addr) {
+		of_node_put(np);
+		pr_err("No cpu-release-addr for cpu %d\n", cpu);
+		return -ENOENT;
+	}
+
+	/*
+	 * The property data belongs to the node, so copy the value out
+	 * before dropping the reference taken by of_get_cpu_node().
+	 */
+	rel_addr = *cpu_rel_addr;
+	of_node_put(np);
+
+	/*
+	 * A secondary core could be in a spinloop in the bootpage
+	 * (0xfffff000), somewhere in highmem, or somewhere in lowmem.
+	 * The bootpage and highmem can be accessed via ioremap(), but
+	 * we need to directly access the spinloop if its in lowmem.
+	 */
+	ioremappable = rel_addr > virt_to_phys(high_memory - 1);
+
+	/* Map the spin table */
+	if (ioremappable) {
+		spin_table = ioremap_coherent(rel_addr,
+				sizeof(struct epapr_spin_table));
+		if (!spin_table) {
+			pr_err("Unable to map spin table for cpu %d\n", cpu);
+			return -ENOMEM;
+		}
+	} else {
+		spin_table = phys_to_virt(rel_addr);
+	}
+
+	local_irq_save(flags);
+	hard_irq_disable();
+
+	if (qoriq_pm_ops && qoriq_pm_ops->cpu_up_prepare)
+		qoriq_pm_ops->cpu_up_prepare(cpu);
+
+	/* if cpu is not spinning, reset it */
+	if (read_spin_table_addr_l(spin_table) != 1) {
+		/*
+		 * We don't set the BPTR register here since it already points
+		 * to the boot page properly.
+		 */
+		mpic_reset_core(cpu);
+
+		/*
+		 * wait until core is ready...
+		 * We need to invalidate the stale data, in case the boot
+		 * loader uses a cache-inhibited spin table.
+		 */
+		if (!spin_event_timeout(
+				read_spin_table_addr_l(spin_table) == 1,
+				10000, 100)) {
+			pr_err("timeout waiting for cpu %d to reset\n",
+				hw_cpu);
+			ret = -EAGAIN;
+			goto err;
+		}
+	}
+
+	flush_spin_table(spin_table);
+	out_be32(&spin_table->pir, hw_cpu);
+#ifdef CONFIG_PPC64
+	out_be64((u64 *)(&spin_table->addr_h),
+		__pa(ppc_function_entry(generic_secondary_smp_init)));
+#else
+#ifdef CONFIG_PHYS_ADDR_T_64BIT
+	/*
+	 * We need also to write addr_h to spin table for systems
+	 * in which their physical memory start address was configured
+	 * to above 4G, otherwise the secondary core can not get
+	 * correct entry to start from.
+	 */
+	out_be32(&spin_table->addr_h, __pa(__early_start) >> 32);
+#endif
+	out_be32(&spin_table->addr_l, __pa(__early_start));
+#endif
+	flush_spin_table(spin_table);
+err:
+	local_irq_restore(flags);
+
+	if (ioremappable)
+		iounmap(spin_table);
+
+	return ret;
+}
+
+/*
+ * Start logical CPU @nr.
+ *
+ * On 64-bit with two threads per core, a thread is started by an already
+ * online thread of the same core when there is one; otherwise the core's
+ * primary thread is released from the spin table and it starts the thread
+ * recorded in booting_thread_hwid. On 32-bit the CPU is always released
+ * from its spin table.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int smp_85xx_kick_cpu(int nr)
+{
+	int ret = 0;
+#ifdef CONFIG_PPC64
+	int primary = nr;
+#endif
+
+	WARN_ON(nr < 0 || nr >= num_possible_cpus());
+
+	pr_debug("kick CPU #%d\n", nr);
+
+#ifdef CONFIG_PPC64
+	if (threads_per_core == 2) {
+		if (WARN_ON_ONCE(!cpu_has_feature(CPU_FTR_SMT)))
+			return -ENOENT;
+
+		booting_thread_hwid = cpu_thread_in_core(nr);
+		primary = cpu_first_thread_sibling(nr);
+
+		if (qoriq_pm_ops && qoriq_pm_ops->cpu_up_prepare)
+			qoriq_pm_ops->cpu_up_prepare(nr);
+
+		/*
+		 * If either thread in the core is online, use it to start
+		 * the other.
+		 */
+		if (cpu_online(primary)) {
+			smp_call_function_single(primary,
+					wake_hw_thread, &nr, 1);
+			goto done;
+		} else if (cpu_online(primary + 1)) {
+			smp_call_function_single(primary + 1,
+					wake_hw_thread, &nr, 1);
+			goto done;
+		}
+
+		/*
+		 * If getting here, it means both threads in the core are
+		 * offline. So start the primary thread, then it will start
+		 * the thread specified in booting_thread_hwid, the one
+		 * corresponding to nr.
+		 */
+
+	} else if (threads_per_core == 1) {
+		/*
+		 * If one core has only one thread, set booting_thread_hwid to
+		 * an invalid value.
+		 */
+		booting_thread_hwid = INVALID_THREAD_HWID;
+
+	} else if (threads_per_core > 2) {
+		/* Terminate the message so it is not merged with the next one */
+		pr_err("Do not support more than 2 threads per CPU.\n");
+		return -EINVAL;
+	}
+
+	ret = smp_85xx_start_cpu(primary);
+	if (ret)
+		return ret;
+
+done:
+	paca_ptrs[nr]->cpu_start = 1;
+	generic_set_cpu_up(nr);
+
+	return ret;
+#else
+	ret = smp_85xx_start_cpu(nr);
+	if (ret)
+		return ret;
+
+	generic_set_cpu_up(nr);
+
+	return ret;
+#endif
+}
+
+/*
+ * SMP operations for 85xx. The static initialiser only holds the defaults;
+ * mpc85xx_smp_init() fills in or overrides further hooks at boot.
+ */
+struct smp_ops_t smp_85xx_ops = {
+ .cause_nmi_ipi = NULL,
+ .kick_cpu = smp_85xx_kick_cpu,
+ .cpu_bootable = smp_generic_cpu_bootable,
+#ifdef CONFIG_HOTPLUG_CPU
+ .cpu_disable = generic_cpu_disable,
+ .cpu_die = generic_cpu_die,
+#endif
+#if defined(CONFIG_KEXEC_CORE) && !defined(CONFIG_PPC64)
+ /* Generic timebase sync; replaced in mpc85xx_smp_init() when qoriq_pm_ops is set */
+ .give_timebase = smp_generic_give_timebase,
+ .take_timebase = smp_generic_take_timebase,
+#endif
+};
+
+#ifdef CONFIG_KEXEC_CORE
+#ifdef CONFIG_PPC32
+atomic_t kexec_down_cpus = ATOMIC_INIT(0);
+
+/*
+ * 32-bit kexec shutdown hook. Interrupts are disabled on every CPU; a
+ * secondary CPU additionally flushes its caches, counts itself in
+ * kexec_down_cpus and then parks forever.
+ */
+static void mpc85xx_smp_kexec_cpu_down(int crash_shutdown, int secondary)
+{
+	local_irq_disable();
+
+	if (!secondary)
+		return;
+
+	cur_cpu_spec->cpu_down_flush();
+	atomic_inc(&kexec_down_cpus);
+
+	/* loop forever */
+	for (;;)
+		;
+}
+
+/* Cross-call target: run the machine's kexec_cpu_down hook as a secondary. */
+static void mpc85xx_smp_kexec_down(void *arg)
+{
+	if (!ppc_md.kexec_cpu_down)
+		return;
+
+	ppc_md.kexec_cpu_down(0, 1);
+}
+#else
+/*
+ * 64-bit kexec shutdown hook. Disables interrupts and tears down this
+ * CPU's MPIC state. When this CPU has to switch off the other thread of
+ * its core, it first waits for that thread to reach
+ * KEXEC_STATE_REAL_MODE, then disables it through SPRN_TENC and polls
+ * SPRN_TENSR until the thread is reported as stopped.
+ */
+static void mpc85xx_smp_kexec_cpu_down(int crash_shutdown, int secondary)
+{
+ int cpu = smp_processor_id();
+ int sibling = cpu_last_thread_sibling(cpu);
+ bool notified = false;
+ int disable_cpu;
+ /* Value written to TENC; 0 means no thread needs disabling */
+ int disable_threadbit = 0;
+ long start = mftb();
+ long now;
+
+ local_irq_disable();
+ hard_irq_disable();
+ mpic_teardown_this_cpu(secondary);
+
+ if (cpu == crashing_cpu && cpu_thread_in_core(cpu) != 0) {
+ /*
+ * We enter the crash kernel on whatever cpu crashed,
+ * even if it's a secondary thread. If that's the case,
+ * disable the corresponding primary thread.
+ */
+ disable_threadbit = 1;
+ disable_cpu = cpu_first_thread_sibling(cpu);
+ } else if (sibling != crashing_cpu &&
+ cpu_thread_in_core(cpu) == 0 &&
+ cpu_thread_in_core(sibling) != 0) {
+ /* We are thread 0 and the sibling did not crash: disable the sibling */
+ disable_threadbit = 2;
+ disable_cpu = sibling;
+ }
+
+ /* disable_cpu is only valid (and only read) when disable_threadbit is set */
+ if (disable_threadbit) {
+ while (paca_ptrs[disable_cpu]->kexec_state < KEXEC_STATE_REAL_MODE) {
+ barrier();
+ now = mftb();
+ /* Report a long wait once, after 1000000 timebase ticks */
+ if (!notified && now - start > 1000000) {
+ pr_info("%s/%d: waiting for cpu %d to enter KEXEC_STATE_REAL_MODE (%d)\n",
+ __func__, smp_processor_id(),
+ disable_cpu,
+ paca_ptrs[disable_cpu]->kexec_state);
+ notified = true;
+ }
+ }
+
+ if (notified) {
+ pr_info("%s: cpu %d done waiting\n",
+ __func__, disable_cpu);
+ }
+
+ mtspr(SPRN_TENC, disable_threadbit);
+ while (mfspr(SPRN_TENSR) & disable_threadbit)
+ cpu_relax();
+ }
+}
+#endif
+
+/*
+ * machine_kexec hook. On 32-bit, for a normal (non-crash) kexec the other
+ * CPUs are asked to shut down; the caller then waits a bounded number of
+ * iterations for all of them to check in through kexec_down_cpus and
+ * resets every other online core before handing over to
+ * default_machine_kexec().
+ */
+static void mpc85xx_smp_machine_kexec(struct kimage *image)
+{
+#ifdef CONFIG_PPC32
+	int timeout = INT_MAX;
+	int i, num_cpus = num_present_cpus();
+
+	if (image->type == KEXEC_TYPE_DEFAULT)
+		smp_call_function(mpc85xx_smp_kexec_down, NULL, 0);
+
+	/* Bounded busy-wait: timeout reaches 0 only if the CPUs never checked in */
+	while ((atomic_read(&kexec_down_cpus) != (num_cpus - 1)) &&
+	       (timeout > 0))
+		timeout--;
+
+	if (!timeout)
+		printk(KERN_ERR "Unable to bring down secondary cpu(s)\n");
+
+	for_each_online_cpu(i) {
+		if (i == smp_processor_id())
+			continue;
+		mpic_reset_core(i);
+	}
+#endif
+
+	default_machine_kexec(image);
+}
+#endif /* CONFIG_KEXEC_CORE */
+
+/* setup_cpu hook: initialise the MPIC for the calling CPU; @cpu_nr is unused. */
+static void smp_85xx_setup_cpu(int cpu_nr)
+{
+ mpic_setup_this_cpu();
+}
+
+/*
+ * Boot-time SMP setup for 85xx: choose the IPI mechanism (MPIC or
+ * doorbells), initialise the power-management backend that provides
+ * qoriq_pm_ops, install the timebase and hotplug hooks that depend on it,
+ * and publish smp_85xx_ops plus the kexec hooks.
+ */
+void __init mpc85xx_smp_init(void)
+{
+	struct device_node *np;
+
+	np = of_find_node_by_type(NULL, "open-pic");
+	if (np) {
+		smp_85xx_ops.probe = smp_mpic_probe;
+		smp_85xx_ops.setup_cpu = smp_85xx_setup_cpu;
+		smp_85xx_ops.message_pass = smp_mpic_message_pass;
+	} else {
+		smp_85xx_ops.setup_cpu = NULL;
+	}
+	/* Only the node's presence matters; drop the reference taken above */
+	of_node_put(np);
+
+	if (cpu_has_feature(CPU_FTR_DBELL)) {
+		/*
+		 * If left NULL, .message_pass defaults to
+		 * smp_muxed_ipi_message_pass
+		 */
+		smp_85xx_ops.message_pass = NULL;
+		smp_85xx_ops.cause_ipi = doorbell_global_ipi;
+		smp_85xx_ops.probe = NULL;
+	}
+
+#ifdef CONFIG_FSL_CORENET_RCPM
+	/* Assign a value to qoriq_pm_ops on PPC_E500MC */
+	fsl_rcpm_init();
+#else
+	/* Assign a value to qoriq_pm_ops on !PPC_E500MC */
+	mpc85xx_setup_pmc();
+#endif
+	/* These hooks dereference qoriq_pm_ops, so install them only if it is set */
+	if (qoriq_pm_ops) {
+		smp_85xx_ops.give_timebase = mpc85xx_give_timebase;
+		smp_85xx_ops.take_timebase = mpc85xx_take_timebase;
+#ifdef CONFIG_HOTPLUG_CPU
+		smp_85xx_ops.cpu_offline_self = smp_85xx_cpu_offline_self;
+		smp_85xx_ops.cpu_die = qoriq_cpu_kill;
+#endif
+	}
+	smp_ops = &smp_85xx_ops;
+
+#ifdef CONFIG_KEXEC_CORE
+	ppc_md.kexec_cpu_down = mpc85xx_smp_kexec_cpu_down;
+	ppc_md.machine_kexec = mpc85xx_smp_machine_kexec;
+#endif
+}
diff --git a/arch/powerpc/platforms/85xx/smp.h b/arch/powerpc/platforms/85xx/smp.h
new file mode 100644
index 000000000..3936ff6df
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/smp.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef POWERPC_85XX_SMP_H_
+#define POWERPC_85XX_SMP_H_ 1
+
+#include <linux/init.h>
+
+#ifdef CONFIG_SMP
+/* Boot-time SMP setup for 85xx platforms */
+void __init mpc85xx_smp_init(void);
+/* Power-management controller setup; returns an int status */
+int __init mpc85xx_setup_pmc(void);
+#else
+/* Non-SMP builds get an empty stub */
+static inline void mpc85xx_smp_init(void)
+{
+ /* Nothing to do */
+}
+#endif
+
+#endif /* not POWERPC_85XX_SMP_H_ */
diff --git a/arch/powerpc/platforms/85xx/socrates.c b/arch/powerpc/platforms/85xx/socrates.c
new file mode 100644
index 000000000..403367b31
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/socrates.c
@@ -0,0 +1,79 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2008 Emcraft Systems
+ * Sergei Poselenov <sposelenov@emcraft.com>
+ *
+ * Based on MPC8560 ADS and arch/ppc tqm85xx ports
+ *
+ * Maintained by Kumar Gala (see MAINTAINERS for contact information)
+ *
+ * Copyright 2008 Freescale Semiconductor Inc.
+ *
+ * Copyright (c) 2005-2006 DENX Software Engineering
+ * Stefan Roese <sr@denx.de>
+ *
+ * Based on original work by
+ * Kumar Gala <kumar.gala@freescale.com>
+ * Copyright 2004 Freescale Semiconductor Inc.
+ */
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/kdev_t.h>
+#include <linux/delay.h>
+#include <linux/seq_file.h>
+#include <linux/of.h>
+
+#include <asm/time.h>
+#include <asm/machdep.h>
+#include <asm/pci-bridge.h>
+#include <asm/mpic.h>
+#include <mm/mmu_decl.h>
+#include <asm/udbg.h>
+
+#include <sysdev/fsl_soc.h>
+#include <sysdev/fsl_pci.h>
+
+#include "mpc85xx.h"
+#include "socrates_fpga_pic.h"
+
+/*
+ * Interrupt setup for Socrates: allocate and initialise the MPIC, then
+ * initialise the FPGA interrupt controller described by the
+ * "abb,socrates-fpga-pic" node, if the device tree has one.
+ */
+static void __init socrates_pic_init(void)
+{
+	struct device_node *fpga_np;
+	struct mpic *mpic;
+
+	mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN, 0, 256, " OpenPIC ");
+	BUG_ON(mpic == NULL);
+	mpic_init(mpic);
+
+	fpga_np = of_find_compatible_node(NULL, NULL, "abb,socrates-fpga-pic");
+	if (fpga_np) {
+		socrates_fpga_pic_init(fpga_np);
+		of_node_put(fpga_np);
+		return;
+	}
+
+	printk(KERN_ERR "Could not find socrates-fpga-pic node\n");
+}
+
+/*
+ * Setup the architecture: report progress through ppc_md.progress when a
+ * progress callback is installed, then select the primary PCI bus.
+ */
+static void __init socrates_setup_arch(void)
+{
+ if (ppc_md.progress)
+ ppc_md.progress("socrates_setup_arch()", 0);
+
+ fsl_pci_assign_primary();
+}
+
+/* Run mpc85xx_common_publish_devices() as an arch initcall on this machine */
+machine_arch_initcall(socrates, mpc85xx_common_publish_devices);
+
+/* Machine description, selected by the "abb,socrates" compatible string */
+define_machine(socrates) {
+ .name = "Socrates",
+ .compatible = "abb,socrates",
+ .setup_arch = socrates_setup_arch,
+ .init_IRQ = socrates_pic_init,
+ .get_irq = mpic_get_irq,
+ .progress = udbg_progress,
+};
diff --git a/arch/powerpc/platforms/85xx/socrates_fpga_pic.c b/arch/powerpc/platforms/85xx/socrates_fpga_pic.c
new file mode 100644
index 000000000..baa12eff6
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/socrates_fpga_pic.c
@@ -0,0 +1,308 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2008 Ilya Yanok, Emcraft Systems
+ */
+
+#include <linux/irq.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/io.h>
+
+/*
+ * The FPGA supports 9 interrupt sources, which can be routed to 3
+ * interrupt request lines of the MPIC. The line to be used can be
+ * specified through the third cell of FDT property "interrupts".
+ */
+
+/* Number of interrupt sources handled by the FPGA */
+#define SOCRATES_FPGA_NUM_IRQS 9
+
+/* Register offsets: one config register, then one mask register per MPIC line n */
+#define FPGA_PIC_IRQCFG (0x0)
+#define FPGA_PIC_IRQMASK(n) (0x4 + 0x4 * (n))
+
+/* One bit per source in the low bits; the code also uses the same bits shifted up by 16 */
+#define SOCRATES_FPGA_IRQ_MASK ((1 << SOCRATES_FPGA_NUM_IRQS) - 1)
+
+/* Per-source routing and trigger information */
+struct socrates_fpga_irq_info {
+ /* MPIC request line (0-2) the source is routed to; set by the xlate callback */
+ unsigned int irq_line;
+ /* IRQ_TYPE_* value; IRQ_TYPE_NONE means the type is configurable */
+ int type;
+};
+
+/*
+ * Interrupt routing and type table, indexed by FPGA hardware irq number
+ *
+ * IRQ_TYPE_NONE means the interrupt type is configurable,
+ * otherwise it's fixed to the specified value.
+ *
+ * Every source starts out routed to line 0; the xlate callback updates
+ * irq_line from the device tree.
+ */
+static struct socrates_fpga_irq_info fpga_irqs[SOCRATES_FPGA_NUM_IRQS] = {
+ [0] = {0, IRQ_TYPE_NONE},
+ [1] = {0, IRQ_TYPE_LEVEL_HIGH},
+ [2] = {0, IRQ_TYPE_LEVEL_LOW},
+ [3] = {0, IRQ_TYPE_NONE},
+ [4] = {0, IRQ_TYPE_NONE},
+ [5] = {0, IRQ_TYPE_NONE},
+ [6] = {0, IRQ_TYPE_NONE},
+ [7] = {0, IRQ_TYPE_NONE},
+ [8] = {0, IRQ_TYPE_LEVEL_HIGH},
+};
+
+/* Serialises accesses to the FPGA PIC registers */
+static DEFINE_RAW_SPINLOCK(socrates_fpga_pic_lock);
+
+/* Mapped register base, set in socrates_fpga_pic_init() */
+static void __iomem *socrates_fpga_pic_iobase;
+/* Linear irq domain covering the FPGA sources */
+static struct irq_domain *socrates_fpga_pic_irq_host;
+/* Linux irq numbers of the three MPIC lines the FPGA cascades into */
+static unsigned int socrates_fpga_irqs[3];
+
+/* Read the big-endian 32-bit FPGA PIC register at byte offset @reg. */
+static inline uint32_t socrates_fpga_pic_read(int reg)
+{
+ return in_be32(socrates_fpga_pic_iobase + reg);
+}
+
+/* Write @val to the big-endian 32-bit FPGA PIC register at byte offset @reg. */
+static inline void socrates_fpga_pic_write(int reg, uint32_t val)
+{
+ out_be32(socrates_fpga_pic_iobase + reg, val);
+}
+
+/*
+ * Translate a cascade interrupt into the Linux irq of the FPGA source that
+ * raised it.
+ *
+ * @irq is the Linux irq of one of the three MPIC lines. The mask register
+ * of that line is read and its upper half (bits 16 and up) is scanned from
+ * the highest source number downwards.
+ *
+ * Returns the mapped Linux irq, or 0 when @irq is not one of our lines or
+ * no source is flagged.
+ */
+static inline unsigned int socrates_fpga_pic_get_irq(unsigned int irq)
+{
+	uint32_t cause;
+	unsigned long flags;
+	int line;
+	int hwirq;
+
+	/* Check irq line routed to the MPIC */
+	for (line = 0; line < 3; line++) {
+		if (irq == socrates_fpga_irqs[line])
+			break;
+	}
+	if (line == 3)
+		return 0;
+
+	raw_spin_lock_irqsave(&socrates_fpga_pic_lock, flags);
+	cause = socrates_fpga_pic_read(FPGA_PIC_IRQMASK(line));
+	raw_spin_unlock_irqrestore(&socrates_fpga_pic_lock, flags);
+
+	for (hwirq = SOCRATES_FPGA_NUM_IRQS - 1; hwirq >= 0; hwirq--) {
+		if (cause >> (hwirq + 16))
+			break;
+	}
+
+	/* Nothing flagged: report "no interrupt" instead of looking up hwirq -1 */
+	if (hwirq < 0)
+		return 0;
+
+	return irq_linear_revmap(socrates_fpga_pic_irq_host,
+			(irq_hw_number_t)hwirq);
+}
+
+/*
+ * Chained handler installed on the MPIC lines: dispatch the FPGA interrupt
+ * behind this cascade, if there is one, then signal end-of-interrupt on
+ * the cascade's own chip.
+ */
+static void socrates_fpga_pic_cascade(struct irq_desc *desc)
+{
+	struct irq_chip *parent_chip = irq_desc_get_chip(desc);
+	unsigned int fpga_virq;
+
+	fpga_virq = socrates_fpga_pic_get_irq(irq_desc_get_irq(desc));
+	if (fpga_virq != 0)
+		generic_handle_irq(fpga_virq);
+
+	parent_chip->irq_eoi(&desc->irq_data);
+}
+
+/*
+ * Locked read-modify-write of the mask register belonging to the MPIC line
+ * that the interrupt behind @d is routed to.
+ *
+ * The value read back is reduced to the low SOCRATES_FPGA_IRQ_MASK bits
+ * before @clear and @set are applied, so bits 16 and up are written as 0
+ * unless the caller asks for one through @set.
+ */
+static void socrates_fpga_pic_update_mask(struct irq_data *d, u32 clear,
+		u32 set)
+{
+	unsigned int hwirq = irqd_to_hwirq(d);
+	unsigned int irq_line = fpga_irqs[hwirq].irq_line;
+	unsigned long flags;
+	u32 mask;
+
+	raw_spin_lock_irqsave(&socrates_fpga_pic_lock, flags);
+	mask = socrates_fpga_pic_read(FPGA_PIC_IRQMASK(irq_line))
+		& SOCRATES_FPGA_IRQ_MASK;
+	mask &= ~clear;
+	mask |= set;
+	socrates_fpga_pic_write(FPGA_PIC_IRQMASK(irq_line), mask);
+	raw_spin_unlock_irqrestore(&socrates_fpga_pic_lock, flags);
+}
+
+/* irq_ack: write the source's bit in the upper half (bit hwirq + 16). */
+static void socrates_fpga_pic_ack(struct irq_data *d)
+{
+	unsigned int hwirq = irqd_to_hwirq(d);
+
+	socrates_fpga_pic_update_mask(d, 0, 1U << (hwirq + 16));
+}
+
+/* irq_mask: clear the source's bit in the lower half. */
+static void socrates_fpga_pic_mask(struct irq_data *d)
+{
+	unsigned int hwirq = irqd_to_hwirq(d);
+
+	socrates_fpga_pic_update_mask(d, 1U << hwirq, 0);
+}
+
+/* irq_mask_ack: mask and ack in a single register write. */
+static void socrates_fpga_pic_mask_ack(struct irq_data *d)
+{
+	unsigned int hwirq = irqd_to_hwirq(d);
+
+	socrates_fpga_pic_update_mask(d, 1U << hwirq, 1U << (hwirq + 16));
+}
+
+/* irq_unmask: set the source's bit in the lower half. */
+static void socrates_fpga_pic_unmask(struct irq_data *d)
+{
+	unsigned int hwirq = irqd_to_hwirq(d);
+
+	socrates_fpga_pic_update_mask(d, 0, 1U << hwirq);
+}
+
+/* irq_eoi: same register update as irq_ack. */
+static void socrates_fpga_pic_eoi(struct irq_data *d)
+{
+	unsigned int hwirq = irqd_to_hwirq(d);
+
+	socrates_fpga_pic_update_mask(d, 0, 1U << (hwirq + 16));
+}
+
+/*
+ * irq_set_type: program the polarity of a configurable source.
+ *
+ * Only level-high and level-low are accepted, and only for sources whose
+ * table entry is IRQ_TYPE_NONE. A set bit in FPGA_PIC_IRQCFG selects
+ * level-high, a cleared bit level-low.
+ *
+ * Returns 0 on success or -EINVAL for a fixed-type source or an
+ * unsupported trigger.
+ */
+static int socrates_fpga_pic_set_type(struct irq_data *d,
+		unsigned int flow_type)
+{
+	unsigned int hwirq = irqd_to_hwirq(d);
+	unsigned int sense = flow_type & IRQ_TYPE_SENSE_MASK;
+	unsigned long flags;
+	u32 cfg;
+
+	if (fpga_irqs[hwirq].type != IRQ_TYPE_NONE)
+		return -EINVAL;
+
+	if (sense != IRQ_TYPE_LEVEL_HIGH && sense != IRQ_TYPE_LEVEL_LOW)
+		return -EINVAL;
+
+	raw_spin_lock_irqsave(&socrates_fpga_pic_lock, flags);
+	cfg = socrates_fpga_pic_read(FPGA_PIC_IRQCFG);
+	if (sense == IRQ_TYPE_LEVEL_HIGH)
+		cfg |= (1 << hwirq);
+	else
+		cfg &= ~(1 << hwirq);
+	socrates_fpga_pic_write(FPGA_PIC_IRQCFG, cfg);
+	raw_spin_unlock_irqrestore(&socrates_fpga_pic_lock, flags);
+
+	return 0;
+}
+
+/* irq_chip callbacks for the FPGA interrupt sources */
+static struct irq_chip socrates_fpga_pic_chip = {
+ .name = "FPGA-PIC",
+ .irq_ack = socrates_fpga_pic_ack,
+ .irq_mask = socrates_fpga_pic_mask,
+ .irq_mask_ack = socrates_fpga_pic_mask_ack,
+ .irq_unmask = socrates_fpga_pic_unmask,
+ .irq_eoi = socrates_fpga_pic_eoi,
+ .irq_set_type = socrates_fpga_pic_set_type,
+};
+
+/*
+ * irq_domain map callback: mark @virq as level triggered and attach the
+ * FPGA PIC chip with the fasteoi flow handler. @h and @hwirq are unused.
+ * Always returns 0.
+ */
+static int socrates_fpga_pic_host_map(struct irq_domain *h, unsigned int virq,
+ irq_hw_number_t hwirq)
+{
+ /* All interrupts are LEVEL sensitive */
+ irq_set_status_flags(virq, IRQ_LEVEL);
+ irq_set_chip_and_handler(virq, &socrates_fpga_pic_chip,
+ handle_fasteoi_irq);
+
+ return 0;
+}
+
+/*
+ * irq_domain xlate callback: decode a three-cell interrupt specifier.
+ *
+ * Cell 0 is the FPGA source number, cell 1 the trigger type (honoured only
+ * for sources whose type is configurable) and cell 2 the MPIC line (0-2)
+ * the source is routed to.
+ *
+ * Returns 0 on success, or -EINVAL when the specifier has fewer than three
+ * cells or names a source outside the table.
+ */
+static int socrates_fpga_pic_host_xlate(struct irq_domain *h,
+		struct device_node *ct, const u32 *intspec, unsigned int intsize,
+		irq_hw_number_t *out_hwirq, unsigned int *out_flags)
+{
+	struct socrates_fpga_irq_info *fpga_irq;
+
+	/* All three cells are read below; reject short specifiers */
+	if (intsize < 3) {
+		pr_warn("FPGA PIC: interrupt specifier too short\n");
+		return -EINVAL;
+	}
+
+	/* The source number comes from the device tree and indexes fpga_irqs[] */
+	if (intspec[0] >= SOCRATES_FPGA_NUM_IRQS) {
+		pr_warn("FPGA PIC: invalid irq number %u\n", intspec[0]);
+		return -EINVAL;
+	}
+	fpga_irq = &fpga_irqs[intspec[0]];
+
+	*out_hwirq = intspec[0];
+	if (fpga_irq->type == IRQ_TYPE_NONE) {
+		/* type is configurable */
+		if (intspec[1] != IRQ_TYPE_LEVEL_LOW &&
+		    intspec[1] != IRQ_TYPE_LEVEL_HIGH) {
+			pr_warn("FPGA PIC: invalid irq type, setting default active low\n");
+			*out_flags = IRQ_TYPE_LEVEL_LOW;
+		} else {
+			*out_flags = intspec[1];
+		}
+	} else {
+		/* type is fixed */
+		*out_flags = fpga_irq->type;
+	}
+
+	/* Use specified interrupt routing */
+	if (intspec[2] <= 2)
+		fpga_irq->irq_line = intspec[2];
+	else
+		pr_warn("FPGA PIC: invalid irq routing\n");
+
+	return 0;
+}
+
+/* irq_domain callbacks used for the FPGA PIC's linear domain */
+static const struct irq_domain_ops socrates_fpga_pic_host_ops = {
+ .map = socrates_fpga_pic_host_map,
+ .xlate = socrates_fpga_pic_host_xlate,
+};
+
+/*
+ * Initialise the Socrates FPGA interrupt controller described by @pic.
+ *
+ * The registers are mapped and every line's mask register is programmed
+ * before the cascade handlers are installed, because the handlers and all
+ * chip callbacks access the registers through socrates_fpga_pic_iobase.
+ * On a mapping or domain allocation failure the controller is left unused.
+ */
+void __init socrates_fpga_pic_init(struct device_node *pic)
+{
+	unsigned long flags;
+	int i;
+
+	socrates_fpga_pic_iobase = of_iomap(pic, 0);
+	if (!socrates_fpga_pic_iobase) {
+		pr_err("FPGA PIC: Unable to map registers\n");
+		return;
+	}
+
+	/* Setup an irq_domain structure */
+	socrates_fpga_pic_irq_host = irq_domain_add_linear(pic,
+			SOCRATES_FPGA_NUM_IRQS, &socrates_fpga_pic_host_ops, NULL);
+	if (socrates_fpga_pic_irq_host == NULL) {
+		pr_err("FPGA PIC: Unable to allocate host\n");
+		iounmap(socrates_fpga_pic_iobase);
+		socrates_fpga_pic_iobase = NULL;
+		return;
+	}
+
+	/* Low (enable) bits cleared, upper-half bits written as 1, on all three lines */
+	raw_spin_lock_irqsave(&socrates_fpga_pic_lock, flags);
+	socrates_fpga_pic_write(FPGA_PIC_IRQMASK(0),
+			SOCRATES_FPGA_IRQ_MASK << 16);
+	socrates_fpga_pic_write(FPGA_PIC_IRQMASK(1),
+			SOCRATES_FPGA_IRQ_MASK << 16);
+	socrates_fpga_pic_write(FPGA_PIC_IRQMASK(2),
+			SOCRATES_FPGA_IRQ_MASK << 16);
+	raw_spin_unlock_irqrestore(&socrates_fpga_pic_lock, flags);
+
+	for (i = 0; i < 3; i++) {
+		socrates_fpga_irqs[i] = irq_of_parse_and_map(pic, i);
+		if (!socrates_fpga_irqs[i]) {
+			pr_warn("FPGA PIC: can't get irq%d\n", i);
+			continue;
+		}
+		irq_set_chained_handler(socrates_fpga_irqs[i],
+				socrates_fpga_pic_cascade);
+	}
+
+	pr_info("FPGA PIC: Setting up Socrates FPGA PIC\n");
+}
diff --git a/arch/powerpc/platforms/85xx/socrates_fpga_pic.h b/arch/powerpc/platforms/85xx/socrates_fpga_pic.h
new file mode 100644
index 000000000..c50b23794
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/socrates_fpga_pic.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2008 Ilya Yanok, Emcraft Systems
+ */
+
+#ifndef SOCRATES_FPGA_PIC_H
+#define SOCRATES_FPGA_PIC_H
+
+void __init socrates_fpga_pic_init(struct device_node *pic);
+
+#endif
diff --git a/arch/powerpc/platforms/85xx/stx_gp3.c b/arch/powerpc/platforms/85xx/stx_gp3.c
new file mode 100644
index 000000000..c10efc458
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/stx_gp3.c
@@ -0,0 +1,94 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Based on MPC8560 ADS and arch/ppc stx_gp3 ports
+ *
+ * Maintained by Kumar Gala (see MAINTAINERS for contact information)
+ *
+ * Copyright 2008 Freescale Semiconductor Inc.
+ *
+ * Dan Malek <dan@embeddededge.com>
+ * Copyright 2004 Embedded Edge, LLC
+ *
+ * Copied from mpc8560_ads.c
+ * Copyright 2002, 2003 Motorola Inc.
+ *
+ * Ported to 2.6, Matt Porter <mporter@kernel.crashing.org>
+ * Copyright 2004-2005 MontaVista Software, Inc.
+ */
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/kdev_t.h>
+#include <linux/delay.h>
+#include <linux/seq_file.h>
+#include <linux/of.h>
+
+#include <asm/time.h>
+#include <asm/machdep.h>
+#include <asm/pci-bridge.h>
+#include <asm/mpic.h>
+#include <mm/mmu_decl.h>
+#include <asm/udbg.h>
+
+#include <sysdev/fsl_soc.h>
+#include <sysdev/fsl_pci.h>
+
+#include "mpc85xx.h"
+
+#ifdef CONFIG_CPM2
+#include <asm/cpm2.h>
+#endif /* CONFIG_CPM2 */
+
+/* Interrupt setup: allocate and initialise the MPIC, then the CPM2 PIC. */
+static void __init stx_gp3_pic_init(void)
+{
+	struct mpic *mpic;
+
+	mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN, 0, 256, " OpenPIC ");
+	BUG_ON(mpic == NULL);
+
+	mpic_init(mpic);
+	mpc85xx_cpm2_pic_init();
+}
+
+/*
+ * Setup the architecture: report progress when a progress callback is
+ * installed, select the primary PCI bus and, with CONFIG_CPM2, reset the
+ * CPM2.
+ */
+static void __init stx_gp3_setup_arch(void)
+{
+ if (ppc_md.progress)
+ ppc_md.progress("stx_gp3_setup_arch()", 0);
+
+ fsl_pci_assign_primary();
+
+#ifdef CONFIG_CPM2
+ cpm2_reset();
+#endif
+}
+
+/*
+ * show_cpuinfo hook: print the vendor string, the PVR and SVR register
+ * values and the PLL field taken from bits 24-29 of HID1.
+ */
+static void stx_gp3_show_cpuinfo(struct seq_file *m)
+{
+	uint pvr_val = mfspr(SPRN_PVR);
+	uint svr_val = mfspr(SPRN_SVR);
+	uint hid1_val;
+
+	seq_printf(m, "Vendor\t\t: RPC Electronics STx\n");
+	seq_printf(m, "PVR\t\t: 0x%x\n", pvr_val);
+	seq_printf(m, "SVR\t\t: 0x%x\n", svr_val);
+
+	/* Display cpu Pll setting */
+	hid1_val = mfspr(SPRN_HID1);
+	seq_printf(m, "PLL setting\t: 0x%x\n", (hid1_val >> 24) & 0x3f);
+}
+
+/* Run mpc85xx_common_publish_devices() as an arch initcall on this machine */
+machine_arch_initcall(stx_gp3, mpc85xx_common_publish_devices);
+
+/* Machine description, selected by the "stx,gp3-8560" compatible string */
+define_machine(stx_gp3) {
+ .name = "STX GP3",
+ .compatible = "stx,gp3-8560",
+ .setup_arch = stx_gp3_setup_arch,
+ .init_IRQ = stx_gp3_pic_init,
+ .show_cpuinfo = stx_gp3_show_cpuinfo,
+ .get_irq = mpic_get_irq,
+ .progress = udbg_progress,
+};
diff --git a/arch/powerpc/platforms/85xx/t1042rdb_diu.c b/arch/powerpc/platforms/85xx/t1042rdb_diu.c
new file mode 100644
index 000000000..767eed98a
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/t1042rdb_diu.c
@@ -0,0 +1,152 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * T1042 platform DIU operation
+ *
+ * Copyright 2014 Freescale Semiconductor Inc.
+ */
+
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+
+#include <sysdev/fsl_soc.h>
+
+/*DIU Pixel ClockCR offset in scfg*/
+#define CCSR_SCFG_PIXCLKCR 0x28
+
+/* DIU Pixel Clock bits of the PIXCLKCR */
+#define PIXCLKCR_PXCKEN 0x80000000
+#define PIXCLKCR_PXCKINV 0x40000000
+#define PIXCLKCR_PXCKDLY 0x0000FF00
+#define PIXCLKCR_PXCLK_MASK 0x00FF0000
+
+/* Some CPLD register definitions */
+#define CPLD_DIUCSR 0x16
+#define CPLD_DIUCSR_DVIEN 0x80
+#define CPLD_DIUCSR_BACKLIGHT 0x0f
+
+/* CPLD device tree node, looked up once in t1042rdb_diu_init() */
+struct device_node *cpld_node;
+
+/**
+ * t1042rdb_set_monitor_port: switch the output to a different monitor port
+ * @port: monitor port to select (DVI or LVDS)
+ *
+ * Maps the CPLD registers for the duration of the call and updates the
+ * DIUCSR register. The reference on cpld_node is kept: the node is looked
+ * up once at init time and this function can be called more than once.
+ */
+static void t1042rdb_set_monitor_port(enum fsl_diu_monitor_port port)
+{
+	void __iomem *cpld_base;
+
+	cpld_base = of_iomap(cpld_node, 0);
+	if (!cpld_base) {
+		pr_err("%s: Could not map cpld registers\n", __func__);
+		return;
+	}
+
+	switch (port) {
+	case FSL_DIU_PORT_DVI:
+		/* Enable the DVI(HDMI) port, disable the DFP and
+		 * the backlight
+		 */
+		clrbits8(cpld_base + CPLD_DIUCSR, CPLD_DIUCSR_DVIEN);
+		break;
+	case FSL_DIU_PORT_LVDS:
+		/*
+		 * LVDS also needs backlight enabled, otherwise the display
+		 * will be blank.
+		 */
+		/* Enable the DFP port, disable the DVI*/
+		/*
+		 * NOTE(review): 0x01 << 8 does not fit in the 8-bit register,
+		 * so this write looks like it cannot set any bit. Confirm the
+		 * intended bit against the CPLD documentation.
+		 */
+		setbits8(cpld_base + CPLD_DIUCSR, 0x01 << 8);
+		setbits8(cpld_base + CPLD_DIUCSR, 0x01 << 4);
+		setbits8(cpld_base + CPLD_DIUCSR, CPLD_DIUCSR_BACKLIGHT);
+		break;
+	default:
+		pr_err("%s: Unsupported monitor port %i\n", __func__, port);
+	}
+
+	iounmap(cpld_base);
+}
+
+/**
+ * t1042rdb_set_pixel_clock: program the DIU's clock
+ * @pixclock: pixel clock in ps (pico seconds)
+ *
+ * Converts @pixclock to a frequency, derives the platform-clock divider
+ * (clamped to 2-255) and writes it to the PIXCLKCR register of the SCFG
+ * block. A @pixclock of 0 is rejected because it cannot be converted.
+ */
+static void t1042rdb_set_pixel_clock(unsigned int pixclock)
+{
+	struct device_node *scfg_np;
+	void __iomem *scfg;
+	unsigned long freq;
+	u64 temp;
+	u32 pxclk;
+
+	/* A zero period would divide by zero in the conversion below */
+	if (!pixclock) {
+		pr_err("%s: Invalid pixel clock 0. Can not display video.\n",
+			__func__);
+		return;
+	}
+
+	scfg_np = of_find_compatible_node(NULL, NULL, "fsl,t1040-scfg");
+	if (!scfg_np) {
+		pr_err("%s: Missing scfg node. Can not display video.\n",
+			__func__);
+		return;
+	}
+
+	scfg = of_iomap(scfg_np, 0);
+	of_node_put(scfg_np);
+	if (!scfg) {
+		pr_err("%s: Could not map device. Can not display video.\n",
+			__func__);
+		return;
+	}
+
+	/* Convert pixclock into frequency */
+	temp = 1000000000000ULL;
+	do_div(temp, pixclock);
+	freq = temp;
+
+	/*
+	 * 'pxclk' is the ratio of the platform clock to the pixel clock.
+	 * This number is programmed into the PIXCLKCR register, and the valid
+	 * range of values is 2-255.
+	 */
+	pxclk = DIV_ROUND_CLOSEST(fsl_get_sys_freq(), freq);
+	pxclk = clamp_t(u32, pxclk, 2, 255);
+
+	/* Disable the pixel clock, and set it to non-inverted and no delay */
+	clrbits32(scfg + CCSR_SCFG_PIXCLKCR,
+		  PIXCLKCR_PXCKEN | PIXCLKCR_PXCKDLY | PIXCLKCR_PXCLK_MASK);
+
+	/* Enable the clock and set the pxclk */
+	setbits32(scfg + CCSR_SCFG_PIXCLKCR, PIXCLKCR_PXCKEN | (pxclk << 16));
+
+	iounmap(scfg);
+}
+
+/**
+ * t1042rdb_valid_monitor_port: set the monitor port for sysfs
+ * @port: requested monitor port
+ *
+ * Returns @port when it is DVI or LVDS, and DVI for anything else.
+ */
+static enum fsl_diu_monitor_port
+t1042rdb_valid_monitor_port(enum fsl_diu_monitor_port port)
+{
+	if (port == FSL_DIU_PORT_DVI || port == FSL_DIU_PORT_LVDS)
+		return port;
+
+	/* Dual-link LVDS is not supported */
+	return FSL_DIU_PORT_DVI;
+}
+
+/*
+ * Early init: if the device tree has a "fsl,t1042rdb-cpld" node, remember
+ * it in cpld_node and install the T1042RDB DIU callbacks. Returns 0 in
+ * either case, so a missing node is not treated as an error.
+ */
+static int __init t1042rdb_diu_init(void)
+{
+ cpld_node = of_find_compatible_node(NULL, NULL, "fsl,t1042rdb-cpld");
+ if (!cpld_node)
+ return 0;
+
+ diu_ops.set_monitor_port = t1042rdb_set_monitor_port;
+ diu_ops.set_pixel_clock = t1042rdb_set_pixel_clock;
+ diu_ops.valid_monitor_port = t1042rdb_valid_monitor_port;
+
+ return 0;
+}
+
+early_initcall(t1042rdb_diu_init);
+
+MODULE_LICENSE("GPL");
diff --git a/arch/powerpc/platforms/85xx/tqm85xx.c b/arch/powerpc/platforms/85xx/tqm85xx.c
new file mode 100644
index 000000000..6be1b9809
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/tqm85xx.c
@@ -0,0 +1,131 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Based on MPC8560 ADS and arch/ppc tqm85xx ports
+ *
+ * Maintained by Kumar Gala (see MAINTAINERS for contact information)
+ *
+ * Copyright 2008 Freescale Semiconductor Inc.
+ *
+ * Copyright (c) 2005-2006 DENX Software Engineering
+ * Stefan Roese <sr@denx.de>
+ *
+ * Based on original work by
+ * Kumar Gala <kumar.gala@freescale.com>
+ * Copyright 2004 Freescale Semiconductor Inc.
+ */
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/kdev_t.h>
+#include <linux/delay.h>
+#include <linux/seq_file.h>
+#include <linux/of.h>
+
+#include <asm/time.h>
+#include <asm/machdep.h>
+#include <asm/pci-bridge.h>
+#include <asm/mpic.h>
+#include <mm/mmu_decl.h>
+#include <asm/udbg.h>
+
+#include <sysdev/fsl_soc.h>
+#include <sysdev/fsl_pci.h>
+
+#include "mpc85xx.h"
+
+#ifdef CONFIG_CPM2
+#include <asm/cpm2.h>
+#endif /* CONFIG_CPM2 */
+
+/* Allocate and initialise the MPIC, then call mpc85xx_cpm2_pic_init(). */
+static void __init tqm85xx_pic_init(void)
+{
+	struct mpic *pic;
+
+	pic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN, 0, 256, " OpenPIC ");
+	BUG_ON(!pic);
+
+	mpic_init(pic);
+	mpc85xx_cpm2_pic_init();
+}
+
+/*
+ * Board-level architecture setup: report progress if a progress hook is
+ * installed, reset the CPM2 when CONFIG_CPM2 is set, then call
+ * fsl_pci_assign_primary().
+ */
+static void __init tqm85xx_setup_arch(void)
+{
+	if (ppc_md.progress != NULL)
+		ppc_md.progress("tqm85xx_setup_arch()", 0);
+
+#ifdef CONFIG_CPM2
+	cpm2_reset();
+#endif
+
+	fsl_pci_assign_primary();
+}
+
+/* Write the vendor string, PVR, SVR and CPU PLL setting to @m. */
+static void tqm85xx_show_cpuinfo(struct seq_file *m)
+{
+	uint pvr = mfspr(SPRN_PVR);
+	uint svr = mfspr(SPRN_SVR);
+	uint hid1;
+
+	seq_printf(m, "Vendor\t\t: TQ Components\n");
+	seq_printf(m, "PVR\t\t: 0x%x\n", pvr);
+	seq_printf(m, "SVR\t\t: 0x%x\n", svr);
+
+	/* Display cpu PLL setting: six bits of HID1 starting at bit 24 */
+	hid1 = mfspr(SPRN_HID1);
+	seq_printf(m, "PLL setting\t: 0x%x\n", (hid1 >> 24) & 0x3f);
+}
+
+/*
+ * PCI header fixup for the TI 1520: set bit 27 of the config dword at
+ * offset 0x80. A no-op unless the running machine is tqm85xx.
+ */
+static void tqm85xx_ti1520_fixup(struct pci_dev *pdev)
+{
+	unsigned int sysctl;
+
+	/* Do not do the fixup on other platforms! */
+	if (machine_is(tqm85xx)) {
+		dev_info(&pdev->dev, "Using TI 1520 fixup on TQM85xx\n");
+
+		/*
+		 * Enable P2CCLK bit in system control register
+		 * to enable CLOCK output to power chip
+		 */
+		pci_read_config_dword(pdev, 0x80, &sysctl);
+		pci_write_config_dword(pdev, 0x80, sysctl | (1 << 27));
+	}
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_1520,
+			 tqm85xx_ti1520_fixup);
+
+machine_arch_initcall(tqm85xx, mpc85xx_common_publish_devices);
+
+/* NULL-terminated list of root compatible strings used by tqm85xx_probe() */
+static const char * const board[] __initconst = {
+ "tqc,tqm8540",
+ "tqc,tqm8541",
+ "tqc,tqm8548",
+ "tqc,tqm8555",
+ "tqc,tqm8560",
+ NULL
+};
+
+/*
+ * Machine probe: match the device-tree root node (of_root) against the
+ * compatible strings listed in board[] and return the match result.
+ *
+ * NOTE(review): this comment used to say "Called very early, device-tree
+ * isn't unflattened", yet the code reads of_root -- confirm which holds.
+ */
+static int __init tqm85xx_probe(void)
+{
+ return of_device_compatible_match(of_root, board);
+}
+
+/*
+ * Machine description for the TQM85xx: names the board and wires up its
+ * probe, arch setup, IRQ init and cpuinfo hooks.
+ */
+define_machine(tqm85xx) {
+ .name = "TQM85xx",
+ .probe = tqm85xx_probe,
+ .setup_arch = tqm85xx_setup_arch,
+ .init_IRQ = tqm85xx_pic_init,
+ .show_cpuinfo = tqm85xx_show_cpuinfo,
+ .get_irq = mpic_get_irq,
+ .progress = udbg_progress,
+};
diff --git a/arch/powerpc/platforms/85xx/twr_p102x.c b/arch/powerpc/platforms/85xx/twr_p102x.c
new file mode 100644
index 000000000..c0a0456f1
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/twr_p102x.c
@@ -0,0 +1,117 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2010-2011, 2013 Freescale Semiconductor, Inc.
+ *
+ * Author: Michael Johnston <michael.johnston@freescale.com>
+ *
+ * Description:
+ * TWR-P102x Board Setup
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/fsl/guts.h>
+#include <linux/pci.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+
+#include <asm/pci-bridge.h>
+#include <asm/udbg.h>
+#include <asm/mpic.h>
+#include <soc/fsl/qe/qe.h>
+
+#include <sysdev/fsl_soc.h>
+#include <sysdev/fsl_pci.h>
+#include "smp.h"
+
+#include "mpc85xx.h"
+
+/* Allocate the MPIC (big-endian, single destination CPU) and initialise it. */
+static void __init twr_p1025_pic_init(void)
+{
+	struct mpic *pic = mpic_alloc(NULL, 0,
+				      MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU,
+				      0, 256, " OpenPIC ");
+
+	BUG_ON(!pic);
+	mpic_init(pic);
+}
+
+/*
+ * Setup the architecture
+ *
+ * Initialises SMP and the primary PCI bus. With CONFIG_QUICC_ENGINE it
+ * also initialises the QE parallel I/O and, when UCC_GETH or SERIAL_QE is
+ * enabled, programs the pin muxing needed by the QE on the TWR-P1025.
+ */
+static void __init twr_p1025_setup_arch(void)
+{
+ if (ppc_md.progress)
+ ppc_md.progress("twr_p1025_setup_arch()", 0);
+
+ mpc85xx_smp_init();
+
+ fsl_pci_assign_primary();
+
+#ifdef CONFIG_QUICC_ENGINE
+ mpc85xx_qe_par_io_init();
+
+#if IS_ENABLED(CONFIG_UCC_GETH) || IS_ENABLED(CONFIG_SERIAL_QE)
+ if (machine_is(twr_p1025)) {
+ struct ccsr_guts __iomem *guts;
+ struct device_node *np;
+
+ np = of_find_compatible_node(NULL, NULL, "fsl,p1021-guts");
+ if (np) {
+ guts = of_iomap(np, 0);
+ if (!guts)
+ pr_err("twr_p1025: could not map global utilities register\n");
+ else {
+ /* P1025 has pins muxed for QE and other functions. To
+ * enable QE UEC mode, we need to set bit QE0 for UCC1
+ * in Eth mode, QE0 and QE3 for UCC5 in Eth mode, QE9
+ * and QE12 for QE MII management signals in PMUXCR
+ * register.
+ * Set QE mux bits in PMUXCR */
+ setbits32(&guts->pmuxcr, MPC85xx_PMUXCR_QE(0) |
+ MPC85xx_PMUXCR_QE(3) |
+ MPC85xx_PMUXCR_QE(9) |
+ MPC85xx_PMUXCR_QE(12));
+ /* guts is not touched again below, so drop the mapping now */
+ iounmap(guts);
+
+#if IS_ENABLED(CONFIG_SERIAL_QE)
+ /* On P1025TWR board, the UCC7 acted as UART port.
+ * However, The UCC7's CTS pin is low level in default,
+ * it will impact the transmission in full duplex
+ * communication. So disable the Flow control pin PA18.
+ * The UCC7 UART just can use RXD and TXD pins.
+ */
+ par_io_config_pin(0, 18, 0, 0, 0, 0);
+#endif
+ /* Drive PB29 to CPLD low - CPLD will then change
+ * muxing from LBC to QE */
+ par_io_config_pin(1, 29, 1, 0, 0, 0);
+ par_io_data_set(1, 29, 0);
+ }
+ /* Drop the reference taken by of_find_compatible_node() */
+ of_node_put(np);
+ }
+ }
+#endif
+#endif /* CONFIG_QUICC_ENGINE */
+
+ pr_info("TWR-P1025 board from Freescale Semiconductor\n");
+}
+
+machine_arch_initcall(twr_p1025, mpc85xx_common_publish_devices);
+
+/*
+ * Machine description for the TWR-P1025, selected through the
+ * "fsl,TWR-P1025" compatible string. The PCI bus fixup hook is only
+ * present when CONFIG_PCI is enabled.
+ */
+define_machine(twr_p1025) {
+ .name = "TWR-P1025",
+ .compatible = "fsl,TWR-P1025",
+ .setup_arch = twr_p1025_setup_arch,
+ .init_IRQ = twr_p1025_pic_init,
+#ifdef CONFIG_PCI
+ .pcibios_fixup_bus = fsl_pcibios_fixup_bus,
+#endif
+ .get_irq = mpic_get_irq,
+ .progress = udbg_progress,
+};
diff --git a/arch/powerpc/platforms/85xx/xes_mpc85xx.c b/arch/powerpc/platforms/85xx/xes_mpc85xx.c
new file mode 100644
index 000000000..45f257fc1
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/xes_mpc85xx.c
@@ -0,0 +1,176 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2009 Extreme Engineering Solutions, Inc.
+ *
+ * X-ES board-specific functionality
+ *
+ * Based on mpc85xx_ds code from Freescale Semiconductor, Inc.
+ *
+ * Author: Nate Case <ncase@xes-inc.com>
+ */
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/kdev_t.h>
+#include <linux/delay.h>
+#include <linux/seq_file.h>
+#include <linux/interrupt.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+
+#include <asm/time.h>
+#include <asm/machdep.h>
+#include <asm/pci-bridge.h>
+#include <mm/mmu_decl.h>
+#include <asm/udbg.h>
+#include <asm/mpic.h>
+
+#include <sysdev/fsl_soc.h>
+#include <sysdev/fsl_pci.h>
+#include "smp.h"
+
+#include "mpc85xx.h"
+
+/* A few bit definitions needed for fixups on some boards */
+#define MPC85xx_L2CTL_L2E 0x80000000 /* L2 enable */
+#define MPC85xx_L2CTL_L2I 0x40000000 /* L2 flash invalidate */
+#define MPC85xx_L2CTL_L2SIZ_MASK 0x30000000 /* L2 SRAM size (R/O) */
+
+/* Allocate a big-endian MPIC with 256 sources and initialise it. */
+void __init xes_mpc85xx_pic_init(void)
+{
+	struct mpic *pic;
+
+	pic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN, 0, 256, " OpenPIC ");
+	BUG_ON(!pic);
+
+	mpic_init(pic);
+}
+
+/*
+ * Program the L2 control register at @l2_base so that the L2 is enabled
+ * and flash-invalidated. On MPC8540/MPC8560 machines the L2SIZ field read
+ * back from the register is additionally copied into the field two bits
+ * below it (L2BLKSZ).
+ *
+ * @l2_base: mapped address of the L2 control register; must be valid.
+ */
+static void __init xes_mpc85xx_configure_l2(void __iomem *l2_base)
+{
+ volatile uint32_t ctl, tmp;
+
+ /* Barrier before reading the current register contents */
+ asm volatile("msync; isync");
+ tmp = in_be32(l2_base);
+
+ /*
+ * xMon may have enabled part of L2 as SRAM, so we need to set it
+ * up for all cache mode just to be safe.
+ */
+ printk(KERN_INFO "xes_mpc85xx: Enabling L2 as cache\n");
+
+ ctl = MPC85xx_L2CTL_L2E | MPC85xx_L2CTL_L2I;
+ if (of_machine_is_compatible("MPC8540") ||
+ of_machine_is_compatible("MPC8560"))
+ /*
+ * Assume L2 SRAM is used fully for cache, so set
+ * L2BLKSZ (bits 4:5) to match L2SIZ (bits 2:3).
+ */
+ ctl |= (tmp & MPC85xx_L2CTL_L2SIZ_MASK) >> 2;
+
+ /* The register write is bracketed by barriers on both sides */
+ asm volatile("msync; isync");
+ out_be32(l2_base, ctl);
+ asm volatile("msync; isync");
+}
+
+/*
+ * Apply X-ES board fixups: walk every "l2-cache-controller" node and, for
+ * the MPC8548/MPC8540/MPC8560 variants, map its first register range and
+ * reprogram the L2 as cache via xes_mpc85xx_configure_l2().
+ *
+ * Nodes whose resource cannot be obtained or mapped are skipped with a
+ * warning. The mapping is released again once the L2 is configured.
+ */
+static void __init xes_mpc85xx_fixups(void)
+{
+	struct device_node *np;
+
+	/*
+	 * Legacy xMon firmware on some X-ES boards does not enable L2
+	 * as cache. We must ensure that they get enabled here.
+	 */
+	for_each_node_by_name(np, "l2-cache-controller") {
+		struct resource res;
+		void __iomem *l2_base;
+
+		/* Only MPC8548, MPC8540, and MPC8560 boards are affected */
+		if (!of_device_is_compatible(np,
+				"fsl,mpc8548-l2-cache-controller") &&
+		    !of_device_is_compatible(np,
+				"fsl,mpc8540-l2-cache-controller") &&
+		    !of_device_is_compatible(np,
+				"fsl,mpc8560-l2-cache-controller"))
+			continue;
+
+		if (of_address_to_resource(np, 0, &res)) {
+			printk(KERN_WARNING "xes_mpc85xx: Could not get "
+			       "resource for device tree node '%pOF'\n",
+			       np);
+			continue;
+		}
+
+		l2_base = ioremap(res.start, resource_size(&res));
+		if (!l2_base) {
+			/* Never hand a NULL register base to the L2 setup */
+			printk(KERN_WARNING "xes_mpc85xx: Could not map "
+			       "registers for device tree node '%pOF'\n",
+			       np);
+			continue;
+		}
+
+		xes_mpc85xx_configure_l2(l2_base);
+
+		/* The mapping is only needed while the L2 is programmed */
+		iounmap(l2_base);
+	}
+}
+
+/*
+ * Setup the architecture
+ *
+ * Prints the board model taken from the root node's "model" property,
+ * with a leading "xes," vendor prefix removed, then applies the X-ES
+ * fixups, initialises SMP and assigns the primary PCI bus. "Unknown" is
+ * printed when the property is absent. Returns early, doing nothing, if
+ * the root node cannot be found.
+ */
+static void __init xes_mpc85xx_setup_arch(void)
+{
+	struct device_node *root;
+	const char *model;
+
+	root = of_find_node_by_path("/");
+	if (root == NULL)
+		return;
+
+	model = of_get_property(root, "model", NULL);
+	if (!model)
+		model = "Unknown";
+	else if (!strncmp(model, "xes,", strlen("xes,")))
+		model += strlen("xes,");	/* skip the vendor prefix */
+
+	printk(KERN_INFO "X-ES MPC85xx-based single-board computer: %s\n",
+	       model);
+
+	/* model may point into the node's property data: put only now */
+	of_node_put(root);
+
+	xes_mpc85xx_fixups();
+
+	mpc85xx_smp_init();
+
+	fsl_pci_assign_primary();
+}
+
+machine_arch_initcall(xes_mpc8572, mpc85xx_common_publish_devices);
+machine_arch_initcall(xes_mpc8548, mpc85xx_common_publish_devices);
+machine_arch_initcall(xes_mpc8540, mpc85xx_common_publish_devices);
+
+/* Machine description for X-ES MPC8572 boards ("xes,MPC8572") */
+define_machine(xes_mpc8572) {
+ .name = "X-ES MPC8572",
+ .compatible = "xes,MPC8572",
+ .setup_arch = xes_mpc85xx_setup_arch,
+ .init_IRQ = xes_mpc85xx_pic_init,
+#ifdef CONFIG_PCI
+ .pcibios_fixup_bus = fsl_pcibios_fixup_bus,
+ .pcibios_fixup_phb = fsl_pcibios_fixup_phb,
+#endif
+ .get_irq = mpic_get_irq,
+ .progress = udbg_progress,
+};
+
+/* Machine description for X-ES MPC8548 boards ("xes,MPC8548") */
+define_machine(xes_mpc8548) {
+ .name = "X-ES MPC8548",
+ .compatible = "xes,MPC8548",
+ .setup_arch = xes_mpc85xx_setup_arch,
+ .init_IRQ = xes_mpc85xx_pic_init,
+#ifdef CONFIG_PCI
+ .pcibios_fixup_bus = fsl_pcibios_fixup_bus,
+ .pcibios_fixup_phb = fsl_pcibios_fixup_phb,
+#endif
+ .get_irq = mpic_get_irq,
+ .progress = udbg_progress,
+};
+
+/* Machine description for X-ES MPC8540 boards ("xes,MPC8540") */
+define_machine(xes_mpc8540) {
+ .name = "X-ES MPC8540",
+ .compatible = "xes,MPC8540",
+ .setup_arch = xes_mpc85xx_setup_arch,
+ .init_IRQ = xes_mpc85xx_pic_init,
+#ifdef CONFIG_PCI
+ .pcibios_fixup_bus = fsl_pcibios_fixup_bus,
+ .pcibios_fixup_phb = fsl_pcibios_fixup_phb,
+#endif
+ .get_irq = mpic_get_irq,
+ .progress = udbg_progress,
+};
diff --git a/arch/powerpc/platforms/86xx/Kconfig b/arch/powerpc/platforms/86xx/Kconfig
new file mode 100644
index 000000000..67467cd6f
--- /dev/null
+++ b/arch/powerpc/platforms/86xx/Kconfig
@@ -0,0 +1,61 @@
+# SPDX-License-Identifier: GPL-2.0
+menuconfig PPC_86xx
+ bool "86xx-based boards"
+ depends on PPC_BOOK3S_32
+ select FSL_SOC
+ select ALTIVEC
+ help
+ The Freescale E600 SoCs have 74xx cores.
+
+if PPC_86xx
+
+config GEF_PPC9A
+ bool "GE PPC9A"
+ select DEFAULT_UIMAGE
+ select MMIO_NVRAM
+ select GPIOLIB
+ select GE_FPGA
+ help
+ This option enables support for the GE PPC9A.
+
+config GEF_SBC310
+ bool "GE SBC310"
+ select DEFAULT_UIMAGE
+ select MMIO_NVRAM
+ select GPIOLIB
+ select GE_FPGA
+ help
+ This option enables support for the GE SBC310.
+
+config GEF_SBC610
+ bool "GE SBC610"
+ select DEFAULT_UIMAGE
+ select MMIO_NVRAM
+ select GPIOLIB
+ select GE_FPGA
+ select HAVE_RAPIDIO
+ help
+ This option enables support for the GE SBC610.
+
+config MVME7100
+ bool "Artesyn MVME7100"
+ help
+ This option enables support for the Emerson/Artesyn MVME7100 board.
+
+endif
+
+config MPC8641
+ bool
+ select HAVE_PCI
+ select FSL_PCI if PCI
+ select PPC_UDBG_16550
+ select MPIC
+ default y if GEF_SBC610 || GEF_SBC310 || GEF_PPC9A \
+ || MVME7100
+
+config MPC8610
+ bool
+ select HAVE_PCI
+ select FSL_PCI if PCI
+ select PPC_UDBG_16550
+ select MPIC
diff --git a/arch/powerpc/platforms/86xx/Makefile b/arch/powerpc/platforms/86xx/Makefile
new file mode 100644
index 000000000..dafbc037f
--- /dev/null
+++ b/arch/powerpc/platforms/86xx/Makefile
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for the PowerPC 86xx linux kernel.
+#
+
+obj-y := pic.o common.o
+obj-$(CONFIG_SMP) += mpc86xx_smp.o
+obj-$(CONFIG_GEF_SBC610) += gef_sbc610.o
+obj-$(CONFIG_GEF_SBC310) += gef_sbc310.o
+obj-$(CONFIG_GEF_PPC9A) += gef_ppc9a.o
+obj-$(CONFIG_MVME7100) += mvme7100.o
diff --git a/arch/powerpc/platforms/86xx/common.c b/arch/powerpc/platforms/86xx/common.c
new file mode 100644
index 000000000..a4a550527
--- /dev/null
+++ b/arch/powerpc/platforms/86xx/common.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Routines common to most mpc86xx-based boards.
+ */
+
+#include <linux/init.h>
+#include <linux/mod_devicetable.h>
+#include <linux/of_platform.h>
+#include <asm/reg.h>
+#include <asm/synch.h>
+
+#include "mpc86xx.h"
+
+/*
+ * Match table passed to of_platform_bus_probe() by
+ * mpc86xx_common_publish_devices(). Entries match by device type, by
+ * compatible string or by node name; the empty entry terminates the list.
+ */
+static const struct of_device_id mpc86xx_common_ids[] __initconst = {
+ { .type = "soc", },
+ { .compatible = "soc", },
+ { .compatible = "simple-bus", },
+ { .name = "localbus", },
+ { .compatible = "gianfar", },
+ { .compatible = "fsl,mpc8641-pcie", },
+ {},
+};
+
+/*
+ * Probe the buses listed in mpc86xx_common_ids, starting from the root
+ * of the device tree, and return of_platform_bus_probe()'s result.
+ */
+int __init mpc86xx_common_publish_devices(void)
+{
+	int rc = of_platform_bus_probe(NULL, mpc86xx_common_ids, NULL);
+
+	return rc;
+}
+
+/*
+ * Zero both halves of the time base and then set HID0_TBEN in HID0.
+ * Always returns 0.
+ */
+long __init mpc86xx_time_init(void)
+{
+	unsigned int hid0;
+
+	/* Set the time base to zero */
+	mtspr(SPRN_TBWL, 0);
+	mtspr(SPRN_TBWU, 0);
+
+	/* Read-modify-write HID0 to set the time base enable bit */
+	hid0 = mfspr(SPRN_HID0);
+	hid0 |= HID0_TBEN;
+	mtspr(SPRN_HID0, hid0);
+	isync();
+
+	return 0;
+}
diff --git a/arch/powerpc/platforms/86xx/gef_ppc9a.c b/arch/powerpc/platforms/86xx/gef_ppc9a.c
new file mode 100644
index 000000000..f7f98cca7
--- /dev/null
+++ b/arch/powerpc/platforms/86xx/gef_ppc9a.c
@@ -0,0 +1,192 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * GE PPC9A board support
+ *
+ * Author: Martyn Welch <martyn.welch@ge.com>
+ *
+ * Copyright 2008 GE Intelligent Platforms Embedded Systems, Inc.
+ *
+ * Based on: mpc86xx_hpcn.c (MPC86xx HPCN board specific routines)
+ * Copyright 2006 Freescale Semiconductor Inc.
+ *
+ * NEC fixup adapted from arch/mips/pci/fixup-lm2e.c
+ */
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/kdev_t.h>
+#include <linux/delay.h>
+#include <linux/seq_file.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+
+#include <asm/time.h>
+#include <asm/machdep.h>
+#include <asm/pci-bridge.h>
+#include <mm/mmu_decl.h>
+#include <asm/udbg.h>
+
+#include <asm/mpic.h>
+#include <asm/nvram.h>
+
+#include <sysdev/fsl_pci.h>
+#include <sysdev/fsl_soc.h>
+#include <sysdev/ge/ge_pic.h>
+
+#include "mpc86xx.h"
+
+#undef DEBUG
+
+/*
+ * Debug print helper. There must be no space between the macro name and
+ * the opening parenthesis, otherwise DBG becomes an object-like macro and
+ * any DBG(...) call fails to compile.
+ */
+#ifdef DEBUG
+#define DBG(fmt...) do { printk(KERN_ERR "PPC9A: " fmt); } while (0)
+#else
+#define DBG(fmt...) do { } while (0)
+#endif
+
+void __iomem *ppc9a_regs;
+
+/*
+ * Initialise the MPC86xx interrupt controller and, when the device tree
+ * has a "gef,fpga-pic-1.00" node, pass that node to gef_pic_init().
+ */
+static void __init gef_ppc9a_init_irq(void)
+{
+	struct device_node *fpga_pic;
+
+	mpc86xx_init_irq();
+
+	/*
+	 * There is a simple interrupt handler in the main FPGA, this needs
+	 * to be cascaded into the MPIC
+	 */
+	fpga_pic = of_find_compatible_node(NULL, NULL, "gef,fpga-pic-1.00");
+	if (fpga_pic) {
+		gef_pic_init(fpga_pic);
+		of_node_put(fpga_pic);
+	} else {
+		printk(KERN_WARNING "PPC9A: No FPGA PIC\n");
+	}
+}
+
+/*
+ * Architecture setup for the PPC9A: announce the board, initialise SMP
+ * (if configured), assign the primary PCI bus, map the FPGA board
+ * registers into ppc9a_regs and initialise MMIO NVRAM (if configured).
+ */
+static void __init gef_ppc9a_setup_arch(void)
+{
+	struct device_node *fpga;
+
+	printk(KERN_INFO "GE Intelligent Platforms PPC9A 6U VME SBC\n");
+
+#ifdef CONFIG_SMP
+	mpc86xx_smp_init();
+#endif
+
+	fsl_pci_assign_primary();
+
+	/* Remap basic board registers */
+	fpga = of_find_compatible_node(NULL, NULL, "gef,ppc9a-fpga-regs");
+	if (fpga != NULL) {
+		ppc9a_regs = of_iomap(fpga, 0);
+		of_node_put(fpga);
+		if (!ppc9a_regs)
+			printk(KERN_WARNING "Unable to map board registers\n");
+	}
+
+#if defined(CONFIG_MMIO_NVRAM)
+	mmio_nvram_init();
+#endif
+}
+
+/* Return the PCB revision: byte at bits 23:16 of the first board register */
+static unsigned int gef_ppc9a_get_pcb_rev(void)
+{
+	return (ioread32be(ppc9a_regs) >> 16) & 0xff;
+}
+
+/* Return the board (software) revision: bits 15:8 of the first register */
+static unsigned int gef_ppc9a_get_board_rev(void)
+{
+	return (ioread32be(ppc9a_regs) >> 8) & 0xff;
+}
+
+/* Return the FPGA revision: low four bits of the first board register */
+static unsigned int gef_ppc9a_get_fpga_rev(void)
+{
+	return ioread32be(ppc9a_regs) & 0xf;
+}
+
+/* Return VME Geographical Address: low five bits of the register at +0x4 */
+static unsigned int gef_ppc9a_get_vme_geo_addr(void)
+{
+	return ioread32be(ppc9a_regs + 0x4) & 0x1f;
+}
+
+/* Return VME System Controller Status: bit 9 of the register at +0x4 */
+static unsigned int gef_ppc9a_get_vme_is_syscon(void)
+{
+	return (ioread32be(ppc9a_regs + 0x4) >> 9) & 0x1;
+}
+
+/*
+ * Write vendor, PCB/board revision, FPGA revision, SVR and the VME
+ * geographical address and system-controller state to @m.
+ */
+static void gef_ppc9a_show_cpuinfo(struct seq_file *m)
+{
+	uint svr = mfspr(SPRN_SVR);
+	unsigned int pcb_rev, board_rev;
+	const char *syscon;
+
+	seq_printf(m, "Vendor\t\t: GE Intelligent Platforms\n");
+
+	/* The board revision is shown as a letter, counting up from 'A' */
+	pcb_rev = gef_ppc9a_get_pcb_rev();
+	board_rev = gef_ppc9a_get_board_rev();
+	seq_printf(m, "Revision\t: %u%c\n", pcb_rev, ('A' + board_rev));
+	seq_printf(m, "FPGA Revision\t: %u\n", gef_ppc9a_get_fpga_rev());
+
+	seq_printf(m, "SVR\t\t: 0x%x\n", svr);
+
+	seq_printf(m, "VME geo. addr\t: %u\n", gef_ppc9a_get_vme_geo_addr());
+
+	syscon = gef_ppc9a_get_vme_is_syscon() ? "yes" : "no";
+	seq_printf(m, "VME syscon\t: %s\n", syscon);
+}
+
+/*
+ * PCI header fixup for the NEC USB controller: rewrite the low three bits
+ * of config dword 0xe0 to 0x5 and write 1 << 5 to config dword 0xe4.
+ * A no-op unless the running machine is gef_ppc9a.
+ */
+static void gef_ppc9a_nec_fixup(struct pci_dev *pdev)
+{
+	unsigned int ports;
+
+	/* Do not do the fixup on other platforms! */
+	if (machine_is(gef_ppc9a)) {
+		printk(KERN_INFO "Running NEC uPD720101 Fixup\n");
+
+		/* Ensure ports 1, 2, 3, 4 & 5 are enabled */
+		pci_read_config_dword(pdev, 0xe0, &ports);
+		ports = (ports & ~7) | 0x5;
+		pci_write_config_dword(pdev, 0xe0, ports);
+
+		/* System clock is 48-MHz Oscillator and EHCI Enabled. */
+		pci_write_config_dword(pdev, 0xe4, 1 << 5);
+	}
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NEC, PCI_DEVICE_ID_NEC_USB,
+			 gef_ppc9a_nec_fixup);
+
+machine_arch_initcall(gef_ppc9a, mpc86xx_common_publish_devices);
+
+/*
+ * Machine description for the GE PPC9A, selected through the "gef,ppc9a"
+ * compatible string. The PCI bus fixup hook is only present when
+ * CONFIG_PCI is enabled.
+ */
+define_machine(gef_ppc9a) {
+ .name = "GE PPC9A",
+ .compatible = "gef,ppc9a",
+ .setup_arch = gef_ppc9a_setup_arch,
+ .init_IRQ = gef_ppc9a_init_irq,
+ .show_cpuinfo = gef_ppc9a_show_cpuinfo,
+ .get_irq = mpic_get_irq,
+ .time_init = mpc86xx_time_init,
+ .progress = udbg_progress,
+#ifdef CONFIG_PCI
+ .pcibios_fixup_bus = fsl_pcibios_fixup_bus,
+#endif
+};
diff --git a/arch/powerpc/platforms/86xx/gef_sbc310.c b/arch/powerpc/platforms/86xx/gef_sbc310.c
new file mode 100644
index 000000000..689835f7f
--- /dev/null
+++ b/arch/powerpc/platforms/86xx/gef_sbc310.c
@@ -0,0 +1,179 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * GE SBC310 board support
+ *
+ * Author: Martyn Welch <martyn.welch@ge.com>
+ *
+ * Copyright 2008 GE Intelligent Platforms Embedded Systems, Inc.
+ *
+ * Based on: mpc86xx_hpcn.c (MPC86xx HPCN board specific routines)
+ * Copyright 2006 Freescale Semiconductor Inc.
+ *
+ * NEC fixup adapted from arch/mips/pci/fixup-lm2e.c
+ */
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/kdev_t.h>
+#include <linux/delay.h>
+#include <linux/seq_file.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+
+#include <asm/time.h>
+#include <asm/machdep.h>
+#include <asm/pci-bridge.h>
+#include <mm/mmu_decl.h>
+#include <asm/udbg.h>
+
+#include <asm/mpic.h>
+#include <asm/nvram.h>
+
+#include <sysdev/fsl_pci.h>
+#include <sysdev/fsl_soc.h>
+#include <sysdev/ge/ge_pic.h>
+
+#include "mpc86xx.h"
+
+#undef DEBUG
+
+/*
+ * Debug print helper. There must be no space between the macro name and
+ * the opening parenthesis, otherwise DBG becomes an object-like macro and
+ * any DBG(...) call fails to compile.
+ */
+#ifdef DEBUG
+#define DBG(fmt...) do { printk(KERN_ERR "SBC310: " fmt); } while (0)
+#else
+#define DBG(fmt...) do { } while (0)
+#endif
+
+void __iomem *sbc310_regs;
+
+/*
+ * Initialise the MPC86xx interrupt controller and, when the device tree
+ * has a "gef,fpga-pic" node, pass that node to gef_pic_init().
+ */
+static void __init gef_sbc310_init_irq(void)
+{
+	struct device_node *fpga_pic;
+
+	mpc86xx_init_irq();
+
+	/*
+	 * There is a simple interrupt handler in the main FPGA, this needs
+	 * to be cascaded into the MPIC
+	 */
+	fpga_pic = of_find_compatible_node(NULL, NULL, "gef,fpga-pic");
+	if (fpga_pic) {
+		gef_pic_init(fpga_pic);
+		of_node_put(fpga_pic);
+	} else {
+		printk(KERN_WARNING "SBC310: No FPGA PIC\n");
+	}
+}
+
+/*
+ * Architecture setup for the SBC310: announce the board, initialise SMP
+ * (if configured), assign the primary PCI bus, map the FPGA board
+ * registers into sbc310_regs and initialise MMIO NVRAM (if configured).
+ */
+static void __init gef_sbc310_setup_arch(void)
+{
+	struct device_node *fpga;
+
+	printk(KERN_INFO "GE Intelligent Platforms SBC310 6U VPX SBC\n");
+
+#ifdef CONFIG_SMP
+	mpc86xx_smp_init();
+#endif
+
+	fsl_pci_assign_primary();
+
+	/* Remap basic board registers */
+	fpga = of_find_compatible_node(NULL, NULL, "gef,fpga-regs");
+	if (fpga != NULL) {
+		sbc310_regs = of_iomap(fpga, 0);
+		of_node_put(fpga);
+		if (!sbc310_regs)
+			printk(KERN_WARNING "Unable to map board registers\n");
+	}
+
+#if defined(CONFIG_MMIO_NVRAM)
+	mmio_nvram_init();
+#endif
+}
+
+/* Return the board ID: low byte of the first board register */
+static unsigned int gef_sbc310_get_board_id(void)
+{
+	return ioread32(sbc310_regs) & 0xff;
+}
+
+/* Return the PCB revision: bits 15:8 of the first board register */
+static unsigned int gef_sbc310_get_pcb_rev(void)
+{
+	return (ioread32(sbc310_regs) >> 8) & 0xff;
+}
+
+/* Return the board (software) revision: bits 23:16 of the first register */
+static unsigned int gef_sbc310_get_board_rev(void)
+{
+	return (ioread32(sbc310_regs) >> 16) & 0xff;
+}
+
+/* Return the FPGA revision: four bits starting at bit 24 of the register */
+static unsigned int gef_sbc310_get_fpga_rev(void)
+{
+	return (ioread32(sbc310_regs) >> 24) & 0xf;
+}
+
+/* Write vendor, board ID, PCB/board revision, FPGA revision and SVR to @m. */
+static void gef_sbc310_show_cpuinfo(struct seq_file *m)
+{
+	uint svr = mfspr(SPRN_SVR);
+	unsigned int pcb_rev, board_rev;
+
+	seq_printf(m, "Vendor\t\t: GE Intelligent Platforms\n");
+
+	seq_printf(m, "Board ID\t: 0x%2.2x\n", gef_sbc310_get_board_id());
+
+	/* Board revision 1 is shown as 'A', 2 as 'B', and so on */
+	pcb_rev = gef_sbc310_get_pcb_rev();
+	board_rev = gef_sbc310_get_board_rev();
+	seq_printf(m, "Revision\t: %u%c\n", pcb_rev, ('A' + board_rev - 1));
+	seq_printf(m, "FPGA Revision\t: %u\n", gef_sbc310_get_fpga_rev());
+
+	seq_printf(m, "SVR\t\t: 0x%x\n", svr);
+}
+
+/*
+ * PCI header fixup for the NEC USB controller: rewrite the low three bits
+ * of config dword 0xe0 to 0x2 and write 1 << 5 to config dword 0xe4.
+ * A no-op unless the running machine is gef_sbc310.
+ */
+static void gef_sbc310_nec_fixup(struct pci_dev *pdev)
+{
+	unsigned int ports;
+
+	/* Do not do the fixup on other platforms! */
+	if (machine_is(gef_sbc310)) {
+		printk(KERN_INFO "Running NEC uPD720101 Fixup\n");
+
+		/* Ensure only ports 1 & 2 are enabled */
+		pci_read_config_dword(pdev, 0xe0, &ports);
+		ports = (ports & ~7) | 0x2;
+		pci_write_config_dword(pdev, 0xe0, ports);
+
+		/* System clock is 48-MHz Oscillator and EHCI Enabled. */
+		pci_write_config_dword(pdev, 0xe4, 1 << 5);
+	}
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NEC, PCI_DEVICE_ID_NEC_USB,
+			 gef_sbc310_nec_fixup);
+
+machine_arch_initcall(gef_sbc310, mpc86xx_common_publish_devices);
+
+/*
+ * Machine description for the GE SBC310, selected through the
+ * "gef,sbc310" compatible string. The PCI bus fixup hook is only present
+ * when CONFIG_PCI is enabled.
+ */
+define_machine(gef_sbc310) {
+ .name = "GE SBC310",
+ .compatible = "gef,sbc310",
+ .setup_arch = gef_sbc310_setup_arch,
+ .init_IRQ = gef_sbc310_init_irq,
+ .show_cpuinfo = gef_sbc310_show_cpuinfo,
+ .get_irq = mpic_get_irq,
+ .time_init = mpc86xx_time_init,
+ .progress = udbg_progress,
+#ifdef CONFIG_PCI
+ .pcibios_fixup_bus = fsl_pcibios_fixup_bus,
+#endif
+};
diff --git a/arch/powerpc/platforms/86xx/gef_sbc610.c b/arch/powerpc/platforms/86xx/gef_sbc610.c
new file mode 100644
index 000000000..365f51118
--- /dev/null
+++ b/arch/powerpc/platforms/86xx/gef_sbc610.c
@@ -0,0 +1,169 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * GE SBC610 board support
+ *
+ * Author: Martyn Welch <martyn.welch@ge.com>
+ *
+ * Copyright 2008 GE Intelligent Platforms Embedded Systems, Inc.
+ *
+ * Based on: mpc86xx_hpcn.c (MPC86xx HPCN board specific routines)
+ * Copyright 2006 Freescale Semiconductor Inc.
+ *
+ * NEC fixup adapted from arch/mips/pci/fixup-lm2e.c
+ */
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/kdev_t.h>
+#include <linux/delay.h>
+#include <linux/seq_file.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+
+#include <asm/time.h>
+#include <asm/machdep.h>
+#include <asm/pci-bridge.h>
+#include <mm/mmu_decl.h>
+#include <asm/udbg.h>
+
+#include <asm/mpic.h>
+#include <asm/nvram.h>
+
+#include <sysdev/fsl_pci.h>
+#include <sysdev/fsl_soc.h>
+#include <sysdev/ge/ge_pic.h>
+
+#include "mpc86xx.h"
+
+#undef DEBUG
+
+/*
+ * Debug print helper. There must be no space between the macro name and
+ * the opening parenthesis, otherwise DBG becomes an object-like macro and
+ * any DBG(...) call fails to compile.
+ */
+#ifdef DEBUG
+#define DBG(fmt...) do { printk(KERN_ERR "SBC610: " fmt); } while (0)
+#else
+#define DBG(fmt...) do { } while (0)
+#endif
+
+void __iomem *sbc610_regs;
+
+/*
+ * Initialise the MPC86xx interrupt controller and, when the device tree
+ * has a "gef,fpga-pic" node, pass that node to gef_pic_init().
+ */
+static void __init gef_sbc610_init_irq(void)
+{
+	struct device_node *fpga_pic;
+
+	mpc86xx_init_irq();
+
+	/*
+	 * There is a simple interrupt handler in the main FPGA, this needs
+	 * to be cascaded into the MPIC
+	 */
+	fpga_pic = of_find_compatible_node(NULL, NULL, "gef,fpga-pic");
+	if (fpga_pic) {
+		gef_pic_init(fpga_pic);
+		of_node_put(fpga_pic);
+	} else {
+		printk(KERN_WARNING "SBC610: No FPGA PIC\n");
+	}
+}
+
+/*
+ * Architecture setup for the SBC610: announce the board, initialise SMP
+ * (if configured), assign the primary PCI bus, map the FPGA board
+ * registers into sbc610_regs and initialise MMIO NVRAM (if configured).
+ */
+static void __init gef_sbc610_setup_arch(void)
+{
+	struct device_node *fpga;
+
+	printk(KERN_INFO "GE Intelligent Platforms SBC610 6U VPX SBC\n");
+
+#ifdef CONFIG_SMP
+	mpc86xx_smp_init();
+#endif
+
+	fsl_pci_assign_primary();
+
+	/* Remap basic board registers */
+	fpga = of_find_compatible_node(NULL, NULL, "gef,fpga-regs");
+	if (fpga != NULL) {
+		sbc610_regs = of_iomap(fpga, 0);
+		of_node_put(fpga);
+		if (!sbc610_regs)
+			printk(KERN_WARNING "Unable to map board registers\n");
+	}
+
+#if defined(CONFIG_MMIO_NVRAM)
+	mmio_nvram_init();
+#endif
+}
+
+/* Return the PCB revision: bits 15:8 of the first board register */
+static unsigned int gef_sbc610_get_pcb_rev(void)
+{
+	return (ioread32(sbc610_regs) >> 8) & 0xff;
+}
+
+/* Return the board (software) revision: bits 23:16 of the first register */
+static unsigned int gef_sbc610_get_board_rev(void)
+{
+	return (ioread32(sbc610_regs) >> 16) & 0xff;
+}
+
+/* Return the FPGA revision: four bits starting at bit 24 of the register */
+static unsigned int gef_sbc610_get_fpga_rev(void)
+{
+	return (ioread32(sbc610_regs) >> 24) & 0xf;
+}
+
+/* Write vendor, PCB/board revision, FPGA revision and SVR to @m. */
+static void gef_sbc610_show_cpuinfo(struct seq_file *m)
+{
+	uint svr = mfspr(SPRN_SVR);
+	unsigned int pcb_rev, board_rev;
+
+	seq_printf(m, "Vendor\t\t: GE Intelligent Platforms\n");
+
+	/* Board revision 1 is shown as 'A', 2 as 'B', and so on */
+	pcb_rev = gef_sbc610_get_pcb_rev();
+	board_rev = gef_sbc610_get_board_rev();
+	seq_printf(m, "Revision\t: %u%c\n", pcb_rev, ('A' + board_rev - 1));
+	seq_printf(m, "FPGA Revision\t: %u\n", gef_sbc610_get_fpga_rev());
+
+	seq_printf(m, "SVR\t\t: 0x%x\n", svr);
+}
+
+/*
+ * PCI header fixup for the NEC USB controller: rewrite the low three bits
+ * of config dword 0xe0 to 0x5 and write 1 << 5 to config dword 0xe4.
+ * A no-op unless the running machine is gef_sbc610.
+ */
+static void gef_sbc610_nec_fixup(struct pci_dev *pdev)
+{
+	unsigned int ports;
+
+	/* Do not do the fixup on other platforms! */
+	if (machine_is(gef_sbc610)) {
+		printk(KERN_INFO "Running NEC uPD720101 Fixup\n");
+
+		/* Ensure ports 1, 2, 3, 4 & 5 are enabled */
+		pci_read_config_dword(pdev, 0xe0, &ports);
+		ports = (ports & ~7) | 0x5;
+		pci_write_config_dword(pdev, 0xe0, ports);
+
+		/* System clock is 48-MHz Oscillator and EHCI Enabled. */
+		pci_write_config_dword(pdev, 0xe4, 1 << 5);
+	}
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NEC, PCI_DEVICE_ID_NEC_USB,
+			 gef_sbc610_nec_fixup);
+
+machine_arch_initcall(gef_sbc610, mpc86xx_common_publish_devices);
+
+/*
+ * Machine description for the GE SBC610, selected through the
+ * "gef,sbc610" compatible string. The PCI bus fixup hook is only present
+ * when CONFIG_PCI is enabled.
+ */
+define_machine(gef_sbc610) {
+ .name = "GE SBC610",
+ .compatible = "gef,sbc610",
+ .setup_arch = gef_sbc610_setup_arch,
+ .init_IRQ = gef_sbc610_init_irq,
+ .show_cpuinfo = gef_sbc610_show_cpuinfo,
+ .get_irq = mpic_get_irq,
+ .time_init = mpc86xx_time_init,
+ .progress = udbg_progress,
+#ifdef CONFIG_PCI
+ .pcibios_fixup_bus = fsl_pcibios_fixup_bus,
+#endif
+};
diff --git a/arch/powerpc/platforms/86xx/mpc86xx.h b/arch/powerpc/platforms/86xx/mpc86xx.h
new file mode 100644
index 000000000..61e52c757
--- /dev/null
+++ b/arch/powerpc/platforms/86xx/mpc86xx.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright 2006 Freescale Semiconductor Inc.
+ */
+
+#ifndef __MPC86XX_H__
+#define __MPC86XX_H__
+
+/*
+ * Declaration for the various functions exported by the
+ * mpc86xx_* files. Mostly for use by mpc86xx_setup().
+ */
+
+/* Points smp_ops at the 86xx SMP operations (mpc86xx_smp.c) */
+extern void mpc86xx_smp_init(void);
+/* Called first by each board's init_IRQ hook; defined outside this excerpt */
+extern void mpc86xx_init_irq(void);
+/* Zeroes the time base and sets HID0_TBEN; always returns 0 (common.c) */
+extern long mpc86xx_time_init(void);
+/* Runs of_platform_bus_probe() over the common bus match table (common.c) */
+extern int mpc86xx_common_publish_devices(void);
+
+#endif /* __MPC86XX_H__ */
diff --git a/arch/powerpc/platforms/86xx/mpc86xx_smp.c b/arch/powerpc/platforms/86xx/mpc86xx_smp.c
new file mode 100644
index 000000000..8a7e55acf
--- /dev/null
+++ b/arch/powerpc/platforms/86xx/mpc86xx_smp.c
@@ -0,0 +1,118 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Author: Xianghua Xiao <x.xiao@freescale.com>
+ * Zhang Wei <wei.zhang@freescale.com>
+ *
+ * Copyright 2006 Freescale Semiconductor Inc.
+ */
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/pgtable.h>
+
+#include <asm/code-patching.h>
+#include <asm/page.h>
+#include <asm/pci-bridge.h>
+#include <asm/mpic.h>
+#include <asm/cacheflush.h>
+#include <asm/inst.h>
+
+#include <sysdev/fsl_soc.h>
+
+#include "mpc86xx.h"
+
+extern void __secondary_start_mpc86xx(void);
+
+#define MCM_PORT_CONFIG_OFFSET 0x10
+
+/* Offset from CCSRBAR */
+#define MPC86xx_MCM_OFFSET (0x1000)
+#define MPC86xx_MCM_SIZE (0x1000)
+
+/*
+ * Release core @nr by setting bit (@nr + 24) in the MCM port
+ * configuration register.
+ *
+ * Does nothing if @nr is outside [0, NR_CPUS) or if the MCM register
+ * block cannot be mapped (an error is logged in the latter case).
+ */
+static void __init
+smp_86xx_release_core(int nr)
+{
+	__be32 __iomem *mcm_vaddr;
+	unsigned long pcr;
+
+	if (nr < 0 || nr >= NR_CPUS)
+		return;
+
+	/*
+	 * Startup Core #nr.
+	 */
+	mcm_vaddr = ioremap(get_immrbase() + MPC86xx_MCM_OFFSET,
+			    MPC86xx_MCM_SIZE);
+	if (!mcm_vaddr) {
+		/* Do not dereference a failed mapping */
+		pr_err("smp_86xx_release_core: unable to map MCM registers\n");
+		return;
+	}
+
+	/* mcm_vaddr is a 32-bit pointer, hence the byte offset >> 2 */
+	pcr = in_be32(mcm_vaddr + (MCM_PORT_CONFIG_OFFSET >> 2));
+	/* Unsigned constant: avoids shifting into the sign bit of an int */
+	pcr |= 1UL << (nr + 24);
+	out_be32(mcm_vaddr + (MCM_PORT_CONFIG_OFFSET >> 2), pcr);
+
+	iounmap(mcm_vaddr);
+}
+
+
+/*
+ * Start secondary CPU @nr.
+ *
+ * Temporarily patches the instruction at KERNELBASE + 0x100 with a branch
+ * to __secondary_start_mpc86xx, releases the core, waits up to roughly
+ * 1000 ms for __secondary_hold_acknowledge to equal @nr, then restores
+ * the saved instruction. Runs with local interrupts disabled throughout.
+ *
+ * Returns -ENOENT if @nr is outside [0, NR_CPUS), otherwise 0. The return
+ * value does not say whether the CPU acknowledged before the timeout.
+ */
+static int __init
+smp_86xx_kick_cpu(int nr)
+{
+ unsigned int save_vector;
+ unsigned long target, flags;
+ int n = 0;
+ unsigned int *vector = (unsigned int *)(KERNELBASE + 0x100);
+
+ if (nr < 0 || nr >= NR_CPUS)
+ return -ENOENT;
+
+ pr_debug("smp_86xx_kick_cpu: kick CPU #%d\n", nr);
+
+ local_irq_save(flags);
+
+ /* Save reset vector */
+ save_vector = *vector;
+
+ /* Setup fake reset vector to call __secondary_start_mpc86xx. */
+ target = (unsigned long) __secondary_start_mpc86xx;
+ patch_branch(vector, target, BRANCH_SET_LINK);
+
+ /* Kick that CPU */
+ smp_86xx_release_core(nr);
+
+ /* Wait a bit for the CPU to take the exception. */
+ /* Polls in 1 ms steps; n counts the iterations and caps the wait */
+ while ((__secondary_hold_acknowledge != nr) && (n++, n < 1000))
+ mdelay(1);
+
+ /* Restore the exception vector */
+ patch_instruction(vector, ppc_inst(save_vector));
+
+ local_irq_restore(flags);
+
+ pr_debug("wait CPU #%d for %d msecs.\n", nr, n);
+
+ return 0;
+}
+
+
+/* Per-CPU setup hook: @cpu_nr is unused, only the MPIC is set up here. */
+static void __init
+smp_86xx_setup_cpu(int cpu_nr)
+{
+	mpic_setup_this_cpu();
+}
+
+
+/*
+ * SMP operations for 86xx boards, installed by mpc86xx_smp_init().
+ * kick_cpu and setup_cpu are local to this file; message passing, probe
+ * and time base hand-over use the MPIC and generic helpers. No NMI IPI
+ * hook is provided.
+ */
+struct smp_ops_t smp_86xx_ops = {
+ .cause_nmi_ipi = NULL,
+ .message_pass = smp_mpic_message_pass,
+ .probe = smp_mpic_probe,
+ .kick_cpu = smp_86xx_kick_cpu,
+ .setup_cpu = smp_86xx_setup_cpu,
+ .take_timebase = smp_generic_take_timebase,
+ .give_timebase = smp_generic_give_timebase,
+};
+
+
+/* Select the 86xx SMP operations as the active smp_ops. */
+void __init
+mpc86xx_smp_init(void)
+{
+	smp_ops = &smp_86xx_ops;
+}
diff --git a/arch/powerpc/platforms/86xx/mvme7100.c b/arch/powerpc/platforms/86xx/mvme7100.c
new file mode 100644
index 000000000..cee49ecd3
--- /dev/null
+++ b/arch/powerpc/platforms/86xx/mvme7100.c
@@ -0,0 +1,114 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Board setup routines for the Emerson/Artesyn MVME7100
+ *
+ * Copyright 2016 Elettra-Sincrotrone Trieste S.C.p.A.
+ *
+ * Author: Alessio Igor Bogani <alessio.bogani@elettra.eu>
+ *
+ * Based on earlier code by:
+ *
+ * Ajit Prem <ajit.prem@emerson.com>
+ * Copyright 2008 Emerson
+ *
+ * USB host fixup is borrowed from:
+ *
+ * Martyn Welch <martyn.welch@ge.com>
+ * Copyright 2008 GE Intelligent Platforms Embedded Systems, Inc.
+ */
+
+#include <linux/pci.h>
+#include <linux/of.h>
+#include <linux/of_fdt.h>
+#include <linux/of_address.h>
+#include <asm/udbg.h>
+#include <asm/mpic.h>
+#include <sysdev/fsl_soc.h>
+#include <sysdev/fsl_pci.h>
+
+#include "mpc86xx.h"
+
+/*
+ * Byte offset of "interrupt register 2" within the mapped board registers,
+ * and the bits that mvme7100_setup_arch() sets in it.
+ */
+#define MVME7100_INTERRUPT_REG_2_OFFSET 0x05
+#define MVME7100_DS1375_MASK 0x40
+#define MVME7100_MAX6649_MASK 0x20
+#define MVME7100_ABORT_MASK 0x10
+
+/*
+ * Board-level architecture setup: SMP ops (when configured), primary PCI
+ * bus selection, and masking of board interrupt sources via the BCSR.
+ */
+static void __init mvme7100_setup_arch(void)
+{
+	void __iomem *mvme7100_regs = NULL;
+	struct device_node *bcsr_node;
+
+	if (ppc_md.progress)
+		ppc_md.progress("mvme7100_setup_arch()", 0);
+
+#ifdef CONFIG_SMP
+	mpc86xx_smp_init();
+#endif
+
+	fsl_pci_assign_primary();
+
+	/* Find the BCSR node and map its first register window */
+	bcsr_node = of_find_compatible_node(NULL, NULL,
+					    "artesyn,mvme7100-bcsr");
+	if (bcsr_node) {
+		mvme7100_regs = of_iomap(bcsr_node, 0);
+		of_node_put(bcsr_node);
+	}
+
+	if (!mvme7100_regs) {
+		pr_warn("Unable to map board registers\n");
+	} else {
+		void __iomem *irq_reg2 =
+			mvme7100_regs + MVME7100_INTERRUPT_REG_2_OFFSET;
+		u8 reg = readb(irq_reg2);
+
+		/* Disable ds1375, max6649, and abort interrupts */
+		reg |= MVME7100_DS1375_MASK | MVME7100_MAX6649_MASK |
+		       MVME7100_ABORT_MASK;
+		writeb(reg, irq_reg2);
+	}
+
+	pr_info("MVME7100 board from Artesyn\n");
+}
+
+/*
+ * Early machine probe.  Runs before the device tree is unflattened, so the
+ * check is made against the root of the flat tree.
+ */
+static int __init mvme7100_probe(void)
+{
+	return of_flat_dt_is_compatible(of_get_flat_dt_root(),
+					"artesyn,MVME7100");
+}
+
+/*
+ * PCI header fixup registered for the NEC USB device ID.  It does nothing
+ * unless the running machine is mvme7100; otherwise it rewrites the config
+ * dwords at offsets 0xe0 and 0xe4 as described by the comments below.
+ */
+static void mvme7100_usb_host_fixup(struct pci_dev *pdev)
+{
+ unsigned int val;
+
+ if (!machine_is(mvme7100))
+ return;
+
+ /* Ensure only ports 1 & 2 are enabled */
+ pci_read_config_dword(pdev, 0xe0, &val);
+ /* replace the low three bits of the dword with the value 2 */
+ pci_write_config_dword(pdev, 0xe0, (val & ~7) | 0x2);
+
+ /* System clock is 48-MHz Oscillator and EHCI Enabled. */
+ pci_write_config_dword(pdev, 0xe4, 1 << 5);
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NEC, PCI_DEVICE_ID_NEC_USB,
+ mvme7100_usb_host_fixup);
+
+/* Run mpc86xx_common_publish_devices as an arch initcall on this machine */
+machine_arch_initcall(mvme7100, mpc86xx_common_publish_devices);
+
+/*
+ * Machine description for the MVME7100: board-specific probe and
+ * setup_arch, shared mpc86xx IRQ/time initialisation, MPIC interrupt fetch
+ * and udbg progress output.
+ */
+define_machine(mvme7100) {
+ .name = "MVME7100",
+ .probe = mvme7100_probe,
+ .setup_arch = mvme7100_setup_arch,
+ .init_IRQ = mpc86xx_init_irq,
+ .get_irq = mpic_get_irq,
+ .time_init = mpc86xx_time_init,
+ .progress = udbg_progress,
+#ifdef CONFIG_PCI
+ .pcibios_fixup_bus = fsl_pcibios_fixup_bus,
+#endif
+};
diff --git a/arch/powerpc/platforms/86xx/pic.c b/arch/powerpc/platforms/86xx/pic.c
new file mode 100644
index 000000000..9ca36de23
--- /dev/null
+++ b/arch/powerpc/platforms/86xx/pic.c
@@ -0,0 +1,69 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2008 Freescale Semiconductor, Inc.
+ */
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
+
+#include <asm/mpic.h>
+#include <asm/i8259.h>
+
+#include "mpc86xx.h"
+
+#ifdef CONFIG_PPC_I8259
+/*
+ * Chained handler for the i8259 cascade interrupt: dispatches the interrupt
+ * reported by i8259_irq() (a zero result is skipped), then issues the EOI
+ * through the cascade interrupt's own irq chip.
+ */
+static void mpc86xx_8259_cascade(struct irq_desc *desc)
+{
+ struct irq_chip *chip = irq_desc_get_chip(desc);
+ unsigned int cascade_irq = i8259_irq();
+
+ if (cascade_irq)
+ generic_handle_irq(cascade_irq);
+
+ chip->irq_eoi(&desc->irq_data);
+}
+#endif /* CONFIG_PPC_I8259 */
+
+/*
+ * Allocate and initialise the MPIC and, when CONFIG_PPC_I8259 is set, hook
+ * up a cascaded i8259 found in the device tree ("interrupt-controller" node
+ * compatible with "chrp,iic").
+ *
+ * A missing i8259 node or an unmappable cascade interrupt is logged and
+ * leaves the MPIC as the only interrupt controller.
+ */
+void __init mpc86xx_init_irq(void)
+{
+#ifdef CONFIG_PPC_I8259
+	struct device_node *np;
+	struct device_node *cascade_node = NULL;
+	int cascade_irq;
+#endif
+
+	struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN |
+			MPIC_SINGLE_DEST_CPU,
+			0, 256, " MPIC ");
+	BUG_ON(mpic == NULL);
+
+	mpic_init(mpic);
+
+#ifdef CONFIG_PPC_I8259
+	/* Initialize i8259 controller */
+	for_each_node_by_type(np, "interrupt-controller")
+		if (of_device_is_compatible(np, "chrp,iic")) {
+			/* breaking out keeps the iterator's node reference */
+			cascade_node = np;
+			break;
+		}
+
+	if (cascade_node == NULL) {
+		printk(KERN_DEBUG "Could not find i8259 PIC\n");
+		return;
+	}
+
+	cascade_irq = irq_of_parse_and_map(cascade_node, 0);
+	if (!cascade_irq) {
+		printk(KERN_ERR "Failed to map cascade interrupt\n");
+		/* drop the reference taken by the lookup loop */
+		of_node_put(cascade_node);
+		return;
+	}
+
+	i8259_init(cascade_node, 0);
+	of_node_put(cascade_node);
+
+	irq_set_chained_handler(cascade_irq, mpc86xx_8259_cascade);
+#endif
+}
diff --git a/arch/powerpc/platforms/8xx/Kconfig b/arch/powerpc/platforms/8xx/Kconfig
new file mode 100644
index 000000000..a14d9d899
--- /dev/null
+++ b/arch/powerpc/platforms/8xx/Kconfig
@@ -0,0 +1,200 @@
+# SPDX-License-Identifier: GPL-2.0
+config CPM1
+ bool
+ select CPM
+
+choice
+ prompt "8xx Machine Type"
+ depends on PPC_8xx
+ default MPC885ADS
+
+config MPC8XXFADS
+ bool "FADS"
+
+config MPC86XADS
+ bool "MPC86XADS"
+ select CPM1
+ help
+ MPC86x Application Development System by Freescale Semiconductor.
+ The MPC86xADS is meant to serve as a platform for s/w and h/w
+ development around the MPC86X processor families.
+
+config MPC885ADS
+ bool "MPC885ADS"
+ select CPM1
+ select OF_DYNAMIC
+ help
+ Freescale Semiconductor MPC885 Application Development System (ADS).
+ Also known as DUET.
+ The MPC885ADS is meant to serve as a platform for s/w and h/w
+ development around the MPC885 processor family.
+
+config PPC_EP88XC
+ bool "Embedded Planet EP88xC (a.k.a. CWH-PPC-885XN-VE)"
+ select CPM1
+ help
+ This enables support for the Embedded Planet EP88xC board.
+
+ This board is also resold by Freescale as the QUICCStart
+ MPC885 Evaluation System and/or the CWH-PPC-885XN-VE.
+
+config PPC_ADDER875
+ bool "Analogue & Micro Adder 875"
+ select CPM1
+ help
+ This enables support for the Analogue & Micro Adder 875
+ board.
+
+config TQM8XX
+ bool "TQM8XX"
+ select CPM1
+ help
+ Support for the MPC8xx-based boards from TQM.
+
+endchoice
+
+menu "Freescale Ethernet driver platform-specific options"
+ depends on (FS_ENET && MPC885ADS)
+
+ config MPC8xx_SECOND_ETH
+ bool "Second Ethernet channel"
+ depends on MPC885ADS
+ default y
+ help
+ This enables support for second Ethernet on MPC885ADS and MPC86xADS boards.
+ The latter will use SCC1, for 885ADS you can select it below.
+
+ choice
+ prompt "Second Ethernet channel"
+ depends on MPC8xx_SECOND_ETH
+ default MPC8xx_SECOND_ETH_FEC2
+
+ config MPC8xx_SECOND_ETH_FEC2
+ bool "FEC2"
+ depends on MPC885ADS
+ help
+ Enable FEC2 to serve as 2-nd Ethernet channel. Note that SMC2
+ (often 2-nd UART) will not work if this is enabled.
+
+ config MPC8xx_SECOND_ETH_SCC3
+ bool "SCC3"
+ depends on MPC885ADS
+ help
+ Enable SCC3 to serve as 2-nd Ethernet channel. Note that SMC1
+ (often 1st UART) will not work if this is enabled.
+
+ endchoice
+
+endmenu
+
+#
+# MPC8xx Communication options
+#
+
+menu "MPC8xx CPM Options"
+ depends on PPC_8xx
+
+# This doesn't really belong here, but it is convenient to ask
+# 8xx specific questions.
+comment "Generic MPC8xx Options"
+
+config 8xx_GPIO
+ bool "GPIO API Support"
+ select GPIOLIB
+ select OF_GPIO_MM_GPIOCHIP
+ help
+ Saying Y here will cause the ports on an MPC8xx processor to be used
+ with the GPIO API. If you say N here, the kernel needs less memory.
+
+ If in doubt, say Y here.
+
+config 8xx_CPU15
+ bool "CPU15 Silicon Errata"
+ depends on !HUGETLB_PAGE
+ default y
+ help
+ This enables a workaround for erratum CPU15 on MPC8xx chips.
+ This bug can cause incorrect code execution under certain
+ circumstances. This workaround adds some overhead (a TLB miss
+ every time execution crosses a page boundary), and you may wish
+ to disable it if you have worked around the bug in the compiler
+ (by not placing conditional branches or branches to LR or CTR
+ in the last word of a page, with a target of the last cache
+ line in the next page), or if you have used some other
+ workaround.
+
+ If in doubt, say Y here.
+
+choice
+ prompt "Microcode patch selection"
+ default NO_UCODE_PATCH
+ help
+ Help not implemented yet, coming soon.
+
+config NO_UCODE_PATCH
+ bool "None"
+
+config USB_SOF_UCODE_PATCH
+ bool "USB SOF patch"
+ help
+ Help not implemented yet, coming soon.
+
+config I2C_SPI_UCODE_PATCH
+ bool "I2C/SPI relocation patch"
+ help
+ Help not implemented yet, coming soon.
+
+config I2C_SPI_SMC1_UCODE_PATCH
+ bool "I2C/SPI/SMC1 relocation patch"
+ help
+ Help not implemented yet, coming soon.
+
+config SMC_UCODE_PATCH
+ bool "SMC relocation patch"
+ help
+ This microcode relocates SMC1 and SMC2 parameter RAMs at
+ offset 0x1ec0 and 0x1fc0 to allow extended parameter RAM
+ for SCC3 and SCC4.
+
+endchoice
+
+config UCODE_PATCH
+ bool
+ default y
+ depends on !NO_UCODE_PATCH
+
+menu "8xx advanced setup"
+ depends on PPC_8xx
+
+config PIN_TLB
+ bool "Pinned Kernel TLBs"
+ depends on ADVANCED_OPTIONS
+ help
+ On the 8xx, we have 32 instruction TLBs and 32 data TLBs. In each
+ table 4 TLBs can be pinned.
+
+ It reduces the amount of usable TLBs to 28 (ie by 12%). That's the
+ reason why we make it selectable.
+
+ This option does nothing by itself, it just activates the selection of what
+ to pin.
+
+config PIN_TLB_DATA
+ bool "Pinned TLB for DATA"
+ depends on PIN_TLB
+ default y
+ help
+ This pins the first 32 Mbytes of memory with 8M pages.
+
+config PIN_TLB_IMMR
+ bool "Pinned TLB for IMMR"
+ depends on PIN_TLB
+ default y
+ help
+ This pins the IMMR area with a 512kbytes page. In case
+ CONFIG_PIN_TLB_DATA is also selected, it will reduce
+ CONFIG_PIN_TLB_DATA to 24 Mbytes.
+
+endmenu
+
+endmenu
diff --git a/arch/powerpc/platforms/8xx/Makefile b/arch/powerpc/platforms/8xx/Makefile
new file mode 100644
index 000000000..5a098f7d5
--- /dev/null
+++ b/arch/powerpc/platforms/8xx/Makefile
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for the PowerPC 8xx linux kernel.
+#
+obj-y += m8xx_setup.o machine_check.o pic.o
+obj-$(CONFIG_CPM1) += cpm1.o cpm1-ic.o
+obj-$(CONFIG_UCODE_PATCH) += micropatch.o
+obj-$(CONFIG_MPC885ADS) += mpc885ads_setup.o
+obj-$(CONFIG_MPC86XADS) += mpc86xads_setup.o
+obj-$(CONFIG_PPC_EP88XC) += ep88xc.o
+obj-$(CONFIG_PPC_ADDER875) += adder875.o
+obj-$(CONFIG_TQM8XX) += tqm8xx_setup.o
diff --git a/arch/powerpc/platforms/8xx/adder875.c b/arch/powerpc/platforms/8xx/adder875.c
new file mode 100644
index 000000000..d02f8dd66
--- /dev/null
+++ b/arch/powerpc/platforms/8xx/adder875.c
@@ -0,0 +1,105 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Analogue & Micro Adder MPC875 board support
+ *
+ * Author: Scott Wood <scottwood@freescale.com>
+ *
+ * Copyright (c) 2007 Freescale Semiconductor, Inc.
+ */
+
+#include <linux/init.h>
+#include <linux/of_platform.h>
+
+#include <asm/time.h>
+#include <asm/machdep.h>
+#include <asm/cpm1.h>
+#include <asm/8xx_immap.h>
+#include <asm/udbg.h>
+
+#include "mpc8xx.h"
+#include "pic.h"
+
+/* One pin configuration entry: the three arguments given to cpm1_set_pin() */
+struct cpm_pin {
+ int port, pin, flags;
+};
+
+/*
+ * Pin configuration applied by init_ioports(), grouped by the peripheral
+ * each set of pins serves (SMC1, MII1, MII2).
+ */
+static __initdata struct cpm_pin adder875_pins[] = {
+ /* SMC1 */
+ {CPM_PORTB, 24, CPM_PIN_INPUT}, /* RX */
+ {CPM_PORTB, 25, CPM_PIN_INPUT | CPM_PIN_SECONDARY}, /* TX */
+
+ /* MII1 */
+ {CPM_PORTA, 0, CPM_PIN_INPUT},
+ {CPM_PORTA, 1, CPM_PIN_INPUT},
+ {CPM_PORTA, 2, CPM_PIN_INPUT},
+ {CPM_PORTA, 3, CPM_PIN_INPUT},
+ {CPM_PORTA, 4, CPM_PIN_OUTPUT},
+ {CPM_PORTA, 10, CPM_PIN_OUTPUT},
+ {CPM_PORTA, 11, CPM_PIN_OUTPUT},
+ {CPM_PORTB, 19, CPM_PIN_INPUT},
+ {CPM_PORTB, 31, CPM_PIN_INPUT},
+ {CPM_PORTC, 12, CPM_PIN_INPUT},
+ {CPM_PORTC, 13, CPM_PIN_INPUT},
+ {CPM_PORTE, 30, CPM_PIN_OUTPUT},
+ {CPM_PORTE, 31, CPM_PIN_OUTPUT},
+
+ /* MII2 */
+ {CPM_PORTE, 14, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY},
+ {CPM_PORTE, 15, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY},
+ {CPM_PORTE, 16, CPM_PIN_OUTPUT},
+ {CPM_PORTE, 17, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY},
+ {CPM_PORTE, 18, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY},
+ {CPM_PORTE, 19, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY},
+ {CPM_PORTE, 20, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY},
+ {CPM_PORTE, 21, CPM_PIN_OUTPUT},
+ {CPM_PORTE, 22, CPM_PIN_OUTPUT},
+ {CPM_PORTE, 23, CPM_PIN_OUTPUT},
+ {CPM_PORTE, 24, CPM_PIN_OUTPUT},
+ {CPM_PORTE, 25, CPM_PIN_OUTPUT},
+ {CPM_PORTE, 26, CPM_PIN_OUTPUT},
+ {CPM_PORTE, 27, CPM_PIN_OUTPUT},
+ {CPM_PORTE, 28, CPM_PIN_OUTPUT},
+ {CPM_PORTE, 29, CPM_PIN_OUTPUT},
+};
+
+/*
+ * Apply every entry of adder875_pins, clock SMC1 from BRG1, and put both
+ * FECs into MII mode.
+ */
+static void __init init_ioports(void)
+{
+	const struct cpm_pin *entry = adder875_pins;
+	const struct cpm_pin *end = adder875_pins + ARRAY_SIZE(adder875_pins);
+
+	for (; entry < end; entry++)
+		cpm1_set_pin(entry->port, entry->pin, entry->flags);
+
+	cpm1_clk_setup(CPM_CLK_SMC1, CPM_BRG1, CPM_CLK_RTX);
+
+	/* Set FEC1 and FEC2 to MII mode */
+	clrbits32(&mpc8xx_immr->im_cpm.cp_cptr, 0x00000180);
+}
+
+/* setup_arch hook: reset the CPM, then configure the board's I/O ports. */
+static void __init adder875_setup(void)
+{
+ cpm_reset();
+ init_ioports();
+}
+
+/* Bus match table handed to of_platform_bus_probe() */
+static const struct of_device_id of_bus_ids[] __initconst = {
+ { .compatible = "simple-bus", },
+ {},
+};
+
+/*
+ * Device initcall (adder875 only): probe the buses matching of_bus_ids.
+ * The probe result is ignored and 0 is always returned.
+ */
+static int __init declare_of_platform_devices(void)
+{
+ of_platform_bus_probe(NULL, of_bus_ids, NULL);
+ return 0;
+}
+machine_device_initcall(adder875, declare_of_platform_devices);
+
+/*
+ * Machine description for the Adder MPC875, matched through the
+ * "analogue-and-micro,adder875" compatible string; IRQ, restart and
+ * progress hooks come from the shared mpc8xx/udbg code.
+ */
+define_machine(adder875) {
+ .name = "Adder MPC875",
+ .compatible = "analogue-and-micro,adder875",
+ .setup_arch = adder875_setup,
+ .init_IRQ = mpc8xx_pic_init,
+ .get_irq = mpc8xx_get_irq,
+ .restart = mpc8xx_restart,
+ .progress = udbg_progress,
+};
diff --git a/arch/powerpc/platforms/8xx/cpm1-ic.c b/arch/powerpc/platforms/8xx/cpm1-ic.c
new file mode 100644
index 000000000..a18fc7c99
--- /dev/null
+++ b/arch/powerpc/platforms/8xx/cpm1-ic.c
@@ -0,0 +1,188 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Interrupt controller for the
+ * Communication Processor Module.
+ * Copyright (c) 1997 Dan Malek (dmalek@jlc.net)
+ */
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/irqdomain.h>
+#include <linux/platform_device.h>
+#include <asm/cpm1.h>
+
+/* Per-controller state, used as both irq chip data and cascade handler data */
+struct cpm_pic_data {
+ cpic8xx_t __iomem *reg; /* mapped CPIC register block */
+ struct irq_domain *host; /* linear domain created in cpm_pic_probe() */
+};
+
+/* irq_chip .irq_mask: clear this interrupt's bit in the CIMR register. */
+static void cpm_mask_irq(struct irq_data *d)
+{
+	struct cpm_pic_data *pic = irq_data_get_irq_chip_data(d);
+	unsigned int vec = (unsigned int)irqd_to_hwirq(d);
+
+	clrbits32(&pic->reg->cpic_cimr, 1 << vec);
+}
+
+/* irq_chip .irq_unmask: set this interrupt's bit in the CIMR register. */
+static void cpm_unmask_irq(struct irq_data *d)
+{
+	struct cpm_pic_data *pic = irq_data_get_irq_chip_data(d);
+	unsigned int vec = (unsigned int)irqd_to_hwirq(d);
+
+	setbits32(&pic->reg->cpic_cimr, 1 << vec);
+}
+
+/* irq_chip .irq_eoi: write this interrupt's bit to the CISR register. */
+static void cpm_end_irq(struct irq_data *d)
+{
+	struct cpm_pic_data *pic = irq_data_get_irq_chip_data(d);
+	unsigned int vec = (unsigned int)irqd_to_hwirq(d);
+
+	out_be32(&pic->reg->cpic_cisr, 1 << vec);
+}
+
+/* irq_chip for CPM interrupts: mask/unmask via CIMR, EOI via CISR */
+static struct irq_chip cpm_pic = {
+ .name = "CPM PIC",
+ .irq_mask = cpm_mask_irq,
+ .irq_unmask = cpm_unmask_irq,
+ .irq_eoi = cpm_end_irq,
+};
+
+/*
+ * Fetch the pending CPM vector for the cascade described by @desc and
+ * translate it to a Linux irq number through the controller's linear
+ * domain.  The controller state is the handler data attached to @desc.
+ */
+static int cpm_get_irq(struct irq_desc *desc)
+{
+ struct cpm_pic_data *data = irq_desc_get_handler_data(desc);
+ int cpm_vec;
+
+ /*
+ * Get the vector by setting the ACK bit and then reading
+ * the register.
+ */
+ out_be16(&data->reg->cpic_civr, 1);
+ cpm_vec = in_be16(&data->reg->cpic_civr);
+ /* the vector number sits in the bits above bit 10 of CIVR */
+ cpm_vec >>= 11;
+
+ return irq_linear_revmap(data->host, cpm_vec);
+}
+
+/* Chained handler: dispatch whatever interrupt cpm_get_irq() reports. */
+static void cpm_cascade(struct irq_desc *desc)
+{
+ generic_handle_irq(cpm_get_irq(desc));
+}
+
+/*
+ * irq_domain .map callback: attach the domain's host data as chip data,
+ * flag @virq as level-type and install cpm_pic with the fasteoi flow
+ * handler.  Always returns 0; @hw is not used.
+ */
+static int cpm_pic_host_map(struct irq_domain *h, unsigned int virq,
+ irq_hw_number_t hw)
+{
+ irq_set_chip_data(virq, h->host_data);
+ irq_set_status_flags(virq, IRQ_LEVEL);
+ irq_set_chip_and_handler(virq, &cpm_pic, handle_fasteoi_irq);
+ return 0;
+}
+
+/* Domain operations for the CPM PIC; only .map is provided */
+static const struct irq_domain_ops cpm_pic_host_ops = {
+ .map = cpm_pic_host_map,
+};
+
+/*
+ * Probe the CPM interrupt controller: map its registers, program CICR,
+ * mask every source, create a 64-entry linear irq domain and install
+ * cpm_cascade() on the parent interrupt before setting CICR_IEN.
+ *
+ * Returns 0 on success, -ENODEV when the memory resource, the mapping or
+ * the domain is unavailable, -ENOMEM on allocation failure, or the negative
+ * value returned by platform_get_irq().
+ */
+static int cpm_pic_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct resource *res;
+ int irq;
+ struct cpm_pic_data *data;
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ if (!res)
+ return -ENODEV;
+
+ data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ data->reg = devm_ioremap(dev, res->start, resource_size(res));
+ if (!data->reg)
+ return -ENODEV;
+
+ irq = platform_get_irq(pdev, 0);
+ if (irq < 0)
+ return irq;
+
+ /* Initialize the CPM interrupt controller. */
+ /* half of the parent's hardware irq number goes in at bit 13 of CICR */
+ out_be32(&data->reg->cpic_cicr,
+ (CICR_SCD_SCC4 | CICR_SCC_SCC3 | CICR_SCB_SCC2 | CICR_SCA_SCC1) |
+ ((virq_to_hw(irq) / 2) << 13) | CICR_HP_MASK);
+
+ /* start with every CPM interrupt source masked */
+ out_be32(&data->reg->cpic_cimr, 0);
+
+ data->host = irq_domain_add_linear(dev->of_node, 64, &cpm_pic_host_ops, data);
+ if (!data->host)
+ return -ENODEV;
+
+ irq_set_handler_data(irq, data);
+ irq_set_chained_handler(irq, cpm_cascade);
+
+ /* set the IEN bit only once the cascade handler is in place */
+ setbits32(&data->reg->cpic_cicr, CICR_IEN);
+
+ return 0;
+}
+
+/*
+ * Device-tree match table: "fsl,cpm1-pic", or a node of type "cpm-pic"
+ * that is compatible with "CPM".
+ */
+static const struct of_device_id cpm_pic_match[] = {
+ {
+ .compatible = "fsl,cpm1-pic",
+ }, {
+ .type = "cpm-pic",
+ .compatible = "CPM",
+ }, {},
+};
+
+/* Platform driver binding cpm_pic_probe() to the nodes in cpm_pic_match */
+static struct platform_driver cpm_pic_driver = {
+ .driver = {
+ .name = "cpm-pic",
+ .of_match_table = cpm_pic_match,
+ },
+ .probe = cpm_pic_probe,
+};
+
+/* Register the CPM PIC platform driver at arch_initcall time. */
+static int __init cpm_pic_init(void)
+{
+ return platform_driver_register(&cpm_pic_driver);
+}
+arch_initcall(cpm_pic_init);
+
+/*
+ * The CPM can generate the error interrupt when there is a race condition
+ * between generating and masking interrupts. All we have to do is ACK it
+ * and return. This is a no-op function so we don't need any special
+ * tests in the interrupt handler.
+ *
+ * Both arguments are unused; the handler always reports IRQ_HANDLED.
+ */
+static irqreturn_t cpm_error_interrupt(int irq, void *dev)
+{
+ return IRQ_HANDLED;
+}
+
+/*
+ * Probe for the CPM error interrupt: look up the device's first interrupt
+ * and attach cpm_error_interrupt() to it.  Returns the negative value from
+ * platform_get_irq() on lookup failure, else the result of request_irq().
+ */
+static int cpm_error_probe(struct platform_device *pdev)
+{
+	int virq = platform_get_irq(pdev, 0);
+
+	if (virq >= 0)
+		return request_irq(virq, cpm_error_interrupt, IRQF_NO_THREAD,
+				   "error", NULL);
+
+	return virq;
+}
+
+/* Device-tree match table: compatible "fsl,cpm1", or device type "cpm" */
+static const struct of_device_id cpm_error_ids[] = {
+ { .compatible = "fsl,cpm1" },
+ { .type = "cpm" },
+ {},
+};
+
+/* Platform driver binding cpm_error_probe() to the nodes in cpm_error_ids */
+static struct platform_driver cpm_error_driver = {
+ .driver = {
+ .name = "cpm-error",
+ .of_match_table = cpm_error_ids,
+ },
+ .probe = cpm_error_probe,
+};
+
+/* Register the CPM error-interrupt driver at subsys_initcall time. */
+static int __init cpm_error_init(void)
+{
+ return platform_driver_register(&cpm_error_driver);
+}
+subsys_initcall(cpm_error_init);
diff --git a/arch/powerpc/platforms/8xx/cpm1.c b/arch/powerpc/platforms/8xx/cpm1.c
new file mode 100644
index 000000000..ebb5f6a27
--- /dev/null
+++ b/arch/powerpc/platforms/8xx/cpm1.c
@@ -0,0 +1,636 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * General Purpose functions for the global management of the
+ * Communication Processor Module.
+ * Copyright (c) 1997 Dan Malek (dmalek@jlc.net)
+ *
+ * In addition to the individual control of the communication
+ * channels, there are a few functions that globally affect the
+ * communication processor.
+ *
+ * Buffer descriptors must be allocated from the dual ported memory
+ * space. The allocator for that is here. When the communication
+ * process is reset, we reclaim the memory available. There is
+ * currently no deallocator for this memory.
+ * The amount of space available is platform dependent. On the
+ * MBX, the EPPC software loads additional microcode into the
+ * communication processor, and uses some of the DP ram for this
+ * purpose. Currently, the first 512 bytes and the last 256 bytes of
+ * memory are used. Right now I am conservative and only use the
+ * memory that can never be used for microcode. If there are
+ * applications that require more DP ram, we can expand the boundaries
+ * but then we have to be careful of any downloaded microcode.
+ */
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/dma-mapping.h>
+#include <linux/param.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+#include <linux/of_irq.h>
+#include <asm/page.h>
+#include <asm/8xx_immap.h>
+#include <asm/cpm1.h>
+#include <asm/io.h>
+#include <asm/rheap.h>
+#include <asm/cpm.h>
+
+#include <sysdev/fsl_soc.h>
+
+#ifdef CONFIG_8xx_GPIO
+#include <linux/gpio/legacy-of-mm-gpiochip.h>
+#endif
+
+#define CPM_MAP_SIZE (0x4000)
+
+/* cpmp is assigned in cpm_reset(); mpc8xx_immr is fixed at VIRT_IMMR_BASE */
+cpm8xx_t __iomem *cpmp; /* Pointer to comm processor space */
+immap_t __iomem *mpc8xx_immr = (void __iomem *)VIRT_IMMR_BASE;
+
+/*
+ * Point cpmp at the CPM registers inside the IMMR, reset the CPM (skipped
+ * when CONFIG_PPC_EARLY_DEBUG_CPM is set), load the microcode patch when
+ * configured, and program the SDMA configuration register.
+ */
+void __init cpm_reset(void)
+{
+ cpmp = &mpc8xx_immr->im_cpm;
+
+#ifndef CONFIG_PPC_EARLY_DEBUG_CPM
+ /* Perform a reset. */
+ out_be16(&cpmp->cp_cpcr, CPM_CR_RST | CPM_CR_FLG);
+
+ /* Wait for it. */
+ /* busy-waits with no timeout until the FLG bit reads back as zero */
+ while (in_be16(&cpmp->cp_cpcr) & CPM_CR_FLG);
+#endif
+
+#ifdef CONFIG_UCODE_PATCH
+ cpm_load_patch(cpmp);
+#endif
+
+ /*
+ * Set SDMA Bus Request priority 5.
+ * On 860T, this also enables FEC priority 6. I am not sure
+ * this is what we really want for some applications, but the
+ * manual recommends it.
+ * Bit 25, FAM can also be set to use FEC aggressive mode (860T).
+ */
+ if ((mfspr(SPRN_IMMR) & 0xffff) == 0x0900) /* MPC885 */
+ out_be32(&mpc8xx_immr->im_siu_conf.sc_sdcr, 0x40);
+ else
+ out_be32(&mpc8xx_immr->im_siu_conf.sc_sdcr, 1);
+}
+
+/* Serialises access to the CPM command register (CPCR) */
+static DEFINE_SPINLOCK(cmd_lock);
+
+/* Upper bound on CPCR polls before a command is reported as failed */
+#define MAX_CR_CMD_LOOPS 10000
+
+/*
+ * Issue a command to the CPM and wait for it to be accepted.
+ *
+ * @command: channel field only, i.e. a value confined to bits 4-7
+ * @opcode:  command opcode, placed in bits 8 and up of CPCR
+ *
+ * Returns 0 once the FLG bit has cleared, -EINVAL if @command has any bit
+ * set outside the channel field, or -EIO if FLG is still set after
+ * MAX_CR_CMD_LOOPS polls.
+ */
+int cpm_command(u32 command, u8 opcode)
+{
+	int i, ret;
+	unsigned long flags;
+
+	/*
+	 * Reject everything outside bits 4-7.  The low nibble has to be
+	 * clear as well: it is OR-ed into CPCR next to CPM_CR_FLG below and
+	 * would otherwise corrupt the register's control bits.
+	 */
+	if (command & 0xffffff0f)
+		return -EINVAL;
+
+	spin_lock_irqsave(&cmd_lock, flags);
+
+	ret = 0;
+	out_be16(&cpmp->cp_cpcr, command | CPM_CR_FLG | (opcode << 8));
+	for (i = 0; i < MAX_CR_CMD_LOOPS; i++)
+		if ((in_be16(&cpmp->cp_cpcr) & CPM_CR_FLG) == 0)
+			goto out;
+
+	printk(KERN_ERR "%s(): Not able to issue CPM command\n", __func__);
+	ret = -EIO;
+out:
+	spin_unlock_irqrestore(&cmd_lock, flags);
+	return ret;
+}
+EXPORT_SYMBOL(cpm_command);
+
+/*
+ * Set a baud rate generator. This needs lots of work. There are
+ * four BRGs, any of which can be wired to any channel.
+ * The internal baud rate clock is the system clock divided by 16.
+ * This assumes the baudrate is 16x oversampled by the uart.
+ *
+ * @brg:  index added to the address of cp_brgc1 to select the register
+ * @rate: requested baud rate, used as a divisor
+ *
+ * NOTE(review): @rate == 0 would divide by zero here; presumably callers
+ * never pass 0 -- confirm.
+ */
+#define BRG_INT_CLK (get_brgfreq())
+#define BRG_UART_CLK (BRG_INT_CLK/16)
+#define BRG_UART_CLK_DIV16 (BRG_UART_CLK/16)
+
+void
+cpm_setbrg(uint brg, uint rate)
+{
+ u32 __iomem *bp;
+
+ /* This is good enough to get SMCs running..... */
+ bp = &cpmp->cp_brgc1;
+ bp += brg;
+ /*
+ * The BRG has a 12-bit counter. For really slow baud rates (or
+ * really fast processors), we may have to further divide by 16.
+ */
+ if (((BRG_UART_CLK / rate) - 1) < 4096)
+ out_be32(bp, (((BRG_UART_CLK / rate) - 1) << 1) | CPM_BRG_EN);
+ else
+ out_be32(bp, (((BRG_UART_CLK_DIV16 / rate) - 1) << 1) |
+ CPM_BRG_EN | CPM_BRG_DIV16);
+}
+EXPORT_SYMBOL(cpm_setbrg);
+
+/*
+ * Register layout of one 16-bit port.  odr_sor is written for open-drain
+ * on port A and for the secondary option on port C; intr is only written
+ * for port C (see cpm1_set_pin16()).
+ */
+struct cpm_ioport16 {
+ __be16 dir, par, odr_sor, dat, intr;
+ __be16 res[3];
+};
+
+/* 32-bit port register layout used by the 32-bit GPIO chip code */
+struct cpm_ioport32b {
+ __be32 dir, par, odr, dat;
+};
+
+/* 32-bit port register layout with a sor register, used by cpm1_set_pin32() */
+struct cpm_ioport32e {
+ __be32 dir, par, sor, odr, dat;
+};
+
+/*
+ * Configure one pin of a 32-bit port (port B, or port E for any other
+ * value of @port).
+ *
+ * @pin is a bit number counted from the most significant bit (pin 0 is bit
+ * 31).  @flags selects direction (CPM_PIN_OUTPUT), GPIO versus dedicated
+ * function (CPM_PIN_GPIO), open-drain (CPM_PIN_OPENDRAIN) and, on port E
+ * only, the secondary option (CPM_PIN_SECONDARY).
+ */
+static void __init cpm1_set_pin32(int port, int pin, int flags)
+{
+ struct cpm_ioport32e __iomem *iop;
+ /* from here on, pin holds the bit mask rather than the pin number */
+ pin = 1 << (31 - pin);
+
+ if (port == CPM_PORTB)
+ iop = (struct cpm_ioport32e __iomem *)
+ &mpc8xx_immr->im_cpm.cp_pbdir;
+ else
+ iop = (struct cpm_ioport32e __iomem *)
+ &mpc8xx_immr->im_cpm.cp_pedir;
+
+ if (flags & CPM_PIN_OUTPUT)
+ setbits32(&iop->dir, pin);
+ else
+ clrbits32(&iop->dir, pin);
+
+ if (!(flags & CPM_PIN_GPIO))
+ setbits32(&iop->par, pin);
+ else
+ clrbits32(&iop->par, pin);
+
+ if (port == CPM_PORTB) {
+ /* NOTE(review): 16-bit access with a 32-bit mask; presumably cp_pbodr is a 16-bit register -- confirm */
+ if (flags & CPM_PIN_OPENDRAIN)
+ setbits16(&mpc8xx_immr->im_cpm.cp_pbodr, pin);
+ else
+ clrbits16(&mpc8xx_immr->im_cpm.cp_pbodr, pin);
+ }
+
+ if (port == CPM_PORTE) {
+ if (flags & CPM_PIN_SECONDARY)
+ setbits32(&iop->sor, pin);
+ else
+ clrbits32(&iop->sor, pin);
+
+ if (flags & CPM_PIN_OPENDRAIN)
+ setbits32(&mpc8xx_immr->im_cpm.cp_peodr, pin);
+ else
+ clrbits32(&mpc8xx_immr->im_cpm.cp_peodr, pin);
+ }
+}
+
+/*
+ * Configure one pin of a 16-bit port (every port that cpm1_set_pin() does
+ * not route to cpm1_set_pin32()).
+ *
+ * @pin is a bit number counted from the most significant bit (pin 0 is bit
+ * 15).  @flags selects direction and GPIO versus dedicated function;
+ * open-drain applies to port A only, and the secondary option and
+ * falling-edge interrupt apply to port C only.
+ */
+static void __init cpm1_set_pin16(int port, int pin, int flags)
+{
+ struct cpm_ioport16 __iomem *iop =
+ (struct cpm_ioport16 __iomem *)&mpc8xx_immr->im_ioport;
+
+ /* from here on, pin holds the bit mask rather than the pin number */
+ pin = 1 << (15 - pin);
+
+ /* port 0 uses the first register block; other ports index by port - 1 */
+ if (port != 0)
+ iop += port - 1;
+
+ if (flags & CPM_PIN_OUTPUT)
+ setbits16(&iop->dir, pin);
+ else
+ clrbits16(&iop->dir, pin);
+
+ if (!(flags & CPM_PIN_GPIO))
+ setbits16(&iop->par, pin);
+ else
+ clrbits16(&iop->par, pin);
+
+ if (port == CPM_PORTA) {
+ if (flags & CPM_PIN_OPENDRAIN)
+ setbits16(&iop->odr_sor, pin);
+ else
+ clrbits16(&iop->odr_sor, pin);
+ }
+ if (port == CPM_PORTC) {
+ if (flags & CPM_PIN_SECONDARY)
+ setbits16(&iop->odr_sor, pin);
+ else
+ clrbits16(&iop->odr_sor, pin);
+ if (flags & CPM_PIN_FALLEDGE)
+ setbits16(&iop->intr, pin);
+ else
+ clrbits16(&iop->intr, pin);
+ }
+}
+
+/*
+ * Configure a single CPM pin.  Ports B and E go through the 32-bit helper,
+ * every other port through the 16-bit helper.
+ */
+void __init cpm1_set_pin(enum cpm_port port, int pin, int flags)
+{
+	switch (port) {
+	case CPM_PORTB:
+	case CPM_PORTE:
+		cpm1_set_pin32(port, pin, flags);
+		break;
+	default:
+		cpm1_set_pin16(port, pin, flags);
+		break;
+	}
+}
+
+/*
+ * Route a clock source to a CPM peripheral.
+ *
+ * @target: peripheral (SCC1-4 are programmed through cp_sicr, SMC1/2
+ *          through cp_simode)
+ * @clock:  source (CPM_BRGn or CPM_CLKn) -- must be a combination listed in
+ *          clk_map for @target
+ * @mode:   CPM_CLK_RTX, CPM_CLK_RX, or anything else; only examined for the
+ *          cp_sicr targets
+ *
+ * Returns 0 on success or -EINVAL (after logging) for an unknown target or
+ * an unlisted target/clock combination.
+ */
+int __init cpm1_clk_setup(enum cpm_clk_target target, int clock, int mode)
+{
+ int shift;
+ int i, bits = 0;
+ u32 __iomem *reg;
+ u32 mask = 7;
+
+ /* { target, clock source, 3-bit field value selecting that source } */
+ u8 clk_map[][3] = {
+ {CPM_CLK_SCC1, CPM_BRG1, 0},
+ {CPM_CLK_SCC1, CPM_BRG2, 1},
+ {CPM_CLK_SCC1, CPM_BRG3, 2},
+ {CPM_CLK_SCC1, CPM_BRG4, 3},
+ {CPM_CLK_SCC1, CPM_CLK1, 4},
+ {CPM_CLK_SCC1, CPM_CLK2, 5},
+ {CPM_CLK_SCC1, CPM_CLK3, 6},
+ {CPM_CLK_SCC1, CPM_CLK4, 7},
+
+ {CPM_CLK_SCC2, CPM_BRG1, 0},
+ {CPM_CLK_SCC2, CPM_BRG2, 1},
+ {CPM_CLK_SCC2, CPM_BRG3, 2},
+ {CPM_CLK_SCC2, CPM_BRG4, 3},
+ {CPM_CLK_SCC2, CPM_CLK1, 4},
+ {CPM_CLK_SCC2, CPM_CLK2, 5},
+ {CPM_CLK_SCC2, CPM_CLK3, 6},
+ {CPM_CLK_SCC2, CPM_CLK4, 7},
+
+ {CPM_CLK_SCC3, CPM_BRG1, 0},
+ {CPM_CLK_SCC3, CPM_BRG2, 1},
+ {CPM_CLK_SCC3, CPM_BRG3, 2},
+ {CPM_CLK_SCC3, CPM_BRG4, 3},
+ {CPM_CLK_SCC3, CPM_CLK5, 4},
+ {CPM_CLK_SCC3, CPM_CLK6, 5},
+ {CPM_CLK_SCC3, CPM_CLK7, 6},
+ {CPM_CLK_SCC3, CPM_CLK8, 7},
+
+ {CPM_CLK_SCC4, CPM_BRG1, 0},
+ {CPM_CLK_SCC4, CPM_BRG2, 1},
+ {CPM_CLK_SCC4, CPM_BRG3, 2},
+ {CPM_CLK_SCC4, CPM_BRG4, 3},
+ {CPM_CLK_SCC4, CPM_CLK5, 4},
+ {CPM_CLK_SCC4, CPM_CLK6, 5},
+ {CPM_CLK_SCC4, CPM_CLK7, 6},
+ {CPM_CLK_SCC4, CPM_CLK8, 7},
+
+ {CPM_CLK_SMC1, CPM_BRG1, 0},
+ {CPM_CLK_SMC1, CPM_BRG2, 1},
+ {CPM_CLK_SMC1, CPM_BRG3, 2},
+ {CPM_CLK_SMC1, CPM_BRG4, 3},
+ {CPM_CLK_SMC1, CPM_CLK1, 4},
+ {CPM_CLK_SMC1, CPM_CLK2, 5},
+ {CPM_CLK_SMC1, CPM_CLK3, 6},
+ {CPM_CLK_SMC1, CPM_CLK4, 7},
+
+ {CPM_CLK_SMC2, CPM_BRG1, 0},
+ {CPM_CLK_SMC2, CPM_BRG2, 1},
+ {CPM_CLK_SMC2, CPM_BRG3, 2},
+ {CPM_CLK_SMC2, CPM_BRG4, 3},
+ {CPM_CLK_SMC2, CPM_CLK5, 4},
+ {CPM_CLK_SMC2, CPM_CLK6, 5},
+ {CPM_CLK_SMC2, CPM_CLK7, 6},
+ {CPM_CLK_SMC2, CPM_CLK8, 7},
+ };
+
+ /* pick the register and the bit position of the target's field */
+ switch (target) {
+ case CPM_CLK_SCC1:
+ reg = &mpc8xx_immr->im_cpm.cp_sicr;
+ shift = 0;
+ break;
+
+ case CPM_CLK_SCC2:
+ reg = &mpc8xx_immr->im_cpm.cp_sicr;
+ shift = 8;
+ break;
+
+ case CPM_CLK_SCC3:
+ reg = &mpc8xx_immr->im_cpm.cp_sicr;
+ shift = 16;
+ break;
+
+ case CPM_CLK_SCC4:
+ reg = &mpc8xx_immr->im_cpm.cp_sicr;
+ shift = 24;
+ break;
+
+ case CPM_CLK_SMC1:
+ reg = &mpc8xx_immr->im_cpm.cp_simode;
+ shift = 12;
+ break;
+
+ case CPM_CLK_SMC2:
+ reg = &mpc8xx_immr->im_cpm.cp_simode;
+ shift = 28;
+ break;
+
+ default:
+ printk(KERN_ERR "cpm1_clock_setup: invalid clock target\n");
+ return -EINVAL;
+ }
+
+ /* look up the field value for this target/clock pair */
+ for (i = 0; i < ARRAY_SIZE(clk_map); i++) {
+ if (clk_map[i][0] == target && clk_map[i][1] == clock) {
+ bits = clk_map[i][2];
+ break;
+ }
+ }
+
+ if (i == ARRAY_SIZE(clk_map)) {
+ printk(KERN_ERR "cpm1_clock_setup: invalid clock combination\n");
+ return -EINVAL;
+ }
+
+ bits <<= shift;
+ mask <<= shift;
+
+ /*
+ * In cp_sicr each target has two adjacent 3-bit fields: RTX programs
+ * both, RX only the upper one, any other mode only the lower one.
+ */
+ if (reg == &mpc8xx_immr->im_cpm.cp_sicr) {
+ if (mode == CPM_CLK_RTX) {
+ bits |= bits << 3;
+ mask |= mask << 3;
+ } else if (mode == CPM_CLK_RX) {
+ bits <<= 3;
+ mask <<= 3;
+ }
+ }
+
+ /* read-modify-write: only the selected field(s) change */
+ out_be32(reg, (in_be32(reg) & ~mask) | bits);
+
+ return 0;
+}
+
+/*
+ * GPIO LIB API implementation
+ */
+#ifdef CONFIG_8xx_GPIO
+
+/* Driver state for one 16-bit CPM1 GPIO port */
+struct cpm1_gpio16_chip {
+ struct of_mm_gpio_chip mm_gc;
+ /* taken around direction and data register updates */
+ spinlock_t lock;
+
+ /* shadowed data register to clear/set bits safely */
+ u16 cpdata;
+
+ /* IRQ associated with Pins when relevant */
+ int irq[16];
+};
+
+/* save_regs callback: load the shadow copy from the port's data register. */
+static void cpm1_gpio16_save_regs(struct of_mm_gpio_chip *mm_gc)
+{
+ struct cpm1_gpio16_chip *cpm1_gc =
+ container_of(mm_gc, struct cpm1_gpio16_chip, mm_gc);
+ struct cpm_ioport16 __iomem *iop = mm_gc->regs;
+
+ cpm1_gc->cpdata = in_be16(&iop->dat);
+}
+
+/*
+ * gpio_chip .get callback: return 1 if line @gpio reads high in the data
+ * register, 0 otherwise.  Line 0 is the most significant bit.
+ */
+static int cpm1_gpio16_get(struct gpio_chip *gc, unsigned int gpio)
+{
+	struct cpm_ioport16 __iomem *iop = to_of_mm_gpio_chip(gc)->regs;
+	u16 bit = 1 << (15 - gpio);
+
+	return (in_be16(&iop->dat) & bit) ? 1 : 0;
+}
+
+/*
+ * Update the shadow data value for @pin_mask and write the whole shadow to
+ * the data register.  Does no locking itself; both callers in this file
+ * hold cpm1_gc->lock.
+ */
+static void __cpm1_gpio16_set(struct of_mm_gpio_chip *mm_gc, u16 pin_mask,
+ int value)
+{
+ struct cpm1_gpio16_chip *cpm1_gc = gpiochip_get_data(&mm_gc->gc);
+ struct cpm_ioport16 __iomem *iop = mm_gc->regs;
+
+ if (value)
+ cpm1_gc->cpdata |= pin_mask;
+ else
+ cpm1_gc->cpdata &= ~pin_mask;
+
+ out_be16(&iop->dat, cpm1_gc->cpdata);
+}
+
+/* gpio_chip .set callback: drive line @gpio to @value under the chip lock. */
+static void cpm1_gpio16_set(struct gpio_chip *gc, unsigned int gpio, int value)
+{
+	struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc);
+	struct cpm1_gpio16_chip *chip = gpiochip_get_data(&mm_gc->gc);
+	u16 bit = 1 << (15 - gpio);
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&chip->lock, irqflags);
+	__cpm1_gpio16_set(mm_gc, bit, value);
+	spin_unlock_irqrestore(&chip->lock, irqflags);
+}
+
+/*
+ * gpio_chip .to_irq callback: return the irq recorded for line @gpio, or
+ * -ENXIO when none was recorded (entry is zero).
+ */
+static int cpm1_gpio16_to_irq(struct gpio_chip *gc, unsigned int gpio)
+{
+	struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc);
+	struct cpm1_gpio16_chip *chip = gpiochip_get_data(&mm_gc->gc);
+	int virq = chip->irq[gpio];
+
+	if (!virq)
+		return -ENXIO;
+
+	return virq;
+}
+
+/*
+ * gpio_chip .direction_output callback: under the chip lock, set the
+ * line's direction bit and then write @val to it.  Always returns 0.
+ */
+static int cpm1_gpio16_dir_out(struct gpio_chip *gc, unsigned int gpio, int val)
+{
+ struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc);
+ struct cpm1_gpio16_chip *cpm1_gc = gpiochip_get_data(&mm_gc->gc);
+ struct cpm_ioport16 __iomem *iop = mm_gc->regs;
+ unsigned long flags;
+ u16 pin_mask = 1 << (15 - gpio);
+
+ spin_lock_irqsave(&cpm1_gc->lock, flags);
+
+ setbits16(&iop->dir, pin_mask);
+ __cpm1_gpio16_set(mm_gc, pin_mask, val);
+
+ spin_unlock_irqrestore(&cpm1_gc->lock, flags);
+
+ return 0;
+}
+
+/*
+ * gpio_chip .direction_input callback: clear the line's direction bit
+ * under the chip lock.  Always returns 0.
+ */
+static int cpm1_gpio16_dir_in(struct gpio_chip *gc, unsigned int gpio)
+{
+ struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc);
+ struct cpm1_gpio16_chip *cpm1_gc = gpiochip_get_data(&mm_gc->gc);
+ struct cpm_ioport16 __iomem *iop = mm_gc->regs;
+ unsigned long flags;
+ u16 pin_mask = 1 << (15 - gpio);
+
+ spin_lock_irqsave(&cpm1_gc->lock, flags);
+
+ clrbits16(&iop->dir, pin_mask);
+
+ spin_unlock_irqrestore(&cpm1_gc->lock, flags);
+
+ return 0;
+}
+
+/*
+ * Register a 16-line CPM1 GPIO chip for @dev.
+ *
+ * When the node carries "fsl,cpm1-gpio-irq-mask", each set bit (MSB = line
+ * 0) marks a line with an interrupt; the node's interrupts are mapped to
+ * those lines in ascending line order.
+ *
+ * Returns 0 on success, -ENOMEM if the chip cannot be allocated, or the
+ * error from of_mm_gpiochip_add_data(), in which case the chip allocation
+ * is freed again.
+ */
+int cpm1_gpiochip_add16(struct device *dev)
+{
+	struct device_node *np = dev->of_node;
+	struct cpm1_gpio16_chip *cpm1_gc;
+	struct of_mm_gpio_chip *mm_gc;
+	struct gpio_chip *gc;
+	u16 mask;
+	int ret;
+
+	cpm1_gc = kzalloc(sizeof(*cpm1_gc), GFP_KERNEL);
+	if (!cpm1_gc)
+		return -ENOMEM;
+
+	spin_lock_init(&cpm1_gc->lock);
+
+	if (!of_property_read_u16(np, "fsl,cpm1-gpio-irq-mask", &mask)) {
+		int i, j;
+
+		for (i = 0, j = 0; i < 16; i++)
+			if (mask & (1 << (15 - i)))
+				cpm1_gc->irq[i] = irq_of_parse_and_map(np, j++);
+	}
+
+	mm_gc = &cpm1_gc->mm_gc;
+	gc = &mm_gc->gc;
+
+	mm_gc->save_regs = cpm1_gpio16_save_regs;
+	gc->ngpio = 16;
+	gc->direction_input = cpm1_gpio16_dir_in;
+	gc->direction_output = cpm1_gpio16_dir_out;
+	gc->get = cpm1_gpio16_get;
+	gc->set = cpm1_gpio16_set;
+	gc->to_irq = cpm1_gpio16_to_irq;
+	gc->parent = dev;
+	gc->owner = THIS_MODULE;
+
+	ret = of_mm_gpiochip_add_data(np, mm_gc, cpm1_gc);
+	if (ret)
+		kfree(cpm1_gc);	/* chip was never registered: do not leak it */
+
+	return ret;
+}
+
+/* Driver state for one 32-bit CPM1 GPIO port */
+struct cpm1_gpio32_chip {
+ struct of_mm_gpio_chip mm_gc;
+ /* taken around direction and data register updates */
+ spinlock_t lock;
+
+ /* shadowed data register to clear/set bits safely */
+ u32 cpdata;
+};
+
+/* save_regs callback: load the shadow copy from the port's data register. */
+static void cpm1_gpio32_save_regs(struct of_mm_gpio_chip *mm_gc)
+{
+ struct cpm1_gpio32_chip *cpm1_gc =
+ container_of(mm_gc, struct cpm1_gpio32_chip, mm_gc);
+ struct cpm_ioport32b __iomem *iop = mm_gc->regs;
+
+ cpm1_gc->cpdata = in_be32(&iop->dat);
+}
+
+/*
+ * gpio_chip .get callback: return 1 if line @gpio reads high in the data
+ * register, 0 otherwise.  Line 0 is the most significant bit.
+ */
+static int cpm1_gpio32_get(struct gpio_chip *gc, unsigned int gpio)
+{
+	struct cpm_ioport32b __iomem *iop = to_of_mm_gpio_chip(gc)->regs;
+	u32 bit = 1 << (31 - gpio);
+
+	return (in_be32(&iop->dat) & bit) ? 1 : 0;
+}
+
+/*
+ * Update the shadow data value for @pin_mask and write the whole shadow to
+ * the data register.  Does no locking itself; both callers in this file
+ * hold cpm1_gc->lock.
+ */
+static void __cpm1_gpio32_set(struct of_mm_gpio_chip *mm_gc, u32 pin_mask,
+ int value)
+{
+ struct cpm1_gpio32_chip *cpm1_gc = gpiochip_get_data(&mm_gc->gc);
+ struct cpm_ioport32b __iomem *iop = mm_gc->regs;
+
+ if (value)
+ cpm1_gc->cpdata |= pin_mask;
+ else
+ cpm1_gc->cpdata &= ~pin_mask;
+
+ out_be32(&iop->dat, cpm1_gc->cpdata);
+}
+
+/* gpio_chip .set callback: drive line @gpio to @value under the chip lock. */
+static void cpm1_gpio32_set(struct gpio_chip *gc, unsigned int gpio, int value)
+{
+	struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc);
+	struct cpm1_gpio32_chip *chip = gpiochip_get_data(&mm_gc->gc);
+	u32 bit = 1 << (31 - gpio);
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&chip->lock, irqflags);
+	__cpm1_gpio32_set(mm_gc, bit, value);
+	spin_unlock_irqrestore(&chip->lock, irqflags);
+}
+
+/*
+ * gpio_chip .direction_output callback: under the chip lock, set the
+ * line's direction bit and then write @val to it.  Always returns 0.
+ */
+static int cpm1_gpio32_dir_out(struct gpio_chip *gc, unsigned int gpio, int val)
+{
+ struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc);
+ struct cpm1_gpio32_chip *cpm1_gc = gpiochip_get_data(&mm_gc->gc);
+ struct cpm_ioport32b __iomem *iop = mm_gc->regs;
+ unsigned long flags;
+ u32 pin_mask = 1 << (31 - gpio);
+
+ spin_lock_irqsave(&cpm1_gc->lock, flags);
+
+ setbits32(&iop->dir, pin_mask);
+ __cpm1_gpio32_set(mm_gc, pin_mask, val);
+
+ spin_unlock_irqrestore(&cpm1_gc->lock, flags);
+
+ return 0;
+}
+
+/*
+ * gpio_chip .direction_input callback: clear the line's direction bit
+ * under the chip lock.  Always returns 0.
+ */
+static int cpm1_gpio32_dir_in(struct gpio_chip *gc, unsigned int gpio)
+{
+ struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc);
+ struct cpm1_gpio32_chip *cpm1_gc = gpiochip_get_data(&mm_gc->gc);
+ struct cpm_ioport32b __iomem *iop = mm_gc->regs;
+ unsigned long flags;
+ u32 pin_mask = 1 << (31 - gpio);
+
+ spin_lock_irqsave(&cpm1_gc->lock, flags);
+
+ clrbits32(&iop->dir, pin_mask);
+
+ spin_unlock_irqrestore(&cpm1_gc->lock, flags);
+
+ return 0;
+}
+
+/*
+ * Register a 32-line CPM1 GPIO chip for @dev.
+ *
+ * Returns 0 on success, -ENOMEM if the chip cannot be allocated, or the
+ * error from of_mm_gpiochip_add_data(), in which case the chip allocation
+ * is freed again.
+ */
+int cpm1_gpiochip_add32(struct device *dev)
+{
+	struct device_node *np = dev->of_node;
+	struct cpm1_gpio32_chip *cpm1_gc;
+	struct of_mm_gpio_chip *mm_gc;
+	struct gpio_chip *gc;
+	int ret;
+
+	cpm1_gc = kzalloc(sizeof(*cpm1_gc), GFP_KERNEL);
+	if (!cpm1_gc)
+		return -ENOMEM;
+
+	spin_lock_init(&cpm1_gc->lock);
+
+	mm_gc = &cpm1_gc->mm_gc;
+	gc = &mm_gc->gc;
+
+	mm_gc->save_regs = cpm1_gpio32_save_regs;
+	gc->ngpio = 32;
+	gc->direction_input = cpm1_gpio32_dir_in;
+	gc->direction_output = cpm1_gpio32_dir_out;
+	gc->get = cpm1_gpio32_get;
+	gc->set = cpm1_gpio32_set;
+	gc->parent = dev;
+	gc->owner = THIS_MODULE;
+
+	ret = of_mm_gpiochip_add_data(np, mm_gc, cpm1_gc);
+	if (ret)
+		kfree(cpm1_gc);	/* chip was never registered: do not leak it */
+
+	return ret;
+}
+
+#endif /* CONFIG_8xx_GPIO */
diff --git a/arch/powerpc/platforms/8xx/ep88xc.c b/arch/powerpc/platforms/8xx/ep88xc.c
new file mode 100644
index 000000000..fc276a29d
--- /dev/null
+++ b/arch/powerpc/platforms/8xx/ep88xc.c
@@ -0,0 +1,170 @@
+/*
+ * Platform setup for the Embedded Planet EP88xC board
+ *
+ * Author: Scott Wood <scottwood@freescale.com>
+ * Copyright 2007 Freescale Semiconductor, Inc.
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2. This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+#include <linux/init.h>
+#include <linux/of_address.h>
+#include <linux/of_fdt.h>
+#include <linux/of_platform.h>
+
+#include <asm/machdep.h>
+#include <asm/io.h>
+#include <asm/udbg.h>
+#include <asm/cpm1.h>
+
+#include "mpc8xx.h"
+#include "pic.h"
+
+/* One pin assignment; init_ioports() passes the fields to cpm1_set_pin(). */
+struct cpm_pin {
+	int port;
+	int pin;
+	int flags;
+};
+
+/*
+ * Pin assignments applied once by init_ioports().  The table is only read
+ * from __init code, so it is placed in init data.  Ports are spelled with
+ * the CPM_PORTx names from <asm/cpm1.h>, as the other 8xx board files do.
+ */
+static struct cpm_pin ep88xc_pins[] __initdata = {
+	/* SMC1 */
+	{CPM_PORTB, 24, CPM_PIN_INPUT}, /* RX */
+	{CPM_PORTB, 25, CPM_PIN_INPUT | CPM_PIN_SECONDARY}, /* TX */
+
+	/* SCC2 */
+	{CPM_PORTA, 12, CPM_PIN_INPUT}, /* TX */
+	{CPM_PORTA, 13, CPM_PIN_INPUT}, /* RX */
+	{CPM_PORTC, 8, CPM_PIN_INPUT | CPM_PIN_SECONDARY | CPM_PIN_GPIO}, /* CD */
+	{CPM_PORTC, 9, CPM_PIN_INPUT | CPM_PIN_SECONDARY | CPM_PIN_GPIO}, /* CTS */
+	{CPM_PORTC, 14, CPM_PIN_INPUT}, /* RTS */
+
+	/* MII1 */
+	{CPM_PORTA, 0, CPM_PIN_INPUT},
+	{CPM_PORTA, 1, CPM_PIN_INPUT},
+	{CPM_PORTA, 2, CPM_PIN_INPUT},
+	{CPM_PORTA, 3, CPM_PIN_INPUT},
+	{CPM_PORTA, 4, CPM_PIN_OUTPUT},
+	{CPM_PORTA, 10, CPM_PIN_OUTPUT},
+	{CPM_PORTA, 11, CPM_PIN_OUTPUT},
+	{CPM_PORTB, 19, CPM_PIN_INPUT},
+	{CPM_PORTB, 31, CPM_PIN_INPUT},
+	{CPM_PORTC, 12, CPM_PIN_INPUT},
+	{CPM_PORTC, 13, CPM_PIN_INPUT},
+	{CPM_PORTD, 8, CPM_PIN_INPUT},
+	{CPM_PORTE, 30, CPM_PIN_OUTPUT},
+	{CPM_PORTE, 31, CPM_PIN_OUTPUT},
+
+	/* MII2 */
+	{CPM_PORTE, 14, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY},
+	{CPM_PORTE, 15, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY},
+	{CPM_PORTE, 16, CPM_PIN_OUTPUT},
+	{CPM_PORTE, 17, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY},
+	{CPM_PORTE, 18, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY},
+	{CPM_PORTE, 19, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY},
+	{CPM_PORTE, 20, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY},
+	{CPM_PORTE, 21, CPM_PIN_OUTPUT},
+	{CPM_PORTE, 22, CPM_PIN_OUTPUT},
+	{CPM_PORTE, 23, CPM_PIN_OUTPUT},
+	{CPM_PORTE, 24, CPM_PIN_OUTPUT},
+	{CPM_PORTE, 25, CPM_PIN_OUTPUT},
+	{CPM_PORTE, 26, CPM_PIN_OUTPUT},
+	{CPM_PORTE, 27, CPM_PIN_OUTPUT},
+	{CPM_PORTE, 28, CPM_PIN_OUTPUT},
+	{CPM_PORTE, 29, CPM_PIN_OUTPUT},
+
+	/* USB */
+	{CPM_PORTA, 6, CPM_PIN_INPUT}, /* CLK2 */
+	{CPM_PORTA, 14, CPM_PIN_INPUT}, /* USBOE */
+	{CPM_PORTA, 15, CPM_PIN_INPUT}, /* USBRXD */
+	{CPM_PORTC, 6, CPM_PIN_OUTPUT}, /* USBTXN */
+	{CPM_PORTC, 7, CPM_PIN_OUTPUT}, /* USBTXP */
+	{CPM_PORTC, 10, CPM_PIN_INPUT}, /* USBRXN */
+	{CPM_PORTC, 11, CPM_PIN_INPUT}, /* USBRXP */
+
+	/* Misc */
+	{CPM_PORTB, 26, CPM_PIN_INPUT}, /* BRGO2 */
+	{CPM_PORTB, 27, CPM_PIN_INPUT}, /* BRGO1 */
+};
+
+/*
+ * Apply every entry of ep88xc_pins through cpm1_set_pin(), then route the
+ * clocks for SMC1, SCC1 (USB) and SCC2.
+ */
+static void __init init_ioports(void)
+{
+	const struct cpm_pin *entry = ep88xc_pins;
+	const struct cpm_pin *const end = ep88xc_pins + ARRAY_SIZE(ep88xc_pins);
+
+	for (; entry < end; entry++)
+		cpm1_set_pin(entry->port, entry->pin, entry->flags);
+
+	cpm1_clk_setup(CPM_CLK_SMC1, CPM_BRG1, CPM_CLK_RTX);
+
+	/* USB */
+	cpm1_clk_setup(CPM_CLK_SCC1, CPM_CLK2, CPM_CLK_TX);
+	cpm1_clk_setup(CPM_CLK_SCC1, CPM_CLK2, CPM_CLK_RX);
+
+	cpm1_clk_setup(CPM_CLK_SCC2, CPM_BRG2, CPM_CLK_TX);
+	cpm1_clk_setup(CPM_CLK_SCC2, CPM_BRG2, CPM_CLK_RX);
+}
+
+/*
+ * Byte-wide board control/status registers, mapped by ep88xc_setup_arch()
+ * and indexed by register number (ep88xc_bcsr[7], ep88xc_bcsr[8]).
+ */
+static u8 __iomem *ep88xc_bcsr;
+
+/* Bit set in ep88xc_bcsr[7] by ep88xc_setup_arch(). */
+#define BCSR7_SCC2_ENABLE 0x10
+
+/* Bits set in ep88xc_bcsr[8] by ep88xc_setup_arch(). */
+#define BCSR8_PHY1_ENABLE 0x80
+#define BCSR8_PHY1_POWER 0x40
+#define BCSR8_PHY2_ENABLE 0x20
+#define BCSR8_PHY2_POWER 0x10
+
+/* BCSR9 masks; not referenced by any code in this file. */
+#define BCSR9_USB_ENABLE 0x80
+#define BCSR9_USB_POWER 0x40
+#define BCSR9_USB_HOST 0x20
+#define BCSR9_USB_FULL_SPEED_TARGET 0x10
+
+/*
+ * Board setup: reset the CPM, configure the I/O ports, map the BCSR block
+ * described by the "fsl,ep88xc-bcsr" node and set the SCC2 and PHY bits.
+ * A missing node or failed mapping is logged and the BCSR writes skipped.
+ */
+static void __init ep88xc_setup_arch(void)
+{
+	struct device_node *bcsr_node;
+
+	cpm_reset();
+	init_ioports();
+
+	bcsr_node = of_find_compatible_node(NULL, NULL, "fsl,ep88xc-bcsr");
+	if (bcsr_node == NULL) {
+		printk(KERN_CRIT "Could not find fsl,ep88xc-bcsr node\n");
+		return;
+	}
+
+	ep88xc_bcsr = of_iomap(bcsr_node, 0);
+	of_node_put(bcsr_node);
+	if (ep88xc_bcsr == NULL) {
+		printk(KERN_CRIT "Could not remap BCSR\n");
+		return;
+	}
+
+	setbits8(ep88xc_bcsr + 7, BCSR7_SCC2_ENABLE);
+	setbits8(ep88xc_bcsr + 8,
+		 BCSR8_PHY1_ENABLE | BCSR8_PHY1_POWER |
+		 BCSR8_PHY2_ENABLE | BCSR8_PHY2_POWER);
+}
+
+/* Node names handed to of_platform_bus_probe() by declare_of_platform_devices(). */
+static const struct of_device_id of_bus_ids[] __initconst = {
+	{ .name = "soc" },
+	{ .name = "cpm" },
+	{ .name = "localbus" },
+	{ /* end of table */ },
+};
+
+/*
+ * Device initcall for the ep88xc machine: passes the of_bus_ids table to
+ * of_platform_bus_probe().  The probe result is not checked; returns 0.
+ */
+static int __init declare_of_platform_devices(void)
+{
+	of_platform_bus_probe(NULL, of_bus_ids, NULL);
+	return 0;
+}
+machine_device_initcall(ep88xc, declare_of_platform_devices);
+
+/* Machine description for the Embedded Planet EP88xC board. */
+define_machine(ep88xc) {
+	.name		= "Embedded Planet EP88xC",
+	.compatible	= "fsl,ep88xc",
+	.setup_arch	= ep88xc_setup_arch,
+	.init_IRQ	= mpc8xx_pic_init,
+	.get_irq	= mpc8xx_get_irq,
+	.restart	= mpc8xx_restart,
+	.calibrate_decr	= mpc8xx_calibrate_decr,
+	.progress	= udbg_progress,
+};
diff --git a/arch/powerpc/platforms/8xx/m8xx_setup.c b/arch/powerpc/platforms/8xx/m8xx_setup.c
new file mode 100644
index 000000000..2336b687b
--- /dev/null
+++ b/arch/powerpc/platforms/8xx/m8xx_setup.c
@@ -0,0 +1,172 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 1995 Linus Torvalds
+ * Adapted from 'alpha' version by Gary Thomas
+ * Modified by Cort Dougan (cort@cs.nmt.edu)
+ * Modified for MBX using prep/chrp/pmac functions by Dan (dmalek@jlc.net)
+ * Further modified for generic 8xx by Dan.
+ */
+
+/*
+ * bootup setup stuff..
+ */
+
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/time.h>
+#include <linux/rtc.h>
+#include <linux/fsl_devices.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
+
+#include <asm/io.h>
+#include <asm/8xx_immap.h>
+#include <mm/mmu_decl.h>
+
+#include "pic.h"
+
+#include "mpc8xx.h"
+
+/*
+ * Placeholder handler for time base interrupts, should they ever be
+ * enabled: logs its own name and reports the interrupt as handled.
+ */
+static irqreturn_t timebase_interrupt(int irq, void *dev)
+{
+	printk("timebase_interrupt()\n");
+	return IRQ_HANDLED;
+}
+
+/*
+ * Read the 32-bit property @name from the cpu 0 node into *@val.
+ *
+ * Returns 1 when the property was read, 0 when the cpu node or the property
+ * is missing or the property is too short to hold one cell.  *@val is left
+ * untouched on failure so the caller's default survives.
+ */
+static int __init get_freq(char *name, unsigned long *val)
+{
+	struct device_node *cpu;
+	u32 freq;
+	int found = 0;
+
+	/* The cpu node should have timebase and clock frequency properties */
+	cpu = of_get_cpu_node(0, NULL);
+	if (!cpu)
+		return 0;
+
+	/* of_property_read_u32() validates the length before reading the cell. */
+	if (!of_property_read_u32(cpu, name, &freq)) {
+		*val = freq;
+		found = 1;
+	}
+
+	of_node_put(cpu);
+
+	return found;
+}
+
+/* The decrementer counts at the system (internal) clock frequency divided by
+ * sixteen, or external oscillator divided by four. We force the processor
+ * to use system clock divided by sixteen.
+ *
+ * This routine also sets ppc_proc_freq and ppc_tb_freq, unlocks the
+ * key-protected timer registers, enables the RTC and the time base, and
+ * installs timebase_interrupt() on the interrupt described by the cpu node.
+ * It panics if that interrupt cannot be requested.
+ */
+void __init mpc8xx_calibrate_decr(void)
+{
+ struct device_node *cpu;
+ int irq, virq;
+
+ /* Unlock the SCCR. */
+ out_be32(&mpc8xx_immr->im_clkrstk.cark_sccrk, ~KAPWR_KEY);
+ out_be32(&mpc8xx_immr->im_clkrstk.cark_sccrk, KAPWR_KEY);
+
+ /* Force all 8xx processors to use divide by 16 processor clock. */
+ setbits32(&mpc8xx_immr->im_clkrst.car_sccr, 0x02000000);
+
+ /* Processor frequency: start from a 50 MHz default and replace it with
+ * the cpu node's "clock-frequency" property when that can be read.
+ */
+ ppc_proc_freq = 50000000;
+ if (!get_freq("clock-frequency", &ppc_proc_freq))
+ printk(KERN_ERR "WARNING: Estimating processor frequency "
+ "(not found)\n");
+
+ /* Matches the divide-by-16 selection made above. */
+ ppc_tb_freq = ppc_proc_freq / 16;
+ printk("Decrementer Frequency = 0x%lx\n", ppc_tb_freq);
+
+ /* Perform some more timer/timebase initialization. This used
+ * to be done elsewhere, but other changes caused it to get
+ * called more than once....that is a bad thing.
+ *
+ * First, unlock all of the registers we are going to modify.
+ * To protect them from corruption during power down, registers
+ * that are maintained by keep alive power are "locked". To
+ * modify these registers we have to write the key value to
+ * the key location associated with the register.
+ * Some boards power up with these unlocked, while others
+ * are locked. Writing anything (including the unlock code?)
+ * to the unlocked registers will lock them again. So, here
+ * we guarantee the registers are locked, then we unlock them
+ * for our use.
+ */
+ out_be32(&mpc8xx_immr->im_sitk.sitk_tbscrk, ~KAPWR_KEY);
+ out_be32(&mpc8xx_immr->im_sitk.sitk_rtcsck, ~KAPWR_KEY);
+ out_be32(&mpc8xx_immr->im_sitk.sitk_tbk, ~KAPWR_KEY);
+ out_be32(&mpc8xx_immr->im_sitk.sitk_tbscrk, KAPWR_KEY);
+ out_be32(&mpc8xx_immr->im_sitk.sitk_rtcsck, KAPWR_KEY);
+ out_be32(&mpc8xx_immr->im_sitk.sitk_tbk, KAPWR_KEY);
+
+ /* Disable the RTC one second and alarm interrupts. */
+ clrbits16(&mpc8xx_immr->im_sit.sit_rtcsc, (RTCSC_SIE | RTCSC_ALE));
+
+ /* Enable the RTC */
+ setbits16(&mpc8xx_immr->im_sit.sit_rtcsc, (RTCSC_RTF | RTCSC_RTE));
+
+ /* Enabling the decrementer also enables the timebase interrupts
+ * (or from the other point of view, to get decrementer interrupts
+ * we have to enable the timebase). The decrementer interrupt
+ * is wired into the vector table, nothing to do here for that.
+ */
+ cpu = of_get_cpu_node(0, NULL);
+ virq= irq_of_parse_and_map(cpu, 0);
+ of_node_put(cpu);
+ irq = virq_to_hw(virq);
+
+ /* The upper byte of TBSCR takes a one-hot mask derived from the hardware
+ * interrupt number (same formula as mk_int_int_mask() in pic.h); the low
+ * bits enable the time base.
+ */
+ out_be16(&mpc8xx_immr->im_sit.sit_tbscr,
+ ((1 << (7 - (irq / 2))) << 8) | (TBSCR_TBF | TBSCR_TBE));
+
+ if (request_irq(virq, timebase_interrupt, IRQF_NO_THREAD, "tbint",
+ NULL))
+ panic("Could not allocate timer IRQ!");
+}
+
+/*
+ * The MPC8xx RTC is an internal, key-protected register: unlock it, store
+ * the new value and lock it again.
+ *
+ * @tm is converted to seconds with rtc_tm_to_time64() and truncated to the
+ * 32 bits the register holds.  Always returns 0.
+ */
+int mpc8xx_set_rtc_time(struct rtc_time *tm)
+{
+	const time64_t seconds = rtc_tm_to_time64(tm);
+
+	out_be32(&mpc8xx_immr->im_sitk.sitk_rtck, KAPWR_KEY);
+	out_be32(&mpc8xx_immr->im_sit.sit_rtc, (u32)seconds);
+	out_be32(&mpc8xx_immr->im_sitk.sitk_rtck, ~KAPWR_KEY);
+
+	return 0;
+}
+
+/* Read the RTC seconds counter and expand it into @tm. */
+void mpc8xx_get_rtc_time(struct rtc_time *tm)
+{
+	unsigned long seconds = in_be32(&mpc8xx_immr->im_sit.sit_rtc);
+
+	rtc_time64_to_tm(seconds, tm);
+}
+
+/*
+ * Restart the board.  @cmd is not used.  Never returns: if the sequence
+ * below does not reset the machine, the function panics.
+ */
+void __noreturn mpc8xx_restart(char *cmd)
+{
+ local_irq_disable();
+
+ /* NOTE(review): 0x80 in PLPRCR presumably makes a checkstop reset the chip - confirm against the reference manual. */
+ setbits32(&mpc8xx_immr->im_clkrst.car_plprcr, 0x00000080);
+ /* Clear the ME bit in MSR to cause checkstop on machine check
+ */
+ mtmsr(mfmsr() & ~0x1000);
+
+ /* Read a reserved register; presumably this raises the machine check that triggers the checkstop. */
+ in_8(&mpc8xx_immr->im_clkrst.res[0]);
+ panic("Restart failed\n");
+}
diff --git a/arch/powerpc/platforms/8xx/machine_check.c b/arch/powerpc/platforms/8xx/machine_check.c
new file mode 100644
index 000000000..656365975
--- /dev/null
+++ b/arch/powerpc/platforms/8xx/machine_check.c
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ */
+
+#include <linux/kernel.h>
+#include <linux/printk.h>
+#include <linux/ptrace.h>
+
+#include <asm/reg.h>
+
+/*
+ * Report a machine check taken in kernel mode.
+ *
+ * Logs the saved MSR value and either the faulting instruction address
+ * (when bit 0x40000000 is set) or the data address.  With CONFIG_PCI the
+ * fault is handed to bad_page_fault() and 1 is returned; otherwise 0.
+ */
+int machine_check_8xx(struct pt_regs *regs)
+{
+	unsigned long srr1 = regs->msr;
+
+	pr_err("Machine check in kernel mode.\n");
+	pr_err("Caused by (from SRR1=%lx): ", srr1);
+	if (!(srr1 & 0x40000000))
+		pr_cont("Data access error at address %lx\n", regs->dar);
+	else
+		pr_cont("Fetch error at address %lx\n", regs->nip);
+
+#ifdef CONFIG_PCI
+	/* the qspan pci read routines can cause machine checks -- Cort
+	 *
+	 * yuck !!! that totally needs to go away ! There are better ways
+	 * to deal with that than having a wart in the mcheck handler.
+	 * -- BenH
+	 */
+	bad_page_fault(regs, SIGBUS);
+	return 1;
+#else
+	return 0;
+#endif
+}
diff --git a/arch/powerpc/platforms/8xx/mpc86xads.h b/arch/powerpc/platforms/8xx/mpc86xads.h
new file mode 100644
index 000000000..17b1fe75e
--- /dev/null
+++ b/arch/powerpc/platforms/8xx/mpc86xads.h
@@ -0,0 +1,47 @@
+/*
+ * A collection of structures, addresses, and values associated with
+ * the Freescale MPC86xADS board.
+ * Copied from the FADS stuff.
+ *
+ * Author: MontaVista Software, Inc.
+ * source@mvista.com
+ *
+ * 2005 (c) MontaVista Software, Inc. This file is licensed under the
+ * terms of the GNU General Public License version 2. This program is licensed
+ * "as is" without any warranty of any kind, whether express or implied.
+ */
+
+#ifdef __KERNEL__
+#ifndef __ASM_MPC86XADS_H__
+#define __ASM_MPC86XADS_H__
+
+/* Bits of interest in the BCSRs.
+ *
+ * The BCSR1 and BCSR4 masks are 32-bit values cast to uint; the BCSR5
+ * masks are plain integer constants that fit in one byte.
+ */
+/* BCSR1 */
+#define BCSR1_ETHEN ((uint)0x20000000)
+#define BCSR1_IRDAEN ((uint)0x10000000)
+#define BCSR1_RS232EN_1 ((uint)0x01000000)
+#define BCSR1_PCCEN ((uint)0x00800000)
+#define BCSR1_PCCVCC0 ((uint)0x00400000)
+#define BCSR1_PCCVPP0 ((uint)0x00200000)
+#define BCSR1_PCCVPP1 ((uint)0x00100000)
+#define BCSR1_PCCVPP_MASK (BCSR1_PCCVPP0 | BCSR1_PCCVPP1)
+#define BCSR1_RS232EN_2 ((uint)0x00040000)
+#define BCSR1_PCCVCC1 ((uint)0x00010000)
+#define BCSR1_PCCVCC_MASK (BCSR1_PCCVCC0 | BCSR1_PCCVCC1)
+
+/* BCSR4 */
+#define BCSR4_ETH10_RST ((uint)0x80000000) /* 10Base-T PHY reset*/
+#define BCSR4_USB_LO_SPD ((uint)0x04000000)
+#define BCSR4_USB_VCC ((uint)0x02000000)
+#define BCSR4_USB_FULL_SPD ((uint)0x00040000)
+#define BCSR4_USB_EN ((uint)0x00020000)
+
+/* BCSR5 */
+#define BCSR5_MII2_EN 0x40
+#define BCSR5_MII2_RST 0x20
+#define BCSR5_T1_RST 0x10
+#define BCSR5_ATM155_RST 0x08
+#define BCSR5_ATM25_RST 0x04
+#define BCSR5_MII1_EN 0x02
+#define BCSR5_MII1_RST 0x01
+
+#endif /* __ASM_MPC86XADS_H__ */
+#endif /* __KERNEL__ */
diff --git a/arch/powerpc/platforms/8xx/mpc86xads_setup.c b/arch/powerpc/platforms/8xx/mpc86xads_setup.c
new file mode 100644
index 000000000..e4192c0a3
--- /dev/null
+++ b/arch/powerpc/platforms/8xx/mpc86xads_setup.c
@@ -0,0 +1,145 @@
+/*arch/powerpc/platforms/8xx/mpc86xads_setup.c
+ *
+ * Platform setup for the Freescale mpc86xads board
+ *
+ * Vitaly Bordug <vbordug@ru.mvista.com>
+ *
+ * Copyright 2005 MontaVista Software Inc.
+ *
+ * Heavily modified by Scott Wood <scottwood@freescale.com>
+ * Copyright 2007 Freescale Semiconductor, Inc.
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2. This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+#include <linux/init.h>
+#include <linux/of_address.h>
+#include <linux/of_fdt.h>
+#include <linux/of_platform.h>
+
+#include <asm/io.h>
+#include <asm/machdep.h>
+#include <asm/time.h>
+#include <asm/8xx_immap.h>
+#include <asm/cpm1.h>
+#include <asm/udbg.h>
+
+#include "mpc86xads.h"
+#include "mpc8xx.h"
+#include "pic.h"
+
+/* One pin assignment; init_ioports() passes the fields to cpm1_set_pin(). */
+struct cpm_pin {
+	int port;
+	int pin;
+	int flags;
+};
+
+/*
+ * Pin assignments applied once by init_ioports().  The table is only read
+ * from __init code, so it is placed in init data.
+ */
+static struct cpm_pin mpc866ads_pins[] __initdata = {
+	/* SMC1 */
+	{CPM_PORTB, 24, CPM_PIN_INPUT}, /* RX */
+	{CPM_PORTB, 25, CPM_PIN_INPUT | CPM_PIN_SECONDARY}, /* TX */
+
+	/* SMC2 */
+	{CPM_PORTB, 21, CPM_PIN_INPUT}, /* RX */
+	{CPM_PORTB, 20, CPM_PIN_INPUT | CPM_PIN_SECONDARY}, /* TX */
+
+	/* SCC1 */
+	{CPM_PORTA, 6, CPM_PIN_INPUT}, /* CLK1 */
+	{CPM_PORTA, 7, CPM_PIN_INPUT}, /* CLK2 */
+	{CPM_PORTA, 14, CPM_PIN_INPUT}, /* TX */
+	{CPM_PORTA, 15, CPM_PIN_INPUT}, /* RX */
+	{CPM_PORTB, 19, CPM_PIN_INPUT | CPM_PIN_SECONDARY}, /* TENA */
+	{CPM_PORTC, 10, CPM_PIN_INPUT | CPM_PIN_SECONDARY | CPM_PIN_GPIO}, /* RENA */
+	{CPM_PORTC, 11, CPM_PIN_INPUT | CPM_PIN_SECONDARY | CPM_PIN_GPIO}, /* CLSN */
+
+	/* MII */
+	{CPM_PORTD, 3, CPM_PIN_OUTPUT},
+	{CPM_PORTD, 4, CPM_PIN_OUTPUT},
+	{CPM_PORTD, 5, CPM_PIN_OUTPUT},
+	{CPM_PORTD, 6, CPM_PIN_OUTPUT},
+	{CPM_PORTD, 7, CPM_PIN_OUTPUT},
+	{CPM_PORTD, 8, CPM_PIN_OUTPUT},
+	{CPM_PORTD, 9, CPM_PIN_OUTPUT},
+	{CPM_PORTD, 10, CPM_PIN_OUTPUT},
+	{CPM_PORTD, 11, CPM_PIN_OUTPUT},
+	{CPM_PORTD, 12, CPM_PIN_OUTPUT},
+	{CPM_PORTD, 13, CPM_PIN_OUTPUT},
+	{CPM_PORTD, 14, CPM_PIN_OUTPUT},
+	{CPM_PORTD, 15, CPM_PIN_OUTPUT},
+
+	/* I2C */
+	{CPM_PORTB, 26, CPM_PIN_INPUT | CPM_PIN_OPENDRAIN},
+	{CPM_PORTB, 27, CPM_PIN_INPUT | CPM_PIN_OPENDRAIN},
+};
+
+/*
+ * Apply every entry of mpc866ads_pins through cpm1_set_pin(), route the
+ * SMC1, SMC2 and SCC1 clocks, then put both FECs into MII mode.
+ */
+static void __init init_ioports(void)
+{
+	const struct cpm_pin *entry = mpc866ads_pins;
+	const struct cpm_pin *const end =
+		mpc866ads_pins + ARRAY_SIZE(mpc866ads_pins);
+
+	for (; entry < end; entry++)
+		cpm1_set_pin(entry->port, entry->pin, entry->flags);
+
+	cpm1_clk_setup(CPM_CLK_SMC1, CPM_BRG1, CPM_CLK_RTX);
+	cpm1_clk_setup(CPM_CLK_SMC2, CPM_BRG2, CPM_CLK_RTX);
+	cpm1_clk_setup(CPM_CLK_SCC1, CPM_CLK1, CPM_CLK_TX);
+	cpm1_clk_setup(CPM_CLK_SCC1, CPM_CLK2, CPM_CLK_RX);
+
+	/* Set FEC1 and FEC2 to MII mode */
+	clrbits32(&mpc8xx_immr->im_cpm.cp_cptr, 0x00000180);
+}
+
+/*
+ * Board setup: reset the CPM, configure the I/O ports, then temporarily
+ * map the "fsl,mpc866ads-bcsr" registers to clear the RS232 and Ethernet
+ * enable bits.  A missing node or failed mapping is logged and the BCSR
+ * write skipped.
+ */
+static void __init mpc86xads_setup_arch(void)
+{
+	struct device_node *bcsr_node;
+	u32 __iomem *regs;
+
+	cpm_reset();
+	init_ioports();
+
+	bcsr_node = of_find_compatible_node(NULL, NULL, "fsl,mpc866ads-bcsr");
+	if (bcsr_node == NULL) {
+		printk(KERN_CRIT "Could not find fsl,mpc866ads-bcsr node\n");
+		return;
+	}
+
+	regs = of_iomap(bcsr_node, 0);
+	of_node_put(bcsr_node);
+	if (!regs) {
+		printk(KERN_CRIT "Could not remap BCSR\n");
+		return;
+	}
+
+	clrbits32(regs, BCSR1_RS232EN_1 | BCSR1_RS232EN_2 | BCSR1_ETHEN);
+	iounmap(regs);
+}
+
+/* Node names handed to of_platform_bus_probe() by declare_of_platform_devices(). */
+static const struct of_device_id of_bus_ids[] __initconst = {
+	{ .name = "soc" },
+	{ .name = "cpm" },
+	{ .name = "localbus" },
+	{ /* end of table */ },
+};
+
+/*
+ * Device initcall for the mpc86x_ads machine: passes the of_bus_ids table
+ * to of_platform_bus_probe().  The probe result is not checked; returns 0.
+ */
+static int __init declare_of_platform_devices(void)
+{
+	of_platform_bus_probe(NULL, of_bus_ids, NULL);
+	return 0;
+}
+machine_device_initcall(mpc86x_ads, declare_of_platform_devices);
+
+/* Machine description for the MPC86x ADS board. */
+define_machine(mpc86x_ads) {
+	.name		= "MPC86x ADS",
+	.compatible	= "fsl,mpc866ads",
+	.setup_arch	= mpc86xads_setup_arch,
+	.init_IRQ	= mpc8xx_pic_init,
+	.get_irq	= mpc8xx_get_irq,
+	.restart	= mpc8xx_restart,
+	.calibrate_decr	= mpc8xx_calibrate_decr,
+	.set_rtc_time	= mpc8xx_set_rtc_time,
+	.get_rtc_time	= mpc8xx_get_rtc_time,
+	.progress	= udbg_progress,
+};
diff --git a/arch/powerpc/platforms/8xx/mpc885ads.h b/arch/powerpc/platforms/8xx/mpc885ads.h
new file mode 100644
index 000000000..19412f76f
--- /dev/null
+++ b/arch/powerpc/platforms/8xx/mpc885ads.h
@@ -0,0 +1,49 @@
+/*
+ * A collection of structures, addresses, and values associated with
+ * the Freescale MPC885ADS board.
+ * Copied from the FADS stuff.
+ *
+ * Author: MontaVista Software, Inc.
+ * source@mvista.com
+ *
+ * 2005 (c) MontaVista Software, Inc. This file is licensed under the
+ * terms of the GNU General Public License version 2. This program is licensed
+ * "as is" without any warranty of any kind, whether express or implied.
+ */
+
+#ifdef __KERNEL__
+#ifndef __ASM_MPC885ADS_H__
+#define __ASM_MPC885ADS_H__
+
+#include <sysdev/fsl_soc.h>
+
+/* Bits of interest in the BCSRs.
+ *
+ * The BCSR1 and BCSR4 masks are 32-bit values cast to uint; the BCSR5
+ * masks are plain integer constants that fit in one byte.
+ */
+/* BCSR1 */
+#define BCSR1_ETHEN ((uint)0x20000000)
+#define BCSR1_IRDAEN ((uint)0x10000000)
+#define BCSR1_RS232EN_1 ((uint)0x01000000)
+#define BCSR1_PCCEN ((uint)0x00800000)
+#define BCSR1_PCCVCC0 ((uint)0x00400000)
+#define BCSR1_PCCVPP0 ((uint)0x00200000)
+#define BCSR1_PCCVPP1 ((uint)0x00100000)
+#define BCSR1_PCCVPP_MASK (BCSR1_PCCVPP0 | BCSR1_PCCVPP1)
+#define BCSR1_RS232EN_2 ((uint)0x00040000)
+#define BCSR1_PCCVCC1 ((uint)0x00010000)
+#define BCSR1_PCCVCC_MASK (BCSR1_PCCVCC0 | BCSR1_PCCVCC1)
+
+/* BCSR4 */
+#define BCSR4_ETH10_RST ((uint)0x80000000) /* 10Base-T PHY reset*/
+#define BCSR4_USB_LO_SPD ((uint)0x04000000)
+#define BCSR4_USB_VCC ((uint)0x02000000)
+#define BCSR4_USB_FULL_SPD ((uint)0x00040000)
+#define BCSR4_USB_EN ((uint)0x00020000)
+
+/* BCSR5 */
+#define BCSR5_MII2_EN 0x40
+#define BCSR5_MII2_RST 0x20
+#define BCSR5_T1_RST 0x10
+#define BCSR5_ATM155_RST 0x08
+#define BCSR5_ATM25_RST 0x04
+#define BCSR5_MII1_EN 0x02
+#define BCSR5_MII1_RST 0x01
+
+#endif /* __ASM_MPC885ADS_H__ */
+#endif /* __KERNEL__ */
diff --git a/arch/powerpc/platforms/8xx/mpc885ads_setup.c b/arch/powerpc/platforms/8xx/mpc885ads_setup.c
new file mode 100644
index 000000000..2d899be74
--- /dev/null
+++ b/arch/powerpc/platforms/8xx/mpc885ads_setup.c
@@ -0,0 +1,217 @@
+/*
+ * Platform setup for the Freescale mpc885ads board
+ *
+ * Vitaly Bordug <vbordug@ru.mvista.com>
+ *
+ * Copyright 2005 MontaVista Software Inc.
+ *
+ * Heavily modified by Scott Wood <scottwood@freescale.com>
+ * Copyright 2007 Freescale Semiconductor, Inc.
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2. This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/param.h>
+#include <linux/string.h>
+#include <linux/ioport.h>
+#include <linux/device.h>
+#include <linux/delay.h>
+
+#include <linux/fsl_devices.h>
+#include <linux/mii.h>
+#include <linux/of_address.h>
+#include <linux/of_fdt.h>
+#include <linux/of_platform.h>
+
+#include <asm/delay.h>
+#include <asm/io.h>
+#include <asm/machdep.h>
+#include <asm/page.h>
+#include <asm/processor.h>
+#include <asm/time.h>
+#include <asm/8xx_immap.h>
+#include <asm/cpm1.h>
+#include <asm/udbg.h>
+
+#include "mpc885ads.h"
+#include "mpc8xx.h"
+#include "pic.h"
+
+/*
+ * Board control/status registers, mapped by mpc885ads_setup_arch() from
+ * register ranges 0 (bcsr) and 1 (bcsr5) of the "fsl,mpc885ads-bcsr" node.
+ */
+static u32 __iomem *bcsr, *bcsr5;
+
+/* One pin assignment; init_ioports() passes the fields to cpm1_set_pin(). */
+struct cpm_pin {
+	int port;
+	int pin;
+	int flags;
+};
+
+/*
+ * Pin assignments applied once by init_ioports().  The table is only read
+ * from __init code, so it is placed in init data.  The SMC2 and MII2
+ * groups are mutually exclusive, selected by CONFIG_MPC8xx_SECOND_ETH_FEC2.
+ */
+static struct cpm_pin mpc885ads_pins[] __initdata = {
+	/* SMC1 */
+	{CPM_PORTB, 24, CPM_PIN_INPUT}, /* RX */
+	{CPM_PORTB, 25, CPM_PIN_INPUT | CPM_PIN_SECONDARY}, /* TX */
+
+	/* SMC2 */
+#ifndef CONFIG_MPC8xx_SECOND_ETH_FEC2
+	{CPM_PORTE, 21, CPM_PIN_INPUT}, /* RX */
+	{CPM_PORTE, 20, CPM_PIN_INPUT | CPM_PIN_SECONDARY}, /* TX */
+#endif
+
+	/* SCC3 */
+	{CPM_PORTA, 9, CPM_PIN_INPUT}, /* RX */
+	{CPM_PORTA, 8, CPM_PIN_INPUT}, /* TX */
+	{CPM_PORTC, 4, CPM_PIN_INPUT | CPM_PIN_SECONDARY | CPM_PIN_GPIO}, /* RENA */
+	{CPM_PORTC, 5, CPM_PIN_INPUT | CPM_PIN_SECONDARY | CPM_PIN_GPIO}, /* CLSN */
+	{CPM_PORTE, 27, CPM_PIN_INPUT | CPM_PIN_SECONDARY}, /* TENA */
+	{CPM_PORTE, 17, CPM_PIN_INPUT}, /* CLK5 */
+	{CPM_PORTE, 16, CPM_PIN_INPUT}, /* CLK6 */
+
+	/* MII1 */
+	{CPM_PORTA, 0, CPM_PIN_INPUT},
+	{CPM_PORTA, 1, CPM_PIN_INPUT},
+	{CPM_PORTA, 2, CPM_PIN_INPUT},
+	{CPM_PORTA, 3, CPM_PIN_INPUT},
+	{CPM_PORTA, 4, CPM_PIN_OUTPUT},
+	{CPM_PORTA, 10, CPM_PIN_OUTPUT},
+	{CPM_PORTA, 11, CPM_PIN_OUTPUT},
+	{CPM_PORTB, 19, CPM_PIN_INPUT},
+	{CPM_PORTB, 31, CPM_PIN_INPUT},
+	{CPM_PORTC, 12, CPM_PIN_INPUT},
+	{CPM_PORTC, 13, CPM_PIN_INPUT},
+	{CPM_PORTE, 30, CPM_PIN_OUTPUT},
+	{CPM_PORTE, 31, CPM_PIN_OUTPUT},
+
+	/* MII2 */
+#ifdef CONFIG_MPC8xx_SECOND_ETH_FEC2
+	{CPM_PORTE, 14, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY},
+	{CPM_PORTE, 15, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY},
+	{CPM_PORTE, 16, CPM_PIN_OUTPUT},
+	{CPM_PORTE, 17, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY},
+	{CPM_PORTE, 18, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY},
+	{CPM_PORTE, 19, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY},
+	{CPM_PORTE, 20, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY},
+	{CPM_PORTE, 21, CPM_PIN_OUTPUT},
+	{CPM_PORTE, 22, CPM_PIN_OUTPUT},
+	{CPM_PORTE, 23, CPM_PIN_OUTPUT},
+	{CPM_PORTE, 24, CPM_PIN_OUTPUT},
+	{CPM_PORTE, 25, CPM_PIN_OUTPUT},
+	{CPM_PORTE, 26, CPM_PIN_OUTPUT},
+	{CPM_PORTE, 27, CPM_PIN_OUTPUT},
+	{CPM_PORTE, 28, CPM_PIN_OUTPUT},
+	{CPM_PORTE, 29, CPM_PIN_OUTPUT},
+#endif
+	/* I2C */
+	{CPM_PORTB, 26, CPM_PIN_INPUT | CPM_PIN_OPENDRAIN},
+	{CPM_PORTB, 27, CPM_PIN_INPUT | CPM_PIN_OPENDRAIN},
+};
+
+/*
+ * Apply every entry of mpc885ads_pins through cpm1_set_pin(), route the
+ * SMC1, SMC2 and SCC3 clocks, then put both FECs into MII mode.
+ */
+static void __init init_ioports(void)
+{
+	const struct cpm_pin *entry = mpc885ads_pins;
+	const struct cpm_pin *const end =
+		mpc885ads_pins + ARRAY_SIZE(mpc885ads_pins);
+
+	for (; entry < end; entry++)
+		cpm1_set_pin(entry->port, entry->pin, entry->flags);
+
+	cpm1_clk_setup(CPM_CLK_SMC1, CPM_BRG1, CPM_CLK_RTX);
+	cpm1_clk_setup(CPM_CLK_SMC2, CPM_BRG2, CPM_CLK_RTX);
+	cpm1_clk_setup(CPM_CLK_SCC3, CPM_CLK5, CPM_CLK_TX);
+	cpm1_clk_setup(CPM_CLK_SCC3, CPM_CLK6, CPM_CLK_RX);
+
+	/* Set FEC1 and FEC2 to MII mode */
+	clrbits32(&mpc8xx_immr->im_cpm.cp_cptr, 0x00000180);
+}
+
+/*
+ * Board setup: reset the CPM, configure the I/O ports, map both BCSR
+ * ranges of the "fsl,mpc885ads-bcsr" node, program the serial/PHY control
+ * bits according to the kernel configuration, and detach whichever of the
+ * SMC1 serial / SCC3 ethernet nodes is not in use.
+ *
+ * A missing node or failed mapping is logged and the rest of the setup is
+ * skipped; a mapping that did succeed is released rather than leaked.
+ */
+static void __init mpc885ads_setup_arch(void)
+{
+	struct device_node *np;
+
+	cpm_reset();
+	init_ioports();
+
+	np = of_find_compatible_node(NULL, NULL, "fsl,mpc885ads-bcsr");
+	if (!np) {
+		printk(KERN_CRIT "Could not find fsl,mpc885ads-bcsr node\n");
+		return;
+	}
+
+	bcsr = of_iomap(np, 0);
+	bcsr5 = of_iomap(np, 1);
+	of_node_put(np);
+
+	if (!bcsr || !bcsr5) {
+		printk(KERN_CRIT "Could not remap BCSR\n");
+		/* Release whichever mapping succeeded and leave both unset. */
+		if (bcsr)
+			iounmap(bcsr);
+		if (bcsr5)
+			iounmap(bcsr5);
+		bcsr = NULL;
+		bcsr5 = NULL;
+		return;
+	}
+
+	clrbits32(&bcsr[1], BCSR1_RS232EN_1);
+#ifdef CONFIG_MPC8xx_SECOND_ETH_FEC2
+	setbits32(&bcsr[1], BCSR1_RS232EN_2);
+#else
+	clrbits32(&bcsr[1], BCSR1_RS232EN_2);
+#endif
+
+	/* Pulse the MII1 reset bit with a 1 ms hold time. */
+	clrbits32(bcsr5, BCSR5_MII1_EN);
+	setbits32(bcsr5, BCSR5_MII1_RST);
+	udelay(1000);
+	clrbits32(bcsr5, BCSR5_MII1_RST);
+
+#ifdef CONFIG_MPC8xx_SECOND_ETH_FEC2
+	/* Same reset pulse for MII2. */
+	clrbits32(bcsr5, BCSR5_MII2_EN);
+	setbits32(bcsr5, BCSR5_MII2_RST);
+	udelay(1000);
+	clrbits32(bcsr5, BCSR5_MII2_RST);
+#else
+	setbits32(bcsr5, BCSR5_MII2_EN);
+#endif
+
+#ifdef CONFIG_MPC8xx_SECOND_ETH_SCC3
+	clrbits32(&bcsr[4], BCSR4_ETH10_RST);
+	udelay(1000);
+	setbits32(&bcsr[4], BCSR4_ETH10_RST);
+
+	setbits32(&bcsr[1], BCSR1_ETHEN);
+
+	np = of_find_node_by_path("/soc@ff000000/cpm@9c0/serial@a80");
+#else
+	np = of_find_node_by_path("/soc@ff000000/cpm@9c0/ethernet@a40");
+#endif
+
+	/* The SCC3 enet registers overlap the SMC1 registers, so
+	 * one of the two must be removed from the device tree.
+	 */
+
+	if (np) {
+		of_detach_node(np);
+		of_node_put(np);
+	}
+}
+
+/* Node names handed to of_platform_bus_probe() by declare_of_platform_devices(). */
+static const struct of_device_id of_bus_ids[] __initconst = {
+	{ .name = "soc" },
+	{ .name = "cpm" },
+	{ .name = "localbus" },
+	{ /* end of table */ },
+};
+
+/*
+ * Device initcall for the mpc885_ads machine: passes the of_bus_ids table
+ * to of_platform_bus_probe().  The probe result is not checked; returns 0.
+ */
+static int __init declare_of_platform_devices(void)
+{
+	of_platform_bus_probe(NULL, of_bus_ids, NULL);
+	return 0;
+}
+machine_device_initcall(mpc885_ads, declare_of_platform_devices);
+
+/* Machine description for the Freescale MPC885 ADS board. */
+define_machine(mpc885_ads) {
+	.name		= "Freescale MPC885 ADS",
+	.compatible	= "fsl,mpc885ads",
+	.setup_arch	= mpc885ads_setup_arch,
+	.init_IRQ	= mpc8xx_pic_init,
+	.get_irq	= mpc8xx_get_irq,
+	.restart	= mpc8xx_restart,
+	.calibrate_decr	= mpc8xx_calibrate_decr,
+	.progress	= udbg_progress,
+};
diff --git a/arch/powerpc/platforms/8xx/mpc8xx.h b/arch/powerpc/platforms/8xx/mpc8xx.h
new file mode 100644
index 000000000..79fae3324
--- /dev/null
+++ b/arch/powerpc/platforms/8xx/mpc8xx.h
@@ -0,0 +1,20 @@
+/*
+ * Prototypes, etc. for the Freescale MPC8xx embedded cpu chips
+ * May need to be cleaned as the port goes on ...
+ *
+ * Copyright (C) 2008 Jochen Friedrich <jochen@scram.de>
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2. This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+#ifndef __MPC8xx_H
+#define __MPC8xx_H
+
+/*
+ * Forward declaration so the RTC prototypes below do not depend on the
+ * includer having pulled in the definition of struct rtc_time first.
+ */
+struct rtc_time;
+
+/* Board-independent MPC8xx helpers used in the machine descriptions. */
+extern void __noreturn mpc8xx_restart(char *cmd);
+extern void mpc8xx_calibrate_decr(void);
+extern int mpc8xx_set_rtc_time(struct rtc_time *tm);
+extern void mpc8xx_get_rtc_time(struct rtc_time *tm);
+extern unsigned int mpc8xx_get_irq(void);
+
+#endif /* __MPC8xx_H */
diff --git a/arch/powerpc/platforms/8xx/pic.c b/arch/powerpc/platforms/8xx/pic.c
new file mode 100644
index 000000000..ea6b0e523
--- /dev/null
+++ b/arch/powerpc/platforms/8xx/pic.c
@@ -0,0 +1,155 @@
+#include <linux/kernel.h>
+#include <linux/stddef.h>
+#include <linux/sched.h>
+#include <linux/signal.h>
+#include <linux/irq.h>
+#include <linux/dma-mapping.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <asm/irq.h>
+#include <asm/io.h>
+#include <asm/8xx_immap.h>
+
+#include "pic.h"
+
+
+/* Vector value for which mpc8xx_get_irq() returns 0 instead of a mapping. */
+#define PIC_VEC_SPURRIOUS 15
+
+/* IRQ domain created by mpc8xx_pic_init(). */
+static struct irq_domain *mpc8xx_pic_host;
+/* Software copy of the value last written to SIMASK by the mask helpers. */
+static unsigned long mpc8xx_cached_irq_mask;
+/* Register block mapped by mpc8xx_pic_init(). */
+static sysconf8xx_t __iomem *siu_reg;
+
+/* Register bit for @d's hardware interrupt: hwirq 0 maps to bit 0x80000000. */
+static inline unsigned long mpc8xx_irqd_to_bit(struct irq_data *d)
+{
+	const irq_hw_number_t hwirq = irqd_to_hwirq(d);
+
+	return 0x80000000 >> hwirq;
+}
+
+/* Set @d's bit in the cached mask and write the mask to SIMASK. */
+static void mpc8xx_unmask_irq(struct irq_data *d)
+{
+	const unsigned long bit = mpc8xx_irqd_to_bit(d);
+
+	mpc8xx_cached_irq_mask |= bit;
+	out_be32(&siu_reg->sc_simask, mpc8xx_cached_irq_mask);
+}
+
+/* Clear @d's bit in the cached mask and write the mask to SIMASK. */
+static void mpc8xx_mask_irq(struct irq_data *d)
+{
+	const unsigned long bit = mpc8xx_irqd_to_bit(d);
+
+	mpc8xx_cached_irq_mask &= ~bit;
+	out_be32(&siu_reg->sc_simask, mpc8xx_cached_irq_mask);
+}
+
+/* Acknowledge @d by writing its bit to SIPEND. */
+static void mpc8xx_ack(struct irq_data *d)
+{
+	const unsigned long bit = mpc8xx_irqd_to_bit(d);
+
+	out_be32(&siu_reg->sc_sipend, bit);
+}
+
+/* End-of-interrupt: re-enable @d, exactly as mpc8xx_unmask_irq() does. */
+static void mpc8xx_end_irq(struct irq_data *d)
+{
+	mpc8xx_unmask_irq(d);
+}
+
+/*
+ * Select the trigger type for @d.
+ *
+ * Only even-numbered hardware interrupts (the external IRQ lines) have a
+ * programmable sense.  When a falling-edge type is requested for one of
+ * them, its SIEL bit is set and the edge flow handler installed; every
+ * other request is accepted without touching the hardware.  Returns 0.
+ */
+static int mpc8xx_set_irq_type(struct irq_data *d, unsigned int flow_type)
+{
+	unsigned int siel;
+
+	if (!(flow_type & IRQ_TYPE_EDGE_FALLING) || (irqd_to_hwirq(d) & 1))
+		return 0;
+
+	siel = in_be32(&siu_reg->sc_siel);
+	siel |= mpc8xx_irqd_to_bit(d);
+	out_be32(&siu_reg->sc_siel, siel);
+	irq_set_handler_locked(d, handle_edge_irq);
+
+	return 0;
+}
+
+/* irq_chip callbacks for the 8xx SIU interrupt controller. */
+static struct irq_chip mpc8xx_pic = {
+	.name		= "8XX SIU",
+	.irq_unmask	= mpc8xx_unmask_irq,
+	.irq_mask	= mpc8xx_mask_irq,
+	.irq_ack	= mpc8xx_ack,
+	.irq_eoi	= mpc8xx_end_irq,
+	.irq_set_type	= mpc8xx_set_irq_type,
+};
+
+/*
+ * Return the Linux interrupt number of the pending interrupt.
+ *
+ * The vector is taken from the top six bits of SIVEC; the spurious vector
+ * yields 0, anything else is looked up in the PIC's linear IRQ domain.
+ */
+unsigned int mpc8xx_get_irq(void)
+{
+	int vec = in_be32(&siu_reg->sc_sivec) >> 26;
+
+	if (vec != PIC_VEC_SPURRIOUS)
+		return irq_linear_revmap(mpc8xx_pic_host, vec);
+
+	return 0;
+}
+
+/*
+ * irq_domain .map callback: attach the SIU chip to @virq with the level
+ * flow handler as the default.  Always returns 0.
+ */
+static int mpc8xx_pic_host_map(struct irq_domain *h, unsigned int virq,
+			  irq_hw_number_t hw)
+{
+	pr_debug("mpc8xx_pic_host_map(%d, 0x%lx)\n", virq, hw);
+
+	irq_set_chip_and_handler(virq, &mpc8xx_pic, handle_level_irq);
+
+	return 0;
+}
+
+
+/*
+ * irq_domain .xlate callback: decode a device-tree interrupt specifier.
+ *
+ * @intspec[0] is the hardware interrupt number (0..0x1f).  An optional
+ * @intspec[1] in the range 0..3 selects the sense through map_pic_senses;
+ * otherwise the sense is IRQ_TYPE_NONE.
+ *
+ * Returns 0 with *@out_hwirq and *@out_flags filled in, or -EINVAL for an
+ * empty specifier or an out-of-range interrupt number, so the caller never
+ * consumes outputs that were not written.
+ */
+static int mpc8xx_pic_host_xlate(struct irq_domain *h, struct device_node *ct,
+			  const u32 *intspec, unsigned int intsize,
+			  irq_hw_number_t *out_hwirq, unsigned int *out_flags)
+{
+	static const unsigned char map_pic_senses[4] = {
+		IRQ_TYPE_EDGE_RISING,
+		IRQ_TYPE_LEVEL_LOW,
+		IRQ_TYPE_LEVEL_HIGH,
+		IRQ_TYPE_EDGE_FALLING,
+	};
+
+	if (intsize < 1 || intspec[0] > 0x1f)
+		return -EINVAL;
+
+	*out_hwirq = intspec[0];
+	if (intsize > 1 && intspec[1] < 4)
+		*out_flags = map_pic_senses[intspec[1]];
+	else
+		*out_flags = IRQ_TYPE_NONE;
+
+	return 0;
+}
+
+
+/* Domain callbacks passed to irq_domain_add_linear() by mpc8xx_pic_init(). */
+static const struct irq_domain_ops mpc8xx_pic_host_ops = {
+	.map	= mpc8xx_pic_host_map,
+	.xlate	= mpc8xx_pic_host_xlate,
+};
+
+/*
+ * Locate the SIU interrupt controller node ("fsl,pq1-pic", falling back to
+ * device type "mpc8xx-pic"), map its registers into siu_reg and create the
+ * 64-entry linear IRQ domain.
+ *
+ * Every failure is logged; the node reference is dropped on all paths
+ * after a successful lookup.
+ */
+void __init mpc8xx_pic_init(void)
+{
+	struct resource res;
+	struct device_node *np;
+	int ret;
+
+	np = of_find_compatible_node(NULL, NULL, "fsl,pq1-pic");
+	if (np == NULL)
+		np = of_find_node_by_type(NULL, "mpc8xx-pic");
+	if (np == NULL) {
+		printk(KERN_ERR "Could not find fsl,pq1-pic node\n");
+		return;
+	}
+
+	ret = of_address_to_resource(np, 0, &res);
+	if (ret) {
+		printk(KERN_ERR "MPC8xx PIC: failed to get register resource\n");
+		goto out;
+	}
+
+	siu_reg = ioremap(res.start, resource_size(&res));
+	if (!siu_reg) {
+		printk(KERN_ERR "MPC8xx PIC: failed to map registers\n");
+		goto out;
+	}
+
+	mpc8xx_pic_host = irq_domain_add_linear(np, 64, &mpc8xx_pic_host_ops, NULL);
+	if (!mpc8xx_pic_host)
+		printk(KERN_ERR "MPC8xx PIC: failed to allocate irq host!\n");
+
+out:
+	of_node_put(np);
+}
diff --git a/arch/powerpc/platforms/8xx/pic.h b/arch/powerpc/platforms/8xx/pic.h
new file mode 100644
index 000000000..c70f1b446
--- /dev/null
+++ b/arch/powerpc/platforms/8xx/pic.h
@@ -0,0 +1,19 @@
+#ifndef _PPC_KERNEL_MPC8xx_H
+#define _PPC_KERNEL_MPC8xx_H
+
+#include <linux/irq.h>
+#include <linux/interrupt.h>
+
+void mpc8xx_pic_init(void);
+unsigned int mpc8xx_get_irq(void);
+
+/*
+ * Some internal interrupt registers take an 8-bit mask for the interrupt
+ * level rather than a number.  Convert the value passed in @mask to that
+ * form: bit (7 - mask / 2) set.
+ */
+static inline uint mk_int_int_mask(uint mask)
+{
+	const uint shift = 7 - (mask / 2);
+
+	return 1 << shift;
+}
+
+#endif /* _PPC_KERNEL_MPC8xx_H */
diff --git a/arch/powerpc/platforms/8xx/tqm8xx_setup.c b/arch/powerpc/platforms/8xx/tqm8xx_setup.c
new file mode 100644
index 000000000..d97a7910c
--- /dev/null
+++ b/arch/powerpc/platforms/8xx/tqm8xx_setup.c
@@ -0,0 +1,148 @@
+/*
+ * Platform setup for the MPC8xx based boards from TQM.
+ *
+ * Heiko Schocher <hs@denx.de>
+ * Copyright 2010 DENX Software Engineering GmbH
+ *
+ * based on:
+ * Vitaly Bordug <vbordug@ru.mvista.com>
+ *
+ * Copyright 2005 MontaVista Software Inc.
+ *
+ * Heavily modified by Scott Wood <scottwood@freescale.com>
+ * Copyright 2007 Freescale Semiconductor, Inc.
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2. This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+#include <linux/init.h>
+#include <linux/param.h>
+#include <linux/string.h>
+#include <linux/ioport.h>
+#include <linux/device.h>
+#include <linux/delay.h>
+
+#include <linux/fsl_devices.h>
+#include <linux/mii.h>
+#include <linux/of_fdt.h>
+#include <linux/of_platform.h>
+
+#include <asm/delay.h>
+#include <asm/io.h>
+#include <asm/machdep.h>
+#include <asm/page.h>
+#include <asm/processor.h>
+#include <asm/time.h>
+#include <asm/8xx_immap.h>
+#include <asm/cpm1.h>
+#include <asm/udbg.h>
+
+#include "mpc8xx.h"
+#include "pic.h"
+
+/*
+ * One CPM1 pin setting.  The three members are passed, in this order, as
+ * the arguments of cpm1_set_pin() by init_pins().
+ */
+struct cpm_pin {
+ int port, pin, flags;
+};
+
+/*
+ * Pin settings that init_ioports() always applies.  The table is
+ * __initdata, so it is only usable from __init code.
+ */
+static struct cpm_pin tqm8xx_pins[] __initdata = {
+ /* SMC1 */
+ {CPM_PORTB, 24, CPM_PIN_INPUT}, /* RX */
+ {CPM_PORTB, 25, CPM_PIN_INPUT | CPM_PIN_SECONDARY}, /* TX */
+
+ /* SCC1 */
+ {CPM_PORTA, 5, CPM_PIN_INPUT}, /* CLK1 */
+ {CPM_PORTA, 7, CPM_PIN_INPUT}, /* CLK2 */
+ {CPM_PORTA, 14, CPM_PIN_INPUT}, /* TX */
+ {CPM_PORTA, 15, CPM_PIN_INPUT}, /* RX */
+ {CPM_PORTC, 15, CPM_PIN_INPUT | CPM_PIN_SECONDARY}, /* TENA */
+ {CPM_PORTC, 10, CPM_PIN_INPUT | CPM_PIN_SECONDARY | CPM_PIN_GPIO},
+ {CPM_PORTC, 11, CPM_PIN_INPUT | CPM_PIN_SECONDARY | CPM_PIN_GPIO},
+};
+
+/*
+ * Pin settings for the FEC.  init_ioports() applies this table only when
+ * the device tree's "aliases" node has an "ethernet1" property.
+ */
+static struct cpm_pin tqm8xx_fec_pins[] __initdata = {
+ /* MII */
+ {CPM_PORTD, 3, CPM_PIN_OUTPUT},
+ {CPM_PORTD, 4, CPM_PIN_OUTPUT},
+ {CPM_PORTD, 5, CPM_PIN_OUTPUT},
+ {CPM_PORTD, 6, CPM_PIN_OUTPUT},
+ {CPM_PORTD, 7, CPM_PIN_OUTPUT},
+ {CPM_PORTD, 8, CPM_PIN_OUTPUT},
+ {CPM_PORTD, 9, CPM_PIN_OUTPUT},
+ {CPM_PORTD, 10, CPM_PIN_OUTPUT},
+ {CPM_PORTD, 11, CPM_PIN_OUTPUT},
+ {CPM_PORTD, 12, CPM_PIN_OUTPUT},
+ {CPM_PORTD, 13, CPM_PIN_OUTPUT},
+ {CPM_PORTD, 14, CPM_PIN_OUTPUT},
+ {CPM_PORTD, 15, CPM_PIN_OUTPUT},
+};
+
+/*
+ * Apply the first @n entries of the table at @pin, in table order, by
+ * passing each entry's port, pin and flags to cpm1_set_pin().
+ */
+static void __init init_pins(int n, struct cpm_pin *pin)
+{
+	int idx;
+
+	for (idx = 0; idx < n; idx++)
+		cpm1_set_pin(pin[idx].port, pin[idx].pin, pin[idx].flags);
+}
+
+/*
+ * Configure the board's CPM pins.
+ *
+ * The tqm8xx_pins table is always applied, followed by a
+ * cpm1_clk_setup(CPM_CLK_SMC1, CPM_BRG1, CPM_CLK_RTX) call.  The FEC pin
+ * table is applied only if a node named "aliases" exists and has an
+ * "ethernet1" property.
+ */
+static void __init init_ioports(void)
+{
+	struct device_node *aliases;
+	struct property *eth1;
+	int len;
+
+	init_pins(ARRAY_SIZE(tqm8xx_pins), tqm8xx_pins);
+
+	cpm1_clk_setup(CPM_CLK_SMC1, CPM_BRG1, CPM_CLK_RTX);
+
+	aliases = of_find_node_by_name(NULL, "aliases");
+	if (!aliases)
+		return;
+
+	/* Only the presence of the property is tested, never its contents. */
+	eth1 = of_find_property(aliases, "ethernet1", &len);
+	of_node_put(aliases);
+
+	if (eth1) {
+		/* init FEC pins */
+		init_pins(ARRAY_SIZE(tqm8xx_fec_pins), tqm8xx_fec_pins);
+	}
+}
+
+/*
+ * .setup_arch callback of the tqm8xx machine description: reset the CPM
+ * first, then program the pins through init_ioports().
+ */
+static void __init tqm8xx_setup_arch(void)
+{
+ cpm_reset();
+ init_ioports();
+}
+
+/*
+ * Match table passed to of_platform_bus_probe() by
+ * declare_of_platform_devices().  Three entries match by node name, one by
+ * compatible string; the empty entry terminates the table.
+ */
+static const struct of_device_id of_bus_ids[] __initconst = {
+ { .name = "soc", },
+ { .name = "cpm", },
+ { .name = "localbus", },
+ { .compatible = "simple-bus" },
+ {},
+};
+
+/*
+ * Device initcall for the tqm8xx machine: run of_platform_bus_probe() with
+ * the of_bus_ids match table, a NULL root and a NULL parent.
+ *
+ * Returns whatever of_platform_bus_probe() returns, so a failed probe is
+ * visible to the initcall machinery instead of being reported as success.
+ */
+static int __init declare_of_platform_devices(void)
+{
+	return of_platform_bus_probe(NULL, of_bus_ids, NULL);
+}
+machine_device_initcall(tqm8xx, declare_of_platform_devices);
+
+/*
+ * Machine description for the TQM8xx boards (compatible "tqc,tqm8xx").
+ * Only .setup_arch is defined in this file; the remaining callbacks are
+ * mpc8xx_* helpers and udbg_progress declared elsewhere.
+ */
+define_machine(tqm8xx) {
+ .name = "TQM8xx",
+ .compatible = "tqc,tqm8xx",
+ .setup_arch = tqm8xx_setup_arch,
+ .init_IRQ = mpc8xx_pic_init,
+ .get_irq = mpc8xx_get_irq,
+ .restart = mpc8xx_restart,
+ .calibrate_decr = mpc8xx_calibrate_decr,
+ .set_rtc_time = mpc8xx_set_rtc_time,
+ .get_rtc_time = mpc8xx_get_rtc_time,
+ .progress = udbg_progress,
+};
diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig
new file mode 100644
index 000000000..1fd253f92
--- /dev/null
+++ b/arch/powerpc/platforms/Kconfig
@@ -0,0 +1,307 @@
+# SPDX-License-Identifier: GPL-2.0
+menu "Platform support"
+
+source "arch/powerpc/platforms/powernv/Kconfig"
+source "arch/powerpc/platforms/pseries/Kconfig"
+source "arch/powerpc/platforms/chrp/Kconfig"
+source "arch/powerpc/platforms/512x/Kconfig"
+source "arch/powerpc/platforms/52xx/Kconfig"
+source "arch/powerpc/platforms/powermac/Kconfig"
+source "arch/powerpc/platforms/maple/Kconfig"
+source "arch/powerpc/platforms/pasemi/Kconfig"
+source "arch/powerpc/platforms/ps3/Kconfig"
+source "arch/powerpc/platforms/cell/Kconfig"
+source "arch/powerpc/platforms/8xx/Kconfig"
+source "arch/powerpc/platforms/82xx/Kconfig"
+source "arch/powerpc/platforms/83xx/Kconfig"
+source "arch/powerpc/platforms/85xx/Kconfig"
+source "arch/powerpc/platforms/86xx/Kconfig"
+source "arch/powerpc/platforms/embedded6xx/Kconfig"
+source "arch/powerpc/platforms/44x/Kconfig"
+source "arch/powerpc/platforms/40x/Kconfig"
+source "arch/powerpc/platforms/amigaone/Kconfig"
+source "arch/powerpc/platforms/book3s/Kconfig"
+source "arch/powerpc/platforms/microwatt/Kconfig"
+
+config KVM_GUEST
+ bool "KVM Guest support"
+ select EPAPR_PARAVIRT
+ help
+ This option enables various optimizations for running under the KVM
+ hypervisor. Overhead for the kernel when not running inside KVM should
+ be minimal.
+
+ In case of doubt, say Y
+
+config EPAPR_PARAVIRT
+ bool "ePAPR para-virtualization support"
+ help
+ Enables ePAPR para-virtualization support for guests.
+
+ In case of doubt, say Y
+
+config PPC_HASH_MMU_NATIVE
+ bool
+ depends on PPC_BOOK3S
+ help
+ Support for running natively on the hardware, i.e. without
+ a hypervisor. This option is not user-selectable but should
+ be selected by all platforms that need it.
+
+config PPC_OF_BOOT_TRAMPOLINE
+ bool "Support booting from Open Firmware or yaboot"
+ depends on PPC_BOOK3S_32 || PPC64
+ select RELOCATABLE if PPC64
+ default y
+ help
+ Support for booting from Open Firmware or yaboot using an
+ Open Firmware client interface. This enables the kernel to
+ communicate with open firmware to retrieve system information
+ such as the device tree.
+
+ In case of doubt, say Y
+
+config PPC_DT_CPU_FTRS
+ bool "Device-tree based CPU feature discovery & setup"
+ depends on PPC_BOOK3S_64
+ default y
+ help
+ This enables code to use a new device tree binding for describing CPU
+ compatibility and features. Saying Y here will attempt to use the new
+ binding if the firmware provides it. Currently only the skiboot
+ firmware provides this binding.
+ If you're not sure say Y.
+
+config UDBG_RTAS_CONSOLE
+ bool "RTAS based debug console"
+ depends on PPC_RTAS
+
+config PPC_SMP_MUXED_IPI
+ bool
+ help
+ Select this option if your platform supports SMP and your
+ interrupt controller provides less than 4 interrupts to each
+ cpu. This will enable the generic code to multiplex the 4
+ messages on to one ipi.
+
+config IPIC
+ bool
+
+config MPIC
+ bool
+
+config MPIC_TIMER
+ bool "MPIC Global Timer"
+ depends on MPIC && FSL_SOC
+ help
+ The MPIC global timer is a hardware timer inside the
+ Freescale PIC complying with OpenPIC standard. When the
+ specified interval times out, the hardware timer generates
+ an interrupt. The driver currently is only tested on fsl
+ chip, but it can potentially support other global timers
+ complying with the OpenPIC standard.
+
+config FSL_MPIC_TIMER_WAKEUP
+ tristate "Freescale MPIC global timer wakeup driver"
+ depends on FSL_SOC && MPIC_TIMER && PM
+ help
+ The driver provides a way to wake up the system by MPIC
+ timer.
+ e.g. "echo 5 > /sys/devices/system/mpic/timer_wakeup"
+
+config PPC_EPAPR_HV_PIC
+ bool
+ select EPAPR_PARAVIRT
+
+config MPIC_WEIRD
+ bool
+
+config MPIC_MSGR
+ bool "MPIC message register support"
+ depends on MPIC
+ help
+ Enables support for the MPIC message registers. These
+ registers are used for inter-processor communication.
+
+config PPC_I8259
+ bool
+
+config U3_DART
+ bool
+ depends on PPC64
+
+config PPC_RTAS
+ bool
+
+config RTAS_ERROR_LOGGING
+ bool
+ depends on PPC_RTAS
+
+config PPC_RTAS_DAEMON
+ bool
+ depends on PPC_RTAS
+
+config RTAS_PROC
+ bool "Proc interface to RTAS"
+ depends on PPC_RTAS && PROC_FS
+ default y
+
+config RTAS_FLASH
+ tristate "Firmware flash interface"
+ depends on PPC64 && RTAS_PROC
+
+config MMIO_NVRAM
+ bool
+
+config MPIC_U3_HT_IRQS
+ bool
+
+config MPIC_BROKEN_REGREAD
+ bool
+ depends on MPIC
+ help
+ This option enables a MPIC driver workaround for some chips
+ that have a bug that causes some interrupt source information
+ to not read back properly. It is safe to use on other chips as
+ well, but enabling it uses about 8KB of memory to keep copies
+ of the register contents in software.
+
+config EEH
+ bool
+ depends on (PPC_POWERNV || PPC_PSERIES) && PCI
+ default y
+
+config PPC_MPC106
+ bool
+
+config PPC_970_NAP
+ bool
+
+config PPC_P7_NAP
+ bool
+
+config PPC_BOOK3S_IDLE
+ def_bool y
+ depends on (PPC_970_NAP || PPC_P7_NAP)
+
+config PPC_INDIRECT_PIO
+ bool
+ select GENERIC_IOMAP
+
+config PPC_INDIRECT_MMIO
+ bool
+
+config PPC_IO_WORKAROUNDS
+ bool
+
+source "drivers/cpufreq/Kconfig"
+
+menu "CPUIdle driver"
+
+source "drivers/cpuidle/Kconfig"
+
+endmenu
+
+config TAU
+ bool "On-chip CPU temperature sensor support"
+ depends on PPC_BOOK3S_32
+ help
+ G3 and G4 processors have an on-chip temperature sensor called the
+ 'Thermal Assist Unit (TAU)', which, in theory, can measure the on-die
+ temperature within 2-4 degrees Celsius. This option shows the current
+ on-die temperature in /proc/cpuinfo if the cpu supports it.
+
+ Unfortunately, this sensor is very inaccurate when uncalibrated, so
+ don't assume the cpu temp is actually what /proc/cpuinfo says it is.
+
+config TAU_INT
+ bool "Interrupt driven TAU driver (EXPERIMENTAL)"
+ depends on TAU
+ help
+ The TAU supports an interrupt driven mode which causes an interrupt
+ whenever the temperature goes out of range. This is the fastest way
+ to get notified the temp has exceeded a range. With this option off,
+ a timer is used to re-check the temperature periodically.
+
+ If in doubt, say N here.
+
+config TAU_AVERAGE
+ bool "Average high and low temp"
+ depends on TAU
+ help
+ The TAU hardware can compare the temperature to an upper and lower
+ bound. The default behavior is to show both the upper and lower
+ bound in /proc/cpuinfo. If the range is large, the temperature is
+ either changing a lot, or the TAU hardware is broken (likely on some
+ G4's). If the range is small (around 4 degrees), the temperature is
+ relatively stable. If you say Y here, a single temperature value,
+ halfway between the upper and lower bounds, will be reported in
+ /proc/cpuinfo.
+
+ If in doubt, say N here.
+
+config QE_GPIO
+ bool "QE GPIO support"
+ depends on QUICC_ENGINE
+ select GPIOLIB
+ select OF_GPIO_MM_GPIOCHIP
+ help
+ Say Y here if you're going to use hardware that connects to the
+ QE GPIOs.
+
+config CPM2
+ bool "Enable support for the CPM2 (Communications Processor Module)"
+ depends on (FSL_SOC_BOOKE && PPC32) || PPC_82xx
+ select CPM
+ select HAVE_PCI
+ select GPIOLIB
+ select OF_GPIO_MM_GPIOCHIP
+ help
+ The CPM2 (Communications Processor Module) is a coprocessor on
+ embedded CPUs made by Freescale. Selecting this option means that
+ you wish to build a kernel for a machine with a CPM2 coprocessor
+ on it (826x, 827x, 8560).
+
+config FSL_ULI1575
+ bool "ULI1575 PCIe south bridge support"
+ depends on FSL_SOC_BOOKE || PPC_86xx
+ depends on PCI
+ select FSL_PCI
+ select GENERIC_ISA_DMA
+ help
+ Support for the ULI1575 PCIe south bridge that exists on some
+ Freescale reference boards. The boards all use the ULI in pretty
+ much the same way.
+
+config CPM
+ bool
+ select GENERIC_ALLOCATOR
+
+config OF_RTC
+ bool
+ help
+ Uses information from the OF or flattened device tree to instantiate
+ platform devices for direct mapped RTC chips like the DS1742 or DS1743.
+
+config GEN_RTC
+ bool "Use the platform RTC operations from user space"
+ select RTC_CLASS
+ select RTC_DRV_GENERIC
+ help
+ This option provides backwards compatibility with the old gen_rtc.ko
+ module that was traditionally used for old PowerPC machines.
+ Platforms should migrate to enabling the RTC_DRV_GENERIC by hand
+ replacing their get_rtc_time/set_rtc_time callbacks with
+ a proper RTC device driver.
+
+config MCU_MPC8349EMITX
+ bool "MPC8349E-mITX MCU driver"
+ depends on I2C=y && PPC_83xx
+ select GPIOLIB
+ help
+ Say Y here to enable soft power-off functionality on the Freescale
+ boards with the MPC8349E-mITX-compatible MCU chips. This driver will
+ also register MCU GPIOs with the generic GPIO API, so you'll be able
+ to use MCU pins as GPIOs.
+
+endmenu
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
new file mode 100644
index 000000000..b2d8c0da2
--- /dev/null
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -0,0 +1,646 @@
+# SPDX-License-Identifier: GPL-2.0
+config PPC32
+ bool
+ default y if !PPC64
+
+config PPC64
+ bool "64-bit kernel"
+ select ZLIB_DEFLATE
+ help
+ This option selects whether a 32-bit or a 64-bit kernel
+ will be built.
+
+menu "Processor support"
+choice
+ prompt "Processor Type"
+ depends on PPC32
+ help
+ There are five families of 32 bit PowerPC chips supported.
+ The most common ones are the desktop and server CPUs (603,
+ 604, 740, 750, 74xx) CPUs from Freescale and IBM, with their
+ embedded 512x/52xx/82xx/83xx/86xx counterparts.
+ The other embedded parts, namely 4xx, 8xx and e500
+ (85xx) each form a family of their own that is not compatible
+ with the others.
+
+ If unsure, select 52xx/6xx/7xx/74xx/82xx/83xx/86xx.
+
+config PPC_BOOK3S_32
+ bool "512x/52xx/6xx/7xx/74xx/82xx/83xx/86xx"
+ imply PPC_FPU
+ select PPC_HAVE_PMU_SUPPORT
+ select HAVE_ARCH_VMAP_STACK
+
+config PPC_85xx
+ bool "Freescale 85xx"
+ select PPC_E500
+
+config PPC_8xx
+ bool "Freescale 8xx"
+ select ARCH_SUPPORTS_HUGETLBFS
+ select FSL_SOC
+ select PPC_KUEP
+ select HAVE_ARCH_VMAP_STACK
+ select HUGETLBFS
+
+config 40x
+ bool "AMCC 40x"
+ select PPC_DCR_NATIVE
+ select PPC_UDBG_16550
+ select 4xx_SOC
+ select HAVE_PCI
+ select PPC_KUEP if PPC_KUAP
+
+config 44x
+ bool "AMCC 44x, 46x or 47x"
+ select PPC_DCR_NATIVE
+ select PPC_UDBG_16550
+ select 4xx_SOC
+ select HAVE_PCI
+ select PHYS_64BIT
+ select PPC_KUEP
+
+endchoice
+
+config PPC_BOOK3S_603
+ bool "Support for 603 SW loaded TLB"
+ depends on PPC_BOOK3S_32
+ default y
+ help
+ Provide support for processors based on the 603 cores. Those
+ processors don't have a HASH MMU and provide SW TLB loading.
+
+config PPC_BOOK3S_604
+ bool "Support for 604+ HASH MMU" if PPC_BOOK3S_603
+ depends on PPC_BOOK3S_32
+ default y
+ help
+ Provide support for processors not based on the 603 cores.
+ Those processors have a HASH MMU.
+
+choice
+ prompt "Processor Type"
+ depends on PPC64
+ help
+ There are two families of 64 bit PowerPC chips supported.
+ The most common ones are the desktop and server CPUs
+ (POWER5, 970, POWER5+, POWER6, POWER7, POWER8, POWER9 ...)
+
+ The others are the "embedded" processors compliant with the
+ "Book 3E" variant of the architecture
+
+config PPC_BOOK3S_64
+ bool "Server processors"
+ select PPC_FPU
+ select PPC_HAVE_PMU_SUPPORT
+ select HAVE_ARCH_TRANSPARENT_HUGEPAGE
+ select HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
+ select ARCH_ENABLE_HUGEPAGE_MIGRATION if HUGETLB_PAGE && MIGRATION
+ select ARCH_ENABLE_SPLIT_PMD_PTLOCK
+ select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE
+ select ARCH_SUPPORTS_HUGETLBFS
+ select ARCH_SUPPORTS_NUMA_BALANCING
+ select HAVE_MOVE_PMD
+ select HAVE_MOVE_PUD
+ select IRQ_WORK
+ select PPC_64S_HASH_MMU if !PPC_RADIX_MMU
+ select KASAN_VMALLOC if KASAN
+
+config PPC_BOOK3E_64
+ bool "Embedded processors"
+ select PPC_E500
+ select PPC_E500MC
+ select PPC_FPU # Make it a choice ?
+ select PPC_SMP_MUXED_IPI
+ select PPC_DOORBELL
+ select ZONE_DMA
+
+endchoice
+
+choice
+ prompt "CPU selection"
+ help
+ This will create a kernel which is optimised for a particular CPU.
+ The resulting kernel may not run on other CPUs, so use this with care.
+
+ If unsure, select Generic.
+
+config POWERPC64_CPU
+ bool "Generic (POWER5 and PowerPC 970 and above)"
+ depends on PPC_BOOK3S_64 && !CPU_LITTLE_ENDIAN
+ select PPC_64S_HASH_MMU
+
+config POWERPC64_CPU
+ bool "Generic (POWER8 and above)"
+ depends on PPC_BOOK3S_64 && CPU_LITTLE_ENDIAN
+ select ARCH_HAS_FAST_MULTIPLIER
+ select PPC_64S_HASH_MMU
+ select PPC_HAS_LBARX_LHARX
+
+config POWERPC_CPU
+ bool "Generic 32 bits powerpc"
+ depends on PPC_BOOK3S_32
+
+config CELL_CPU
+ bool "Cell Broadband Engine"
+ depends on PPC_BOOK3S_64 && !CPU_LITTLE_ENDIAN
+ depends on !CC_IS_CLANG
+ select PPC_64S_HASH_MMU
+
+config PPC_970_CPU
+ bool "PowerPC 970 (including PowerPC G5)"
+ depends on PPC_BOOK3S_64 && !CPU_LITTLE_ENDIAN
+ select PPC_64S_HASH_MMU
+
+config POWER6_CPU
+ bool "POWER6"
+ depends on PPC_BOOK3S_64 && !CPU_LITTLE_ENDIAN
+ select PPC_64S_HASH_MMU
+
+config POWER7_CPU
+ bool "POWER7"
+ depends on PPC_BOOK3S_64
+ select ARCH_HAS_FAST_MULTIPLIER
+ select PPC_64S_HASH_MMU
+ select PPC_HAS_LBARX_LHARX
+
+config POWER8_CPU
+ bool "POWER8"
+ depends on PPC_BOOK3S_64
+ select ARCH_HAS_FAST_MULTIPLIER
+ select PPC_64S_HASH_MMU
+ select PPC_HAS_LBARX_LHARX
+
+config POWER9_CPU
+ bool "POWER9"
+ depends on PPC_BOOK3S_64
+ select ARCH_HAS_FAST_MULTIPLIER
+ select PPC_HAS_LBARX_LHARX
+
+config POWER10_CPU
+ bool "POWER10"
+ depends on PPC_BOOK3S_64
+ select ARCH_HAS_FAST_MULTIPLIER
+ select PPC_HAVE_PREFIXED_SUPPORT
+ select PPC_HAVE_PCREL_SUPPORT
+
+config E5500_CPU
+ bool "Freescale e5500"
+ depends on PPC64 && PPC_E500
+
+config E6500_CPU
+ bool "Freescale e6500"
+ depends on PPC64 && PPC_E500
+ depends on !CC_IS_CLANG
+ select PPC_HAS_LBARX_LHARX
+
+config 405_CPU
+ bool "40x family"
+ depends on 40x
+ depends on !CC_IS_CLANG
+
+config 440_CPU
+ bool "440 (44x family)"
+ depends on 44x
+
+config 464_CPU
+ bool "464 (44x family)"
+ depends on 44x
+ depends on !CC_IS_CLANG
+
+config 476_CPU
+ bool "476 (47x family)"
+ depends on PPC_47x
+ depends on !CC_IS_CLANG
+
+config 860_CPU
+ bool "8xx family"
+ depends on PPC_8xx
+ depends on !CC_IS_CLANG
+
+config E300C2_CPU
+ bool "e300c2 (832x)"
+ depends on PPC_BOOK3S_32
+ depends on !CC_IS_CLANG
+
+config E300C3_CPU
+ bool "e300c3 (831x)"
+ depends on PPC_BOOK3S_32
+ depends on !CC_IS_CLANG
+
+config G4_CPU
+ bool "G4 (74xx)"
+ depends on PPC_BOOK3S_32
+ select ALTIVEC
+
+config E500_CPU
+ bool "e500 (8540)"
+ depends on PPC_85xx && !PPC_E500MC
+
+config E500MC_CPU
+ bool "e500mc"
+ depends on PPC_85xx && PPC_E500MC
+
+config TOOLCHAIN_DEFAULT_CPU
+ bool "Rely on the toolchain's implicit default CPU"
+
+endchoice
+
+config TARGET_CPU_BOOL
+ bool
+ default !TOOLCHAIN_DEFAULT_CPU
+
+config TARGET_CPU
+ string
+ depends on TARGET_CPU_BOOL
+ default "cell" if CELL_CPU
+ default "970" if PPC_970_CPU
+ default "power6" if POWER6_CPU
+ default "power7" if POWER7_CPU
+ default "power8" if POWER8_CPU
+ default "power9" if POWER9_CPU
+ default "power10" if POWER10_CPU
+ default "e5500" if E5500_CPU
+ default "e6500" if E6500_CPU
+ default "power4" if POWERPC64_CPU && !CPU_LITTLE_ENDIAN
+ default "power8" if POWERPC64_CPU && CPU_LITTLE_ENDIAN
+ default "405" if 405_CPU
+ default "440" if 440_CPU
+ default "464" if 464_CPU
+ default "476" if 476_CPU
+ default "860" if 860_CPU
+ default "e300c2" if E300C2_CPU
+ default "e300c3" if E300C3_CPU
+ default "G4" if G4_CPU
+ default "8540" if E500_CPU
+ default "e500mc" if E500MC_CPU
+ default "powerpc" if POWERPC_CPU
+
+config TUNE_CPU
+ string
+ depends on POWERPC64_CPU
+ default "-mtune=power10" if $(cc-option,-mtune=power10)
+ default "-mtune=power9" if $(cc-option,-mtune=power9)
+ default "-mtune=power8" if $(cc-option,-mtune=power8)
+
+config PPC_BOOK3S
+ def_bool y
+ depends on PPC_BOOK3S_32 || PPC_BOOK3S_64
+
+config PPC_E500
+ select FSL_EMB_PERFMON
+ bool
+ select ARCH_SUPPORTS_HUGETLBFS if PHYS_64BIT || PPC64
+ select PPC_SMP_MUXED_IPI
+ select PPC_DOORBELL
+ select PPC_KUEP
+
+config PPC_E500MC
+ bool "e500mc Support"
+ select PPC_FPU
+ select COMMON_CLK
+ depends on PPC_E500
+ help
+ This must be enabled for running on e500mc (and derivatives
+ such as e5500/e6500), and must be disabled for running on
+ e500v1 or e500v2.
+
+config PPC_FPU_REGS
+ bool
+
+config PPC_FPU
+ bool "Support for Floating Point Unit (FPU)" if PPC_MPC832x
+ default y if PPC64
+ select PPC_FPU_REGS
+ help
+ This must be enabled to support the Floating Point Unit
+ Most 6xx have an FPU but e300c2 core (mpc832x) don't have
+ an FPU, so when building an embedded kernel for that target
+ you can disable FPU support.
+
+ If unsure say Y.
+
+config FSL_EMB_PERFMON
+ bool "Freescale Embedded Perfmon"
+ depends on PPC_E500 || PPC_83xx
+ help
+ This is the Performance Monitor support found on the e500 core
+ and some e300 cores (c3 and c4). Select this only if your
+ core supports the Embedded Performance Monitor APU
+
+config FSL_EMB_PERF_EVENT
+ bool
+ depends on FSL_EMB_PERFMON && PERF_EVENTS && !PPC_PERF_CTRS
+ default y
+
+config FSL_EMB_PERF_EVENT_E500
+ bool
+ depends on FSL_EMB_PERF_EVENT && PPC_E500
+ default y
+
+config 4xx
+ bool
+ depends on 40x || 44x
+ default y
+
+config BOOKE
+ bool
+ depends on PPC_E500 || 44x
+ default y
+
+config BOOKE_OR_40x
+ bool
+ depends on BOOKE || 40x
+ default y
+
+config PTE_64BIT
+ bool
+ depends on 44x || PPC_E500 || PPC_86xx
+ default y if PHYS_64BIT
+
+config PHYS_64BIT
+ bool 'Large physical address support' if PPC_E500 || PPC_86xx
+ depends on (44x || PPC_E500 || PPC_86xx) && !PPC_83xx && !PPC_82xx
+ select PHYS_ADDR_T_64BIT
+ help
+ This option enables kernel support for larger than 32-bit physical
+ addresses. This feature may not be available on all cores.
+
+ If you have more than 3.5GB of RAM or so, you also need to enable
+ SWIOTLB under Kernel Options for this to work. The actual number
+ is platform-dependent.
+
+ If in doubt, say N here.
+
+config ALTIVEC
+ bool "AltiVec Support"
+ depends on PPC_BOOK3S || (PPC_E500MC && PPC64 && !E5500_CPU)
+ select PPC_FPU
+ help
+ This option enables kernel support for the Altivec extensions to the
+ PowerPC processor. The kernel currently supports saving and restoring
+ altivec registers, and turning on the 'altivec enable' bit so user
+ processes can execute altivec instructions.
+
+ This option is only useful if you have a processor that supports
+ altivec (G4, otherwise known as 74xx series), but does not have
+ any effect on a non-altivec cpu (it does, however, add code to the
+ kernel).
+
+ If in doubt, say Y here.
+
+config VSX
+ bool "VSX Support"
+ depends on PPC_BOOK3S_64 && ALTIVEC && PPC_FPU
+ help
+
+ This option enables kernel support for the Vector Scalar extensions
+ to the PowerPC processor. The kernel currently supports saving and
+ restoring VSX registers, and turning on the 'VSX enable' bit so user
+ processes can execute VSX instructions.
+
+ This option is only useful if you have a processor that supports
+ VSX (P7 and above), but does not have any effect on non-VSX
+ CPUs (it does, however, add code to the kernel).
+
+ If in doubt, say Y here.
+
+config SPE_POSSIBLE
+ def_bool y
+ depends on PPC_E500 && !PPC_E500MC
+
+config SPE
+ bool "SPE Support"
+ depends on SPE_POSSIBLE
+ default y
+ help
+ This option enables kernel support for the Signal Processing
+ Extensions (SPE) to the PowerPC processor. The kernel currently
+ supports saving and restoring SPE registers, and turning on the
+ 'spe enable' bit so user processes can execute SPE instructions.
+
+ This option is only useful if you have a processor that supports
+ SPE (e500, otherwise known as 85xx series), but does not have any
+ effect on a non-spe cpu (it does, however add code to the kernel).
+
+ If in doubt, say Y here.
+
+config PPC_64S_HASH_MMU
+ bool "Hash MMU Support"
+ depends on PPC_BOOK3S_64
+ default y
+ help
+ Enable support for the Power ISA Hash style MMU. This is implemented
+ by all IBM Power and other 64-bit Book3S CPUs before ISA v3.0. The
+ OpenPOWER ISA does not mandate the hash MMU and some CPUs do not
+ implement it (e.g., Microwatt).
+
+ Note that POWER9 PowerVM platforms only support the hash
+ MMU. From POWER10 radix is also supported by PowerVM.
+
+ If you're unsure, say Y.
+
+config PPC_RADIX_MMU
+ bool "Radix MMU Support"
+ depends on PPC_BOOK3S_64
+ select ARCH_HAS_GIGANTIC_PAGE
+ default y
+ help
+ Enable support for the Power ISA 3.0 Radix style MMU. Currently this
+ is only implemented by IBM Power9 CPUs, if you don't have one of them
+ you can probably disable this.
+
+config PPC_RADIX_MMU_DEFAULT
+ bool "Default to using the Radix MMU when possible" if PPC_64S_HASH_MMU
+ depends on PPC_BOOK3S_64
+ depends on PPC_RADIX_MMU
+ default y
+ help
+ When the hardware supports the Radix MMU, default to using it unless
+ "disable_radix[=yes]" is specified on the kernel command line.
+
+ If this option is disabled, the Hash MMU will be used by default,
+ unless "disable_radix=no" is specified on the kernel command line.
+
+ If you're unsure, say Y.
+
+config PPC_KERNEL_PREFIXED
+ depends on PPC_HAVE_PREFIXED_SUPPORT
+ depends on CC_HAS_PREFIXED
+ default n
+ bool "Build Kernel with Prefixed Instructions"
+ help
+ POWER10 and later CPUs support prefixed instructions, 8 byte
+ instructions that include large immediate, pc relative addressing,
+ and various floating point, vector, MMA.
+
+ This option builds the kernel with prefixed instructions, and
+ allows a pc relative addressing option to be selected.
+
+ Kernel support for prefixed instructions in applications and guests
+ is not affected by this option.
+
+config PPC_KERNEL_PCREL
+ depends on PPC_HAVE_PCREL_SUPPORT
+ depends on PPC_HAVE_PREFIXED_SUPPORT
+ depends on CC_HAS_PCREL
+ default n
+ select PPC_KERNEL_PREFIXED
+ bool "Build Kernel with PC-Relative addressing model"
+ help
+ POWER10 and later CPUs support pc relative addressing. Recent
+ compilers have support for an ELF ABI extension for a pc relative
+ ABI.
+
+ This option builds the kernel with the pc relative ABI model.
+
+config PPC_KUEP
+ bool "Kernel Userspace Execution Prevention" if !40x
+ default y if !40x
+ help
+ Enable support for Kernel Userspace Execution Prevention (KUEP)
+
+ If you're unsure, say Y.
+
+config PPC_KUAP
+ bool "Kernel Userspace Access Protection"
+ default y
+ help
+ Enable support for Kernel Userspace Access Protection (KUAP)
+
+ If you're unsure, say Y.
+
+config PPC_KUAP_DEBUG
+ bool "Extra debugging for Kernel Userspace Access Protection"
+ depends on PPC_KUAP
+ help
+ Add extra debugging for Kernel Userspace Access Protection (KUAP)
+ If you're unsure, say N.
+
+config PPC_PKEY
+ def_bool y
+ depends on PPC_BOOK3S_64
+ depends on PPC_MEM_KEYS || PPC_KUAP || PPC_KUEP
+
+
+config PPC_MMU_NOHASH
+ def_bool y
+ depends on !PPC_BOOK3S
+
+config PPC_HAVE_PMU_SUPPORT
+ bool
+
+config PPC_HAVE_PREFIXED_SUPPORT
+ bool
+
+config PPC_HAVE_PCREL_SUPPORT
+ bool
+
+config PMU_SYSFS
+ bool "Create PMU SPRs sysfs file"
+ default n
+ help
+ This option enables sysfs file creation for PMU SPRs like MMCR* and PMC*.
+
+config PPC_PERF_CTRS
+ def_bool y
+ depends on PERF_EVENTS && PPC_HAVE_PMU_SUPPORT
+ help
+ This enables the powerpc-specific perf_event back-end.
+
+config FORCE_SMP
+ # Allow platforms to force SMP=y by selecting this
+ bool
+ select SMP
+
+config SMP
+ depends on PPC_BOOK3S || PPC_E500 || PPC_47x
+ select GENERIC_IRQ_MIGRATION
+ bool "Symmetric multi-processing support" if !FORCE_SMP
+ help
+ This enables support for systems with more than one CPU. If you have
+ a system with only one CPU, say N. If you have a system with more
+ than one CPU, say Y. Note that the kernel does not currently
+ support SMP machines with 603/603e/603ev or PPC750 ("G3") processors
+ since they have inadequate hardware support for multiprocessor
+ operation.
+
+ If you say N here, the kernel will run on single and multiprocessor
+ machines, but will use only one CPU of a multiprocessor machine. If
+ you say Y here, the kernel will run on single-processor machines.
+ On a single-processor machine, the kernel will run faster if you say
+ N here.
+
+ If you don't know what to do here, say N.
+
+config NR_CPUS
+ int "Maximum number of CPUs (2-8192)" if SMP
+ range 2 8192 if SMP
+ default "1" if !SMP
+ default "32" if PPC64
+ default "4"
+
+config NOT_COHERENT_CACHE
+ bool
+ depends on 4xx || PPC_8xx || PPC_MPC512x || \
+ GAMECUBE_COMMON || AMIGAONE
+ select ARCH_HAS_DMA_PREP_COHERENT
+ select ARCH_HAS_SYNC_DMA_FOR_DEVICE
+ select ARCH_HAS_SYNC_DMA_FOR_CPU
+ select DMA_DIRECT_REMAP
+ default n if PPC_47x
+ default y
+
+config CHECK_CACHE_COHERENCY
+ bool
+
+config PPC_DOORBELL
+ bool
+
+endmenu
+
+config VDSO32
+ def_bool y
+ depends on PPC32 || COMPAT
+ help
+ This symbol controls whether we build the 32-bit VDSO. We obviously
+ want to do that if we're building a 32-bit kernel. If we're building
+ a 64-bit kernel then we only want a 32-bit VDSO if we're also enabling
+ COMPAT.
+
+choice
+ prompt "Endianness selection"
+ default CPU_BIG_ENDIAN
+ help
+ This option selects whether a big endian or little endian kernel will
+ be built.
+
+config CPU_BIG_ENDIAN
+ bool "Build big endian kernel"
+ help
+ Build a big endian kernel.
+
+ If unsure, select this option.
+
+config CPU_LITTLE_ENDIAN
+ bool "Build little endian kernel"
+ depends on PPC_BOOK3S_64
+ select PPC64_BOOT_WRAPPER
+ help
+ Build a little endian kernel.
+
+ Note that if cross compiling a little endian kernel,
+ CROSS_COMPILE must point to a toolchain capable of targeting
+ little endian powerpc.
+
+endchoice
+
+config PPC64_ELF_ABI_V1
+ def_bool PPC64 && (CPU_BIG_ENDIAN && !PPC64_BIG_ENDIAN_ELF_ABI_V2)
+
+config PPC64_ELF_ABI_V2
+ def_bool PPC64 && !PPC64_ELF_ABI_V1
+
+config PPC64_BOOT_WRAPPER
+ def_bool n
+ depends on CPU_LITTLE_ENDIAN
diff --git a/arch/powerpc/platforms/Makefile b/arch/powerpc/platforms/Makefile
new file mode 100644
index 000000000..94470fb27
--- /dev/null
+++ b/arch/powerpc/platforms/Makefile
@@ -0,0 +1,26 @@
+# SPDX-License-Identifier: GPL-2.0
+
+obj-$(CONFIG_FSL_ULI1575) += fsl_uli1575.o
+
+obj-$(CONFIG_PPC_PMAC) += powermac/
+obj-$(CONFIG_PPC_CHRP) += chrp/
+obj-$(CONFIG_4xx) += 4xx/
+obj-$(CONFIG_40x) += 40x/
+obj-$(CONFIG_44x) += 44x/
+obj-$(CONFIG_PPC_MPC512x) += 512x/
+obj-$(CONFIG_PPC_MPC52xx) += 52xx/
+obj-$(CONFIG_PPC_8xx) += 8xx/
+obj-$(CONFIG_PPC_82xx) += 82xx/
+obj-$(CONFIG_PPC_83xx) += 83xx/
+obj-$(CONFIG_FSL_SOC_BOOKE) += 85xx/
+obj-$(CONFIG_PPC_86xx) += 86xx/
+obj-$(CONFIG_PPC_POWERNV) += powernv/
+obj-$(CONFIG_PPC_PSERIES) += pseries/
+obj-$(CONFIG_PPC_MAPLE) += maple/
+obj-$(CONFIG_PPC_PASEMI) += pasemi/
+obj-$(CONFIG_PPC_CELL) += cell/
+obj-$(CONFIG_PPC_PS3) += ps3/
+obj-$(CONFIG_EMBEDDED6xx) += embedded6xx/
+obj-$(CONFIG_AMIGAONE) += amigaone/
+obj-$(CONFIG_PPC_BOOK3S) += book3s/
+obj-$(CONFIG_PPC_MICROWATT) += microwatt/
diff --git a/arch/powerpc/platforms/amigaone/Kconfig b/arch/powerpc/platforms/amigaone/Kconfig
new file mode 100644
index 000000000..0741edb10
--- /dev/null
+++ b/arch/powerpc/platforms/amigaone/Kconfig
@@ -0,0 +1,19 @@
+# SPDX-License-Identifier: GPL-2.0
+config AMIGAONE
+ bool "Eyetech AmigaOne/MAI Teron"
+ depends on PPC_BOOK3S_32 && BROKEN_ON_SMP
+ select PPC_I8259
+ select PPC_INDIRECT_PCI
+ select PPC_UDBG_16550
+ select FORCE_PCI
+ select NOT_COHERENT_CACHE
+ select CHECK_CACHE_COHERENCY
+ select DEFAULT_UIMAGE
+ select HAVE_PCSPKR_PLATFORM
+ help
+ Select AmigaOne for the following machines:
+ - AmigaOne SE/Teron CX (G3 only)
+ - AmigaOne XE/Teron PX
+ - uA1/Teron mini
+ More information is available at:
+ <http://amigaone-linux.sourceforge.net/>.
diff --git a/arch/powerpc/platforms/amigaone/Makefile b/arch/powerpc/platforms/amigaone/Makefile
new file mode 100644
index 000000000..e95e4e3e2
--- /dev/null
+++ b/arch/powerpc/platforms/amigaone/Makefile
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+obj-y += setup.o
diff --git a/arch/powerpc/platforms/amigaone/setup.c b/arch/powerpc/platforms/amigaone/setup.c
new file mode 100644
index 000000000..6c6e714a7
--- /dev/null
+++ b/arch/powerpc/platforms/amigaone/setup.c
@@ -0,0 +1,168 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * AmigaOne platform setup
+ *
+ * Copyright 2008 Gerhard Pircher (gerhard_pircher@gmx.net)
+ *
+ * Based on original amigaone_setup.c source code
+ * Copyright 2003 by Hans-Joerg Frieden and Thomas Frieden
+ */
+
+#include <linux/irqdomain.h>
+#include <linux/kernel.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/seq_file.h>
+#include <generated/utsrelease.h>
+
+#include <asm/machdep.h>
+#include <asm/cputable.h>
+#include <asm/pci-bridge.h>
+#include <asm/i8259.h>
+#include <asm/time.h>
+#include <asm/udbg.h>
+#include <asm/dma.h>
+
+extern void __flush_disable_L1(void);
+
+/* show_cpuinfo hook: write the fixed board vendor line into @m. */
+void amigaone_show_cpuinfo(struct seq_file *m)
+{
+	seq_puts(m, "vendor\t\t: Eyetech Ltd.\n");
+}
+
+/*
+ * Register the PCI host bridge described by device-tree node @dev.
+ *
+ * Address entries 0 and 1 of the node supply the values handed to
+ * setup_indirect_pci() as config address and config data.  The bus
+ * number range comes from the "bus-range" property; when that property
+ * is missing or too short to hold two cells, buses 0..0xff are assumed.
+ *
+ * Returns 0 on success, -ENODEV when either address entry is missing, or
+ * -ENOMEM when no PCI controller could be allocated.
+ */
+static int __init amigaone_add_bridge(struct device_node *dev)
+{
+	const u32 *cfg_addr, *cfg_data;
+	int len;
+	const int *bus_range;
+	struct pci_controller *hose;
+
+	printk(KERN_INFO "Adding PCI host bridge %pOF\n", dev);
+
+	cfg_addr = of_get_address(dev, 0, NULL, NULL);
+	cfg_data = of_get_address(dev, 1, NULL, NULL);
+	if ((cfg_addr == NULL) || (cfg_data == NULL))
+		return -ENODEV;
+
+	bus_range = of_get_property(dev, "bus-range", &len);
+	if ((bus_range == NULL) || (len < 2 * sizeof(int))) {
+		printk(KERN_WARNING "Can't get bus-range for %pOF, assume"
+		       " bus 0\n", dev);
+		/*
+		 * A property shorter than two cells must not be indexed
+		 * below; treat it exactly like a missing property.
+		 */
+		bus_range = NULL;
+	}
+
+	hose = pcibios_alloc_controller(dev);
+	if (hose == NULL)
+		return -ENOMEM;
+
+	hose->first_busno = bus_range ? bus_range[0] : 0;
+	hose->last_busno = bus_range ? bus_range[1] : 0xff;
+
+	setup_indirect_pci(hose, cfg_addr[0], cfg_data[0], 0);
+
+	/* Interpret the "ranges" property */
+	/* This also maps the I/O region and sets isa_io/mem_base */
+	pci_process_bridge_OF_ranges(hose, dev, 1);
+
+	return 0;
+}
+
+/*
+ * setup_arch hook: report the kernel release through the machine's
+ * progress callback when one is installed.
+ */
+void __init amigaone_setup_arch(void)
+{
+	if (!ppc_md.progress)
+		return;
+
+	ppc_md.progress("Linux/PPC "UTS_RELEASE"\n", 0);
+}
+
+/*
+ * discover_phbs hook: call amigaone_add_bridge() for every "pci" node
+ * compatible with "mai-logic,articia-s".
+ *
+ * phb holds only the result of the most recent amigaone_add_bridge()
+ * call and stays at -ENODEV when no node matches, so the BUG_ON() fires
+ * when no bridge was found or when the last bridge visited failed.
+ */
+static void __init amigaone_discover_phbs(void)
+{
+ struct device_node *np;
+ int phb = -ENODEV;
+
+ /* Lookup PCI host bridges. */
+ for_each_compatible_node(np, "pci", "mai-logic,articia-s")
+ phb = amigaone_add_bridge(np);
+
+ BUG_ON(phb != 0);
+}
+
+/*
+ * init_IRQ hook: initialise the i8259 found in the device tree and make
+ * it the default interrupt host.
+ *
+ * The interrupt acknowledge address is read from the
+ * "8259-interrupt-acknowledge" property of the Articia S PCI node; when
+ * it is absent, 0 is passed to i8259_init() and a warning is printed.
+ */
+void __init amigaone_init_IRQ(void)
+{
+	struct device_node *pic, *bridge;
+	unsigned long int_ack = 0;
+
+	/* Search for ISA interrupt controller. */
+	pic = of_find_compatible_node(NULL, "interrupt-controller",
+				      "pnpPNP,000");
+	BUG_ON(pic == NULL);
+
+	/* Look for interrupt acknowledge address in the PCI root node. */
+	bridge = of_find_compatible_node(NULL, "pci", "mai-logic,articia-s");
+	if (bridge) {
+		const unsigned long *ack;
+
+		ack = of_get_property(bridge, "8259-interrupt-acknowledge",
+				      NULL);
+		if (ack)
+			int_ack = *ack;
+		of_node_put(bridge);
+	}
+
+	if (!int_ack)
+		printk(KERN_WARNING "Cannot find PCI interrupt acknowledge"
+		       " address, polling\n");
+
+	i8259_init(pic, int_ack);
+	ppc_md.get_irq = i8259_irq;
+	irq_set_default_host(i8259_get_host());
+}
+
+/*
+ * Claim the fixed I/O port ranges listed below so they show up as busy.
+ * The results of request_region() are not checked; always returns 0.
+ */
+static int __init request_isa_regions(void)
+{
+	static const struct {
+		unsigned long start;
+		unsigned long len;
+		const char *name;
+	} regions[] = {
+		{ 0x00, 0x20, "dma1" },
+		{ 0x40, 0x20, "timer" },
+		{ 0x80, 0x10, "dma page reg" },
+		{ 0xc0, 0x20, "dma2" },
+	};
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(regions); i++)
+		request_region(regions[i].start, regions[i].len,
+			       regions[i].name);
+
+	return 0;
+}
+machine_device_initcall(amigaone, request_isa_regions);
+
+/*
+ * restart hook: with interrupts disabled and after __flush_disable_L1(),
+ * load SRR0 with 0xfff00100 and SRR1 with MSR_IP, then execute rfi.
+ * @cmd is not used.  The trailing loop is not expected to be reached.
+ */
+void __noreturn amigaone_restart(char *cmd)
+{
+ local_irq_disable();
+
+ /* Flush and disable caches. */
+ __flush_disable_L1();
+
+ /* Set SRR0 to the reset vector and turn on MSR_IP. */
+ mtspr(SPRN_SRR0, 0xfff00100);
+ mtspr(SPRN_SRR1, MSR_IP);
+
+ /* Do an rfi to jump back to firmware. */
+ __asm__ __volatile__("rfi" : : : "memory");
+
+ /* Not reached. */
+ while (1);
+}
+
+/*
+ * probe hook: clears CPU_FTR_NEED_COHERENT from the current CPU spec,
+ * sets DMA_MODE_READ and DMA_MODE_WRITE to 0x44 and 0x48, and always
+ * returns 1.
+ */
+static int __init amigaone_probe(void)
+{
+ /*
+ * Coherent memory access cause complete system lockup! Thus
+ * disable this CPU feature, even if the CPU needs it.
+ */
+ cur_cpu_spec->cpu_features &= ~CPU_FTR_NEED_COHERENT;
+
+ /* NOTE(review): presumably the ISA DMA controller mode bytes - confirm. */
+ DMA_MODE_READ = 0x44;
+ DMA_MODE_WRITE = 0x48;
+
+ return 1;
+}
+
+/*
+ * Machine description for device trees compatible with
+ * "eyetech,amigaone". All hooks except .progress are defined in this
+ * file; progress output goes through udbg_progress.
+ */
+define_machine(amigaone) {
+ .name = "AmigaOne",
+ .compatible = "eyetech,amigaone",
+ .probe = amigaone_probe,
+ .setup_arch = amigaone_setup_arch,
+ .discover_phbs = amigaone_discover_phbs,
+ .show_cpuinfo = amigaone_show_cpuinfo,
+ .init_IRQ = amigaone_init_IRQ,
+ .restart = amigaone_restart,
+ .progress = udbg_progress,
+};
diff --git a/arch/powerpc/platforms/book3s/Kconfig b/arch/powerpc/platforms/book3s/Kconfig
new file mode 100644
index 000000000..34c931592
--- /dev/null
+++ b/arch/powerpc/platforms/book3s/Kconfig
@@ -0,0 +1,15 @@
+# SPDX-License-Identifier: GPL-2.0
+config PPC_VAS
+ bool "IBM Virtual Accelerator Switchboard (VAS)"
+ depends on (PPC_POWERNV || PPC_PSERIES) && PPC_64K_PAGES
+ default y
+ help
+ This enables support for IBM Virtual Accelerator Switchboard (VAS).
+
+ VAS devices are found in POWER9-based and later systems; they
+ provide access to accelerator coprocessors such as NX-GZIP and
+ NX-842. This config allows the kernel to use NX-842 accelerators,
+ and user-mode APIs for the NX-GZIP accelerator on POWER9 PowerNV
+ and POWER10 PowerVM platforms.
+
+ If unsure, say "N".
diff --git a/arch/powerpc/platforms/book3s/Makefile b/arch/powerpc/platforms/book3s/Makefile
new file mode 100644
index 000000000..e790f1910
--- /dev/null
+++ b/arch/powerpc/platforms/book3s/Makefile
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+obj-$(CONFIG_PPC_VAS) += vas-api.o
diff --git a/arch/powerpc/platforms/book3s/vas-api.c b/arch/powerpc/platforms/book3s/vas-api.c
new file mode 100644
index 000000000..f381b177e
--- /dev/null
+++ b/arch/powerpc/platforms/book3s/vas-api.c
@@ -0,0 +1,634 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * VAS user space API for its accelerators (Only NX-GZIP is supported now)
+ * Copyright (C) 2019 Haren Myneni, IBM Corp
+ */
+
+#define pr_fmt(fmt) "vas-api: " fmt
+
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/cdev.h>
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/kthread.h>
+#include <linux/sched/signal.h>
+#include <linux/mmu_context.h>
+#include <linux/io.h>
+#include <asm/vas.h>
+#include <uapi/asm/vas-api.h>
+
+/*
+ * The driver creates the device node that can be used as follows:
+ * For NX-GZIP
+ *
+ * fd = open("/dev/crypto/nx-gzip", O_RDWR);
+ * rc = ioctl(fd, VAS_TX_WIN_OPEN, &attr);
+ * paste_addr = mmap(NULL, PAGE_SIZE, prot, MAP_SHARED, fd, 0ULL).
+ * vas_copy(&crb, 0, 1);
+ * vas_paste(paste_addr, 0, 1);
+ * close(fd) or exit process to close window.
+ *
+ * where "vas_copy" and "vas_paste" are defined in copy-paste.h.
+ * The copy/paste instructions return to user space directly, so refer
+ * to the NX hardware documentation for exact copy/paste usage and
+ * completion / error conditions.
+ */
+
+/*
+ * Wrapper object for the nx-gzip device - there is just one instance of
+ * this node for the whole system.
+ */
+static struct coproc_dev {
+ struct cdev cdev; /* initialised and added in vas_register_coproc_api() */
+ struct device *device; /* result of device_create() */
+ char *name; /* not assigned anywhere in this file's visible code */
+ dev_t devt; /* filled in by alloc_chrdev_region() */
+ struct class *class; /* result of class_create() */
+ enum vas_cop_type cop_type; /* coprocessor type given at registration */
+ const struct vas_user_win_ops *vops; /* window ops given at registration */
+} coproc_device;
+
+/*
+ * Per-open state: allocated in coproc_open(), kept in the file's
+ * private_data and freed in coproc_release().
+ */
+struct coproc_instance {
+ struct coproc_dev *coproc; /* device the file was opened on */
+ struct vas_window *txwin; /* send window; NULL until VAS_TX_WIN_OPEN succeeds */
+};
+
+/*
+ * Class devnode callback: returns a kasprintf()-allocated string of the
+ * form "crypto/<device name>". @mode is left untouched.
+ */
+static char *coproc_devnode(const struct device *dev, umode_t *mode)
+{
+ return kasprintf(GFP_KERNEL, "crypto/%s", dev_name(dev));
+}
+
+/*
+ * Take references to the current task's pid, mm and tgid and record
+ * them in @task_ref.
+ *
+ * Returns 0 on success. Returns -EPERM, after dropping the pid
+ * reference again, when the current task has no mm.
+ */
+int get_vas_user_win_ref(struct vas_user_win_ref *task_ref)
+{
+ /*
+ * Window opened by a child thread may not be closed when
+ * it exits. So take reference to its pid and release it
+ * when the window is free by parent thread.
+ * Acquire a reference to the task's pid to make sure
+ * pid will not be re-used - needed only for multithread
+ * applications.
+ */
+ task_ref->pid = get_task_pid(current, PIDTYPE_PID);
+ /*
+ * Acquire a reference to the task's mm.
+ */
+ task_ref->mm = get_task_mm(current);
+ if (!task_ref->mm) {
+ put_pid(task_ref->pid);
+ pr_err("pid(%d): mm_struct is not found\n",
+ current->pid);
+ return -EPERM;
+ }
+
+ /*
+ * Keep the mm through mmgrab() and give back the reference that
+ * get_task_mm() returned.
+ */
+ mmgrab(task_ref->mm);
+ mmput(task_ref->mm);
+ /*
+ * Process closes window during exit. In the case of
+ * multithread application, the child thread can open
+ * window and can exit without closing it. So takes tgid
+ * reference until window closed to make sure tgid is not
+ * reused.
+ */
+ task_ref->tgid = find_get_pid(task_tgid_vnr(current));
+
+ return 0;
+}
+
+/*
+ * Find the task to notify for the window described by @task_ref.
+ *
+ * The saved pid is tried first, then the saved tgid.  On success the
+ * task is stored in @tskp, the pid it was found through in @pidp, and
+ * true is returned; the caller must then release the task with
+ * put_task_struct().  Returns false when neither lookup finds a task or
+ * when the task found is exiting.
+ */
+static bool ref_get_pid_and_task(struct vas_user_win_ref *task_ref,
+		struct task_struct **tskp, struct pid **pidp)
+{
+	struct pid *owner = task_ref->pid;
+	struct task_struct *task = get_pid_task(owner, PIDTYPE_PID);
+
+	if (!task) {
+		owner = task_ref->tgid;
+		task = get_pid_task(owner, PIDTYPE_PID);
+		/*
+		 * Parent thread (tgid) will be closing window when it
+		 * exits. So should not get here.
+		 */
+		if (WARN_ON_ONCE(!task))
+			return false;
+	}
+
+	/* An exiting task is not reported; drop its reference again. */
+	if (task->flags & PF_EXITING) {
+		put_task_struct(task);
+		return false;
+	}
+
+	*tskp = task;
+	*pidp = owner;
+
+	return true;
+}
+
+/*
+ * Update the CSB to indicate a translation error.
+ *
+ * User space will be polling on CSB after the request is issued.
+ * If NX can handle the request without any issues, it updates CSB.
+ * Whereas if NX encounters page fault, the kernel will handle the
+ * fault and update CSB with translation error.
+ *
+ * If we are unable to update the CSB means copy_to_user failed due to
+ * invalid csb_addr, send a signal to the process.
+ */
+void vas_update_csb(struct coprocessor_request_block *crb,
+ struct vas_user_win_ref *task_ref)
+{
+ struct coprocessor_status_block csb;
+ struct kernel_siginfo info;
+ struct task_struct *tsk;
+ void __user *csb_addr;
+ struct pid *pid;
+ int rc;
+
+ /*
+ * NX user space windows can not be opened for task->mm=NULL
+ * and faults will not be generated for kernel requests.
+ */
+ if (WARN_ON_ONCE(!task_ref->mm))
+ return;
+
+ /* The user-space CSB address is stored big-endian in the CRB. */
+ csb_addr = (void __user *)be64_to_cpu(crb->csb_addr);
+
+ memset(&csb, 0, sizeof(csb));
+ csb.cc = CSB_CC_FAULT_ADDRESS;
+ csb.ce = CSB_CE_TERMINATION;
+ csb.cs = 0;
+ csb.count = 0;
+
+ /*
+ * NX operates and returns in BE format as defined CRB struct.
+ * So saves fault_storage_addr in BE as NX pastes in FIFO and
+ * expects user space to convert to CPU format.
+ */
+ csb.address = crb->stamp.nx.fault_storage_addr;
+ csb.flags = 0;
+
+ /*
+ * Process closes send window after all pending NX requests are
+ * completed. In multi-thread applications, a child thread can
+ * open a window and can exit without closing it. May be some
+ * requests are pending or this window can be used by other
+ * threads later. We should handle faults if NX encounters
+ * pages faults on these requests. Update CSB with translation
+ * error and fault address. If csb_addr passed by user space is
+ * invalid, send SEGV signal to pid saved in window. If the
+ * child thread is not running, send the signal to tgid.
+ * Parent thread (tgid) will close this window upon its exit.
+ *
+ * pid and mm references are taken when window is opened by
+ * process (pid). So tgid is used only when child thread opens
+ * a window and exits without closing it.
+ */
+
+ /* Nothing is written or signalled when no live task is found. */
+ if (!ref_get_pid_and_task(task_ref, &tsk, &pid))
+ return;
+
+ /* Use the window owner's mm for the copy_to_user() calls below. */
+ kthread_use_mm(task_ref->mm);
+ rc = copy_to_user(csb_addr, &csb, sizeof(csb));
+ /*
+ * User space polls on csb.flags (first byte). So add barrier
+ * then copy first byte with csb flags update.
+ */
+ if (!rc) {
+ csb.flags = CSB_V;
+ /* Make sure update to csb.flags is visible now */
+ smp_mb();
+ rc = copy_to_user(csb_addr, &csb, sizeof(u8));
+ }
+ kthread_unuse_mm(task_ref->mm);
+ put_task_struct(tsk);
+
+ /* Success */
+ if (!rc)
+ return;
+
+
+ pr_debug("Invalid CSB address 0x%p signalling pid(%d)\n",
+ csb_addr, pid_vnr(pid));
+
+ clear_siginfo(&info);
+ info.si_signo = SIGSEGV;
+ info.si_errno = EFAULT;
+ info.si_code = SEGV_MAPERR;
+ info.si_addr = csb_addr;
+ /*
+ * process will be polling on csb.flags after request is sent to
+ * NX. So generally CSB update should not fail except when an
+ * application passes invalid csb_addr. So an error message will
+ * be displayed and leave it to user space whether to ignore or
+ * handle this signal.
+ */
+ rcu_read_lock();
+ rc = kill_pid_info(SIGSEGV, &info, pid);
+ rcu_read_unlock();
+
+ /* The message names kill_proc_info(), but the call made is kill_pid_info(). */
+ pr_devel("pid %d kill_proc_info() rc %d\n", pid_vnr(pid), rc);
+}
+
+/*
+ * Print, via pr_devel(), the source DDE, the target DDE and the NX fault
+ * stamp of @crb.  Multi-byte fields are converted from big-endian first.
+ */
+void vas_dump_crb(struct coprocessor_request_block *crb)
+{
+	struct data_descriptor_entry *src = &crb->source;
+	struct data_descriptor_entry *tgt = &crb->target;
+	struct nx_fault_stamp *stamp = &crb->stamp.nx;
+
+	pr_devel("SrcDDE: addr 0x%llx, len %d, count %d, idx %d, flags %d\n",
+		 be64_to_cpu(src->address), be32_to_cpu(src->length),
+		 src->count, src->index, src->flags);
+
+	pr_devel("TgtDDE: addr 0x%llx, len %d, count %d, idx %d, flags %d\n",
+		 be64_to_cpu(tgt->address), be32_to_cpu(tgt->length),
+		 tgt->count, tgt->index, tgt->flags);
+
+	pr_devel("NX Stamp: PSWID 0x%x, FSA 0x%llx, flags 0x%x, FS 0x%x\n",
+		 be32_to_cpu(stamp->pswid),
+		 be64_to_cpu(stamp->fault_storage_addr),
+		 stamp->flags, stamp->fault_status);
+}
+
+/*
+ * open handler: allocate a zeroed coproc_instance, point it at the
+ * coproc_dev that embeds the inode's cdev, and store it in the file's
+ * private_data.  Returns 0, or -ENOMEM when the allocation fails.
+ */
+static int coproc_open(struct inode *inode, struct file *fp)
+{
+	struct coproc_instance *inst = kzalloc(sizeof(*inst), GFP_KERNEL);
+
+	if (inst == NULL)
+		return -ENOMEM;
+
+	inst->coproc = container_of(inode->i_cdev, struct coproc_dev, cdev);
+	fp->private_data = inst;
+
+	return 0;
+}
+
+/*
+ * VAS_TX_WIN_OPEN handler: open a send window for this file.
+ *
+ * @arg is a user pointer to a struct vas_tx_win_open_attr with version 1.
+ * Returns 0 on success; -EEXIST when the file already has a window;
+ * -EFAULT when the attributes cannot be copied in; -EINVAL for any other
+ * version; -EACCES when no open_win operation is registered; otherwise
+ * the error encoded in the pointer returned by open_win().
+ */
+static int coproc_ioc_tx_win_open(struct file *fp, unsigned long arg)
+{
+	struct coproc_instance *inst = fp->private_data;
+	struct vas_tx_win_open_attr attr;
+	struct vas_window *win;
+	int rc;
+
+	/* One window for file descriptor */
+	if (inst->txwin)
+		return -EEXIST;
+
+	rc = copy_from_user(&attr, (void __user *)arg, sizeof(attr));
+	if (rc) {
+		pr_err("copy_from_user() returns %d\n", rc);
+		return -EFAULT;
+	}
+
+	if (attr.version != 1) {
+		pr_err("Invalid window open API version\n");
+		return -EINVAL;
+	}
+
+	if (!inst->coproc->vops || !inst->coproc->vops->open_win) {
+		pr_err("VAS API is not registered\n");
+		return -EACCES;
+	}
+
+	win = inst->coproc->vops->open_win(attr.vas_id, attr.flags,
+					   inst->coproc->cop_type);
+	if (IS_ERR(win)) {
+		pr_err_ratelimited("VAS window open failed rc=%ld\n",
+				   PTR_ERR(win));
+		return PTR_ERR(win);
+	}
+
+	/* The mmap and fault paths take this mutex. */
+	mutex_init(&win->task_ref.mmap_mutex);
+	inst->txwin = win;
+
+	return 0;
+}
+
+/*
+ * release handler: close the send window, if one was opened, and free the
+ * per-open instance.
+ *
+ * The instance is freed even when close_win() reports an error: release
+ * is the only place that frees it, so returning early would leak it.
+ * Returns 0, or the error reported by close_win().
+ */
+static int coproc_release(struct inode *inode, struct file *fp)
+{
+	struct coproc_instance *cp_inst = fp->private_data;
+	int rc = 0;
+
+	if (cp_inst->txwin) {
+		if (cp_inst->coproc->vops &&
+		    cp_inst->coproc->vops->close_win)
+			rc = cp_inst->coproc->vops->close_win(cp_inst->txwin);
+		cp_inst->txwin = NULL;
+	}
+
+	kfree(cp_inst);
+	fp->private_data = NULL;
+
+	/*
+	 * We don't know here if user has other receive windows
+	 * open, so we can't really call clear_thread_tidr().
+	 * So, once the process calls set_thread_tidr(), the
+	 * TIDR value sticks around until process exits, resulting
+	 * in an extra copy in restore_sprs().
+	 */
+
+	return rc;
+}
+
+/*
+ * Make a faulting paste instruction report failure to user space.
+ *
+ * Reads the instruction at the current task's user NIP.  If it is a
+ * paste, clears CR0 bits 0-2, advances the return address by 4 and
+ * returns 0.  Returns -EINVAL when there are no user-mode registers,
+ * -EAGAIN when the instruction cannot be read, and -ENOENT when it is
+ * not a paste.
+ */
+static int do_fail_paste(void)
+{
+	struct pt_regs *regs = current->thread.regs;
+	u32 insn;
+
+	if (WARN_ON_ONCE(!regs))
+		return -EINVAL;
+
+	if (WARN_ON_ONCE(!user_mode(regs)))
+		return -EINVAL;
+
+	/*
+	 * If we couldn't translate the instruction, the driver should
+	 * return success without handling the fault, it will be retried
+	 * or the instruction fetch will fault.
+	 */
+	if (get_user(insn, (u32 __user *)(regs->nip)))
+		return -EAGAIN;
+
+	/* Not a paste instruction, driver may fail the fault. */
+	if ((insn & PPC_INST_PASTE_MASK) != PPC_INST_PASTE)
+		return -ENOENT;
+
+	/* Clear CR0[0-2] to fail paste, then step over the instruction. */
+	regs->ccr &= ~0xe0000000;
+	regs_add_return_ip(regs, 4);
+
+	return 0;
+}
+
+/*
+ * This fault handler is invoked when the core generates page fault on
+ * the paste address. Happens if the kernel closes window in hypervisor
+ * (on pseries) due to lost credit or the paste address is not mapped.
+ *
+ * Returns VM_FAULT_SIGBUS when the file has no instance or window, when
+ * the faulting VMA is not the one recorded for the window, or when
+ * do_fail_paste() fails with anything other than -EAGAIN. Returns the
+ * result of vmf_insert_pfn() when the window is active and has a paste
+ * address. Otherwise returns VM_FAULT_NOPAGE.
+ */
+static vm_fault_t vas_mmap_fault(struct vm_fault *vmf)
+{
+ struct vm_area_struct *vma = vmf->vma;
+ struct file *fp = vma->vm_file;
+ struct coproc_instance *cp_inst = fp->private_data;
+ struct vas_window *txwin;
+ vm_fault_t fault;
+ u64 paste_addr;
+ int ret;
+
+ /*
+ * window is not opened. Shouldn't expect this error.
+ */
+ if (!cp_inst || !cp_inst->txwin) {
+ pr_err("Unexpected fault on paste address with TX window closed\n");
+ return VM_FAULT_SIGBUS;
+ }
+
+ txwin = cp_inst->txwin;
+ /*
+ * When the LPAR lost credits due to core removal or during
+ * migration, invalidate the existing mapping for the current
+ * paste addresses and set windows in-active (zap_vma_pages in
+ * reconfig_close_windows()).
+ * New mapping will be done later after migration or new credits
+ * available. So continue to receive faults if the user space
+ * issue NX request.
+ */
+ /* Note: task_ref.vma is compared here before mmap_mutex is taken. */
+ if (txwin->task_ref.vma != vmf->vma) {
+ pr_err("No previous mapping with paste address\n");
+ return VM_FAULT_SIGBUS;
+ }
+
+ mutex_lock(&txwin->task_ref.mmap_mutex);
+ /*
+ * The window may be inactive due to lost credit (Ex: core
+ * removal with DLPAR). If the window is active again when
+ * the credit is available, map the new paste address at the
+ * window virtual address.
+ */
+ if (txwin->status == VAS_WIN_ACTIVE) {
+ paste_addr = cp_inst->coproc->vops->paste_addr(txwin);
+ if (paste_addr) {
+ fault = vmf_insert_pfn(vma, vma->vm_start,
+ (paste_addr >> PAGE_SHIFT));
+ mutex_unlock(&txwin->task_ref.mmap_mutex);
+ return fault;
+ }
+ }
+ mutex_unlock(&txwin->task_ref.mmap_mutex);
+
+ /*
+ * Received this fault due to closing the actual window.
+ * It can happen during migration or lost credits.
+ * Since no mapping, return the paste instruction failure
+ * to the user space.
+ */
+ ret = do_fail_paste();
+ /*
+ * The user space can retry several times until success (needed
+ * for migration) or should fallback to SW compression or
+ * manage with the existing open windows if available.
+ * Looking at sysfs interface, it can determine whether these
+ * failures are coming during migration or core removal:
+ * nr_used_credits > nr_total_credits when lost credits
+ */
+ if (!ret || (ret == -EAGAIN))
+ return VM_FAULT_NOPAGE;
+
+ return VM_FAULT_SIGBUS;
+}
+
+/* VMA operations that coproc_mmap() installs on paste-address mappings. */
+static const struct vm_operations_struct vas_vm_ops = {
+ .fault = vas_mmap_fault,
+};
+
+/*
+ * mmap handler: map the send window's paste address into the caller's
+ * address space.
+ *
+ * The mapping may cover at most one page and must use a zero file
+ * offset.  Returns 0 on success; -EINVAL for an oversized mapping, a
+ * non-zero offset, a file without a send window or a zero paste address;
+ * -EACCES when no paste_addr operation is registered or the window is not
+ * active; otherwise the error from remap_pfn_range().
+ */
+static int coproc_mmap(struct file *fp, struct vm_area_struct *vma)
+{
+	struct coproc_instance *cp_inst = fp->private_data;
+	struct vas_window *txwin;
+	unsigned long pfn;
+	u64 paste_addr;
+	pgprot_t prot;
+	int rc;
+
+	txwin = cp_inst->txwin;
+
+	if ((vma->vm_end - vma->vm_start) > PAGE_SIZE) {
+		pr_debug("size 0x%zx, PAGE_SIZE 0x%zx\n",
+			 (vma->vm_end - vma->vm_start), PAGE_SIZE);
+		return -EINVAL;
+	}
+
+	/*
+	 * Only the paste page itself may be mapped.  A non-zero offset
+	 * would be added to the paste PFN and map an unrelated page.
+	 */
+	if (vma->vm_pgoff) {
+		pr_debug("Page offset unsupported to map paste address\n");
+		return -EINVAL;
+	}
+
+	/* Ensure instance has an open send window */
+	if (!txwin) {
+		pr_err("No send window open?\n");
+		return -EINVAL;
+	}
+
+	if (!cp_inst->coproc->vops || !cp_inst->coproc->vops->paste_addr) {
+		pr_err("VAS API is not registered\n");
+		return -EACCES;
+	}
+
+	/*
+	 * The initial mmap is done after the window is opened
+	 * with ioctl. But before mmap(), this window can be closed in
+	 * the hypervisor due to lost credit (core removal on pseries).
+	 * So if the window is not active, return mmap() failure with
+	 * -EACCES and expects the user space reissue mmap() when it
+	 * is active again or open new window when the credit is available.
+	 * mmap_mutex protects the paste address mmap() with DLPAR
+	 * close/open event and allows mmap() only when the window is
+	 * active.
+	 */
+	mutex_lock(&txwin->task_ref.mmap_mutex);
+	if (txwin->status != VAS_WIN_ACTIVE) {
+		pr_err("Window is not active\n");
+		rc = -EACCES;
+		goto out;
+	}
+
+	paste_addr = cp_inst->coproc->vops->paste_addr(txwin);
+	if (!paste_addr) {
+		pr_err("Window paste address failed\n");
+		rc = -EINVAL;
+		goto out;
+	}
+
+	pfn = paste_addr >> PAGE_SHIFT;
+
+	/* flags, page_prot from cxl_mmap(), except we want cachable */
+	vm_flags_set(vma, VM_IO | VM_PFNMAP);
+	vma->vm_page_prot = pgprot_cached(vma->vm_page_prot);
+
+	prot = __pgprot(pgprot_val(vma->vm_page_prot) | _PAGE_DIRTY);
+
+	rc = remap_pfn_range(vma, vma->vm_start, pfn,
+			     vma->vm_end - vma->vm_start, prot);
+
+	pr_devel("paste addr %llx at %lx, rc %d\n", paste_addr,
+		 vma->vm_start, rc);
+
+	/*
+	 * Record the VMA only once it is really mapped, so the window
+	 * never keeps a pointer to a VMA whose mmap() failed.
+	 */
+	if (!rc) {
+		txwin->task_ref.vma = vma;
+		vma->vm_ops = &vas_vm_ops;
+	}
+
+out:
+	mutex_unlock(&txwin->task_ref.mmap_mutex);
+	return rc;
+}
+
+/*
+ * ioctl entry point.  VAS_TX_WIN_OPEN is the only supported command;
+ * every other command returns -EINVAL.
+ */
+static long coproc_ioctl(struct file *fp, unsigned int cmd, unsigned long arg)
+{
+	if (cmd == VAS_TX_WIN_OPEN)
+		return coproc_ioc_tx_win_open(fp, arg);
+
+	return -EINVAL;
+}
+
+/*
+ * File operations of the coprocessor character device. Not const:
+ * .owner is filled in by vas_register_coproc_api().
+ */
+static struct file_operations coproc_fops = {
+ .open = coproc_open,
+ .release = coproc_release,
+ .mmap = coproc_mmap,
+ .unlocked_ioctl = coproc_ioctl,
+};
+
+/*
+ * Register the user-space API character device for a coprocessor type.
+ *
+ * @mod:      module recorded as owner of the file operations
+ * @cop_type: coprocessor type handed to vops->open_win() on window open
+ * @name:     name used for the chrdev region, the class and the device
+ * @vops:     window operations used by the ioctl, mmap and release paths
+ *
+ * Supporting only nx-gzip coprocessor type now, but this API code
+ * extended to other coprocessor types later.
+ *
+ * Returns 0 on success or a negative errno; on failure every step that
+ * already succeeded is undone.
+ */
+int vas_register_coproc_api(struct module *mod, enum vas_cop_type cop_type,
+			    const char *name,
+			    const struct vas_user_win_ops *vops)
+{
+	int rc;
+	dev_t devno;
+
+	/*
+	 * Reserve minor 0: it is the minor used for the cdev and the
+	 * device below, and by vas_unregister_coproc_api().
+	 */
+	rc = alloc_chrdev_region(&coproc_device.devt, 0, 1, name);
+	if (rc) {
+		pr_err("Unable to allocate coproc major number: %i\n", rc);
+		return rc;
+	}
+
+	pr_devel("%s device allocated, dev [%i,%i]\n", name,
+		 MAJOR(coproc_device.devt), MINOR(coproc_device.devt));
+
+	coproc_device.class = class_create(name);
+	if (IS_ERR(coproc_device.class)) {
+		rc = PTR_ERR(coproc_device.class);
+		pr_err("Unable to create %s class %d\n", name, rc);
+		goto err_class;
+	}
+	coproc_device.class->devnode = coproc_devnode;
+	coproc_device.cop_type = cop_type;
+	coproc_device.vops = vops;
+
+	coproc_fops.owner = mod;
+	cdev_init(&coproc_device.cdev, &coproc_fops);
+
+	devno = MKDEV(MAJOR(coproc_device.devt), 0);
+	rc = cdev_add(&coproc_device.cdev, devno, 1);
+	if (rc) {
+		pr_err("cdev_add() failed %d\n", rc);
+		goto err_cdev;
+	}
+
+	/* Pass the name as data so it is never parsed as a format string. */
+	coproc_device.device = device_create(coproc_device.class, NULL,
+					     devno, NULL, "%s", name);
+	if (IS_ERR(coproc_device.device)) {
+		rc = PTR_ERR(coproc_device.device);
+		pr_err("Unable to create coproc-%d %d\n", MINOR(devno), rc);
+		goto err;
+	}
+
+	pr_devel("Added dev [%d,%d]\n", MAJOR(devno), MINOR(devno));
+
+	return 0;
+
+err:
+	cdev_del(&coproc_device.cdev);
+err_cdev:
+	class_destroy(coproc_device.class);
+err_class:
+	unregister_chrdev_region(coproc_device.devt, 1);
+	return rc;
+}
+
+/*
+ * Tear down what vas_register_coproc_api() set up: delete the cdev,
+ * destroy the device and its class, then release the chrdev region.
+ */
+void vas_unregister_coproc_api(void)
+{
+ dev_t devno;
+
+ cdev_del(&coproc_device.cdev);
+ /* Same major/minor-0 number that registration used for the device. */
+ devno = MKDEV(MAJOR(coproc_device.devt), 0);
+ device_destroy(coproc_device.class, devno);
+
+ class_destroy(coproc_device.class);
+ unregister_chrdev_region(coproc_device.devt, 1);
+}
diff --git a/arch/powerpc/platforms/cell/Kconfig b/arch/powerpc/platforms/cell/Kconfig
new file mode 100644
index 000000000..34669b060
--- /dev/null
+++ b/arch/powerpc/platforms/cell/Kconfig
@@ -0,0 +1,104 @@
+# SPDX-License-Identifier: GPL-2.0
+config PPC_CELL
+ select PPC_64S_HASH_MMU if PPC64
+ bool
+
+config PPC_CELL_COMMON
+ bool
+ select PPC_CELL
+ select PPC_DCR_MMIO
+ select PPC_INDIRECT_PIO
+ select PPC_INDIRECT_MMIO
+ select PPC_HASH_MMU_NATIVE
+ select PPC_RTAS
+ select IRQ_EDGE_EOI_HANDLER
+
+config PPC_CELL_NATIVE
+ bool
+ select PPC_CELL_COMMON
+ select MPIC
+ select PPC_IO_WORKAROUNDS
+ select IBM_EMAC_EMAC4 if IBM_EMAC
+ select IBM_EMAC_RGMII if IBM_EMAC
+ select IBM_EMAC_ZMII if IBM_EMAC #test only
+ select IBM_EMAC_TAH if IBM_EMAC #test only
+
+config PPC_IBM_CELL_BLADE
+ bool "IBM Cell Blade"
+ depends on PPC64 && PPC_BOOK3S && CPU_BIG_ENDIAN
+ select PPC_CELL_NATIVE
+ select PPC_OF_PLATFORM_PCI
+ select FORCE_PCI
+ select MMIO_NVRAM
+ select PPC_UDBG_16550
+ select UDBG_RTAS_CONSOLE
+
+config AXON_MSI
+ bool
+ depends on PPC_IBM_CELL_BLADE && PCI_MSI
+ select IRQ_DOMAIN_NOMAP
+ default y
+
+menu "Cell Broadband Engine options"
+ depends on PPC_CELL
+
+config SPU_FS
+ tristate "SPU file system"
+ default m
+ depends on PPC_CELL
+ depends on COREDUMP
+ select SPU_BASE
+ help
+ The SPU file system is used to access Synergistic Processing
+ Units on machines implementing the Broadband Processor
+ Architecture.
+
+config SPU_BASE
+ bool
+ select PPC_COPRO_BASE
+
+config CBE_RAS
+ bool "RAS features for bare metal Cell BE"
+ depends on PPC_CELL_NATIVE
+ default y
+
+config PPC_IBM_CELL_RESETBUTTON
+ bool "IBM Cell Blade Pinhole reset button"
+ depends on CBE_RAS && PPC_IBM_CELL_BLADE
+ default y
+ help
+ Support Pinhole Resetbutton on IBM Cell blades.
+ This adds a method to trigger system reset via front panel pinhole button.
+
+config PPC_IBM_CELL_POWERBUTTON
+ tristate "IBM Cell Blade power button"
+ depends on PPC_IBM_CELL_BLADE && INPUT_EVDEV
+ default y
+ help
+ Support Powerbutton on IBM Cell blades.
+ This will enable the powerbutton as an input device.
+
+config CBE_THERM
+ tristate "CBE thermal support"
+ default m
+ depends on CBE_RAS && SPU_BASE
+
+config PPC_PMI
+ tristate
+ default y
+ depends on CPU_FREQ_CBE_PMI || PPC_IBM_CELL_POWERBUTTON
+ help
+ PMI (Platform Management Interrupt) is a way to
+ communicate with the BMC (Baseboard Management Controller).
+ It is used in some IBM Cell blades.
+
+config CBE_CPUFREQ_SPU_GOVERNOR
+ tristate "CBE frequency scaling based on SPU usage"
+ depends on SPU_FS && CPU_FREQ
+ default m
+ help
+ This governor checks for spu usage to adjust the cpu frequency.
+ If no spu is running on a given cpu, that cpu will be throttled to
+ the minimal possible frequency.
+
+endmenu
diff --git a/arch/powerpc/platforms/cell/Makefile b/arch/powerpc/platforms/cell/Makefile
new file mode 100644
index 000000000..7ea6692f6
--- /dev/null
+++ b/arch/powerpc/platforms/cell/Makefile
@@ -0,0 +1,27 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_PPC_CELL_COMMON) += cbe_regs.o interrupt.o pervasive.o
+
+obj-$(CONFIG_PPC_CELL_NATIVE) += iommu.o setup.o spider-pic.o \
+ pmu.o spider-pci.o
+obj-$(CONFIG_CBE_RAS) += ras.o
+
+obj-$(CONFIG_CBE_THERM) += cbe_thermal.o
+obj-$(CONFIG_CBE_CPUFREQ_SPU_GOVERNOR) += cpufreq_spudemand.o
+
+obj-$(CONFIG_PPC_IBM_CELL_POWERBUTTON) += cbe_powerbutton.o
+
+ifdef CONFIG_SMP
+obj-$(CONFIG_PPC_CELL_NATIVE) += smp.o
+endif
+
+# needed only when building loadable spufs.ko
+spu-priv1-$(CONFIG_PPC_CELL_COMMON) += spu_priv1_mmio.o
+spu-manage-$(CONFIG_PPC_CELL_COMMON) += spu_manage.o
+
+obj-$(CONFIG_SPU_BASE) += spu_callbacks.o spu_base.o \
+ spu_syscalls.o \
+ $(spu-priv1-y) \
+ $(spu-manage-y) \
+ spufs/
+
+obj-$(CONFIG_AXON_MSI) += axon_msi.o
diff --git a/arch/powerpc/platforms/cell/axon_msi.c b/arch/powerpc/platforms/cell/axon_msi.c
new file mode 100644
index 000000000..28dc86744
--- /dev/null
+++ b/arch/powerpc/platforms/cell/axon_msi.c
@@ -0,0 +1,481 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2007, Michael Ellerman, IBM Corporation.
+ */
+
+
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/msi.h>
+#include <linux/export.h>
+#include <linux/slab.h>
+#include <linux/debugfs.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
+#include <linux/platform_device.h>
+
+#include <asm/dcr.h>
+#include <asm/machdep.h>
+
+#include "cell.h"
+
+/*
+ * MSIC registers, specified as offsets from dcr_base
+ */
+#define MSIC_CTRL_REG 0x0
+
+/* Base Address registers specify FIFO location in BE memory */
+#define MSIC_BASE_ADDR_HI_REG 0x3
+#define MSIC_BASE_ADDR_LO_REG 0x4
+
+/* Hold the read/write offsets into the FIFO */
+#define MSIC_READ_OFFSET_REG 0x5
+#define MSIC_WRITE_OFFSET_REG 0x6
+
+
+/* MSIC control register flags */
+#define MSIC_CTRL_ENABLE 0x0001
+#define MSIC_CTRL_FIFO_FULL_ENABLE 0x0002
+#define MSIC_CTRL_IRQ_ENABLE 0x0008
+#define MSIC_CTRL_FULL_STOP_ENABLE 0x0010
+
+/*
+ * The MSIC can be configured to use a FIFO of 32KB, 64KB, 128KB or 256KB.
+ * Currently we're using a 64KB FIFO size.
+ */
+#define MSIC_FIFO_SIZE_SHIFT 16
+#define MSIC_FIFO_SIZE_BYTES (1 << MSIC_FIFO_SIZE_SHIFT)
+
+/*
+ * To configure the FIFO size as (1 << n) bytes, we write (n - 15) into bits
+ * 8-9 of the MSIC control reg.
+ */
+#define MSIC_CTRL_FIFO_SIZE (((MSIC_FIFO_SIZE_SHIFT - 15) << 8) & 0x300)
+
+/*
+ * We need to mask the read/write offsets to make sure they stay within
+ * the bounds of the FIFO. Also they should always be 16-byte aligned.
+ */
+#define MSIC_FIFO_SIZE_MASK ((MSIC_FIFO_SIZE_BYTES - 1) & ~0xFu)
+
+/* Each entry in the FIFO is 16 bytes, the first 4 bytes hold the irq # */
+#define MSIC_FIFO_ENTRY_SIZE 0x10
+
+
+/* Per-MSIC driver state; one instance is allocated by axon_msi_probe(). */
+struct axon_msic {
+ struct irq_domain *irq_domain; /* nomap domain the MSI virqs are created in */
+ __le32 *fifo_virt; /* CPU address of the coherent FIFO buffer */
+ dma_addr_t fifo_phys; /* DMA address of the FIFO, programmed into the BASE_ADDR regs */
+ dcr_host_t dcr_host; /* handle returned by dcr_map() for the MSIC registers */
+ u32 read_offset; /* byte offset of the next FIFO entry to consume */
+#ifdef DEBUG
+ u32 __iomem *trigger; /* DEBUG only: ioremapped word written by the debugfs file */
+#endif
+};
+
+/*
+ * Debug hook called at the end of probe. The real implementation lives in
+ * the DEBUG section at the bottom of this file; otherwise it is a no-op stub.
+ */
+#ifdef DEBUG
+void axon_msi_debug_setup(struct device_node *dn, struct axon_msic *msic);
+#else
+static inline void axon_msi_debug_setup(struct device_node *dn,
+ struct axon_msic *msic) { }
+#endif
+
+
+/*
+ * Write @val to the MSIC register at offset @dcr_n, emitting a pr_devel()
+ * trace of the access first.
+ */
+static void msic_dcr_write(struct axon_msic *msic, unsigned int dcr_n, u32 val)
+{
+	dcr_host_t host = msic->dcr_host;
+
+	pr_devel("axon_msi: dcr_write(0x%x, 0x%x)\n", val, dcr_n);
+	dcr_write(host, dcr_n, val);
+}
+
+/*
+ * Chained handler for the MSIC interrupt (installed in axon_msi_probe()).
+ * Drains the FIFO from msic->read_offset up to the hardware write offset,
+ * dispatching each entry's irq number via generic_handle_irq(), then sends
+ * EOI on the parent chip.
+ */
+static void axon_msi_cascade(struct irq_desc *desc)
+{
+ struct irq_chip *chip = irq_desc_get_chip(desc);
+ struct axon_msic *msic = irq_desc_get_handler_data(desc);
+ u32 write_offset, msi;
+ int idx;
+ int retry = 0;
+
+ write_offset = dcr_read(msic->dcr_host, MSIC_WRITE_OFFSET_REG);
+ pr_devel("axon_msi: original write_offset 0x%x\n", write_offset);
+
+ /* write_offset doesn't wrap properly, so we have to mask it */
+ write_offset &= MSIC_FIFO_SIZE_MASK;
+
+ /* Stop when caught up, or after 100 consecutive failed reads of one entry. */
+ while (msic->read_offset != write_offset && retry < 100) {
+ /* read_offset is in bytes; fifo_virt is indexed in 32-bit words */
+ idx = msic->read_offset / sizeof(__le32);
+ msi = le32_to_cpu(msic->fifo_virt[idx]);
+ /* only the low 16 bits are used as the irq number */
+ msi &= 0xFFFF;
+
+ pr_devel("axon_msi: woff %x roff %x msi %x\n",
+ write_offset, msic->read_offset, msi);
+
+ if (msi < nr_irqs && irq_get_chip_data(msi) == msic) {
+ generic_handle_irq(msi);
+ /* mark the slot consumed so a stale re-read fails the check above */
+ msic->fifo_virt[idx] = cpu_to_le32(0xffffffff);
+ } else {
+ /*
+ * Reading the MSIC_WRITE_OFFSET_REG does not
+ * reliably flush the outstanding DMA to the
+ * FIFO buffer. Here we were reading stale
+ * data, so we need to retry.
+ */
+ udelay(1);
+ retry++;
+ pr_devel("axon_msi: invalid irq 0x%x!\n", msi);
+ continue;
+ }
+
+ if (retry) {
+ pr_devel("axon_msi: late irq 0x%x, retry %d\n",
+ msi, retry);
+ retry = 0;
+ }
+
+ /* advance to the next entry, wrapping within the FIFO */
+ msic->read_offset += MSIC_FIFO_ENTRY_SIZE;
+ msic->read_offset &= MSIC_FIFO_SIZE_MASK;
+ }
+
+ /* retry is only non-zero here if the loop gave up on an entry: skip it */
+ if (retry) {
+ printk(KERN_WARNING "axon_msi: irq timed out\n");
+
+ msic->read_offset += MSIC_FIFO_ENTRY_SIZE;
+ msic->read_offset &= MSIC_FIFO_SIZE_MASK;
+ }
+
+ chip->irq_eoi(&desc->irq_data);
+}
+
+/*
+ * Locate the MSIC serving @dev: walk up from the device's OF node to the
+ * first node carrying an "msi-translator" phandle, resolve that phandle and
+ * return the host_data of the irq_domain registered for the target node.
+ * Returns NULL if any step fails.
+ */
+static struct axon_msic *find_msi_translator(struct pci_dev *dev)
+{
+	struct axon_msic *found = NULL;
+	struct device_node *node, *translator;
+	struct irq_domain *domain;
+	const phandle *handle = NULL;
+
+	node = of_node_get(pci_device_to_OF_node(dev));
+	if (!node) {
+		dev_dbg(&dev->dev, "axon_msi: no pci_dn found\n");
+		return NULL;
+	}
+
+	/* of_get_next_parent() drops the reference on the node it leaves */
+	while (node) {
+		handle = of_get_property(node, "msi-translator", NULL);
+		if (handle)
+			break;
+		node = of_get_next_parent(node);
+	}
+
+	if (!handle) {
+		/* walked off the top of the tree; no reference left to drop */
+		dev_dbg(&dev->dev,
+			"axon_msi: no msi-translator property found\n");
+		return NULL;
+	}
+
+	translator = of_find_node_by_phandle(*handle);
+	of_node_put(node);
+	if (!translator) {
+		dev_dbg(&dev->dev,
+			"axon_msi: msi-translator doesn't point to a node\n");
+		return NULL;
+	}
+
+	domain = irq_find_host(translator);
+	if (domain)
+		found = domain->host_data;
+	else
+		dev_dbg(&dev->dev, "axon_msi: no irq_domain found for node %pOF\n",
+			translator);
+
+	of_node_put(translator);
+
+	return found;
+}
+
+/*
+ * Fill msg->address_hi/lo from the nearest "msi-address-64" (skipped when
+ * the device sets no_64bit_msi) or "msi-address-32" property found walking
+ * up from the device's OF node.
+ *
+ * Returns 0 on success, -ENODEV if the device has no OF node, -ENOENT if no
+ * property was found, -EINVAL if the property is neither 4 nor 8 bytes long.
+ */
+static int setup_msi_msg_address(struct pci_dev *dev, struct msi_msg *msg)
+{
+	struct device_node *node;
+	const u32 *addr = NULL;
+	int addr_len;
+	int rc = 0;
+
+	node = of_node_get(pci_device_to_OF_node(dev));
+	if (!node) {
+		dev_dbg(&dev->dev, "axon_msi: no pci_dn found\n");
+		return -ENODEV;
+	}
+
+	while (node) {
+		/* prefer the 64-bit address unless the device cannot use it */
+		if (!dev->no_64bit_msi)
+			addr = of_get_property(node, "msi-address-64", &addr_len);
+		if (!addr)
+			addr = of_get_property(node, "msi-address-32", &addr_len);
+		if (addr)
+			break;
+		node = of_get_next_parent(node);
+	}
+
+	if (!addr) {
+		dev_dbg(&dev->dev,
+			"axon_msi: no msi-address-(32|64) properties found\n");
+		rc = -ENOENT;
+	} else if (addr_len == 8) {
+		msg->address_hi = addr[0];
+		msg->address_lo = addr[1];
+	} else if (addr_len == 4) {
+		msg->address_hi = 0;
+		msg->address_lo = addr[0];
+	} else {
+		dev_dbg(&dev->dev,
+			"axon_msi: malformed msi-address-(32|64) property\n");
+		rc = -EINVAL;
+	}
+
+	of_node_put(node);
+
+	return rc;
+}
+
+/*
+ * setup_msi_irqs hook: for every not-yet-associated MSI descriptor of @dev,
+ * create a direct-mapped virq in the MSIC's domain and program the device
+ * with the translator address and the virq number as message data.
+ * @nvec and @type are not used here.
+ *
+ * Returns 0 on success, a negative value on failure.
+ */
+static int axon_msi_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
+{
+	struct axon_msic *msic = find_msi_translator(dev);
+	struct msi_desc *desc;
+	struct msi_msg msg;
+	unsigned int irq;
+	int err;
+
+	if (!msic)
+		return -ENODEV;
+
+	err = setup_msi_msg_address(dev, &msg);
+	if (err)
+		return err;
+
+	msi_for_each_desc(desc, &dev->dev, MSI_DESC_NOTASSOCIATED) {
+		irq = irq_create_direct_mapping(msic->irq_domain);
+		if (irq == 0) {
+			dev_warn(&dev->dev,
+				 "axon_msi: virq allocation failed!\n");
+			return -1;
+		}
+		dev_dbg(&dev->dev, "axon_msi: allocated virq 0x%x\n", irq);
+
+		irq_set_msi_desc(irq, desc);
+		/* the device writes the virq number itself into the FIFO */
+		msg.data = irq;
+		pci_write_msi_msg(irq, &msg);
+	}
+
+	return 0;
+}
+
+/*
+ * teardown_msi_irqs hook: detach and dispose the virq of every associated
+ * MSI descriptor of @dev, then clear the descriptor's irq field.
+ */
+static void axon_msi_teardown_msi_irqs(struct pci_dev *dev)
+{
+	struct msi_desc *desc;
+
+	dev_dbg(&dev->dev, "axon_msi: tearing down msi irqs\n");
+
+	msi_for_each_desc(desc, &dev->dev, MSI_DESC_ASSOCIATED) {
+		unsigned int irq = desc->irq;
+
+		irq_set_msi_desc(irq, NULL);
+		irq_dispose_mapping(irq);
+		desc->irq = 0;
+	}
+}
+
+/*
+ * irq_chip installed on every MSI virq by msic_host_map(). Masking,
+ * unmasking and shutdown are delegated to the PCI MSI mask helpers.
+ */
+static struct irq_chip msic_irq_chip = {
+ .irq_mask = pci_msi_mask_irq,
+ .irq_unmask = pci_msi_unmask_irq,
+ .irq_shutdown = pci_msi_mask_irq,
+ .name = "AXON-MSI",
+};
+
+/*
+ * irq_domain .map callback: tag @virq with the owning MSIC (the domain's
+ * host_data) and install the MSI chip with the simple flow handler.
+ * The chip data is what axon_msi_cascade() checks before dispatching.
+ * Always returns 0.
+ */
+static int msic_host_map(struct irq_domain *h, unsigned int virq,
+			 irq_hw_number_t hw)
+{
+	void *owner = h->host_data;
+
+	irq_set_chip_data(virq, owner);
+	irq_set_chip_and_handler(virq, &msic_irq_chip, handle_simple_irq);
+
+	return 0;
+}
+
+/* Domain ops for the MSIC nomap domain; only .map is provided. */
+static const struct irq_domain_ops msic_host_ops = {
+ .map = msic_host_map,
+};
+
+/*
+ * Platform .shutdown callback: clear the ENABLE and IRQ_ENABLE bits in the
+ * MSIC control register, leaving the other control bits as they were.
+ */
+static void axon_msi_shutdown(struct platform_device *device)
+{
+	struct axon_msic *msic = dev_get_drvdata(&device->dev);
+	u32 ctrl;
+
+	pr_devel("axon_msi: disabling %pOF\n",
+		 irq_domain_get_of_node(msic->irq_domain));
+
+	ctrl = dcr_read(msic->dcr_host, MSIC_CTRL_REG);
+	ctrl &= ~(MSIC_CTRL_ENABLE | MSIC_CTRL_IRQ_ENABLE);
+	msic_dcr_write(msic, MSIC_CTRL_REG, ctrl);
+}
+
+/*
+ * Probe one "ibm,axon-msic" node: map its DCR registers, allocate the
+ * coherent FIFO buffer, create the nomap irq_domain, install the chained
+ * handler on the MSIC interrupt, program and enable the hardware, and hook
+ * the MSI setup/teardown callbacks into cell_pci_controller_ops.
+ *
+ * Returns 0 on success and -1 on any failure. Every resource acquired
+ * before the failing step is released, including the DCR mapping.
+ */
+static int axon_msi_probe(struct platform_device *device)
+{
+	struct device_node *dn = device->dev.of_node;
+	struct axon_msic *msic;
+	unsigned int virq;
+	int dcr_base, dcr_len;
+
+	pr_devel("axon_msi: setting up dn %pOF\n", dn);
+
+	msic = kzalloc(sizeof(*msic), GFP_KERNEL);
+	if (!msic) {
+		printk(KERN_ERR "axon_msi: couldn't allocate msic for %pOF\n",
+		       dn);
+		goto out;
+	}
+
+	dcr_base = dcr_resource_start(dn, 0);
+	dcr_len = dcr_resource_len(dn, 0);
+
+	if (dcr_base == 0 || dcr_len == 0) {
+		printk(KERN_ERR
+		       "axon_msi: couldn't parse dcr properties on %pOF\n",
+			dn);
+		goto out_free_msic;
+	}
+
+	msic->dcr_host = dcr_map(dn, dcr_base, dcr_len);
+	if (!DCR_MAP_OK(msic->dcr_host)) {
+		printk(KERN_ERR "axon_msi: dcr_map failed for %pOF\n",
+		       dn);
+		goto out_free_msic;
+	}
+
+	msic->fifo_virt = dma_alloc_coherent(&device->dev, MSIC_FIFO_SIZE_BYTES,
+					     &msic->fifo_phys, GFP_KERNEL);
+	if (!msic->fifo_virt) {
+		printk(KERN_ERR "axon_msi: couldn't allocate fifo for %pOF\n",
+		       dn);
+		goto out_unmap_dcr;
+	}
+
+	virq = irq_of_parse_and_map(dn, 0);
+	if (!virq) {
+		printk(KERN_ERR "axon_msi: irq parse and map failed for %pOF\n",
+		       dn);
+		goto out_free_fifo;
+	}
+	/* all-ones marks every FIFO slot as "no valid irq yet" for the cascade */
+	memset(msic->fifo_virt, 0xff, MSIC_FIFO_SIZE_BYTES);
+
+	/* We rely on being able to stash a virq in a u16, so limit irqs to < 65536 */
+	msic->irq_domain = irq_domain_add_nomap(dn, 65536, &msic_host_ops, msic);
+	if (!msic->irq_domain) {
+		printk(KERN_ERR "axon_msi: couldn't allocate irq_domain for %pOF\n",
+		       dn);
+		goto out_free_fifo;
+	}
+
+	irq_set_handler_data(virq, msic);
+	irq_set_chained_handler(virq, axon_msi_cascade);
+	pr_devel("axon_msi: irq 0x%x setup for axon_msi\n", virq);
+
+	/* Enable the MSIC hardware */
+	msic_dcr_write(msic, MSIC_BASE_ADDR_HI_REG, msic->fifo_phys >> 32);
+	msic_dcr_write(msic, MSIC_BASE_ADDR_LO_REG,
+				  msic->fifo_phys & 0xFFFFFFFF);
+	msic_dcr_write(msic, MSIC_CTRL_REG,
+			MSIC_CTRL_IRQ_ENABLE | MSIC_CTRL_ENABLE |
+			MSIC_CTRL_FIFO_SIZE);
+
+	/* start consuming from wherever the hardware currently points */
+	msic->read_offset = dcr_read(msic->dcr_host, MSIC_WRITE_OFFSET_REG)
+				& MSIC_FIFO_SIZE_MASK;
+
+	dev_set_drvdata(&device->dev, msic);
+
+	cell_pci_controller_ops.setup_msi_irqs = axon_msi_setup_msi_irqs;
+	cell_pci_controller_ops.teardown_msi_irqs = axon_msi_teardown_msi_irqs;
+
+	axon_msi_debug_setup(dn, msic);
+
+	printk(KERN_DEBUG "axon_msi: setup MSIC on %pOF\n", dn);
+
+	return 0;
+
+out_free_fifo:
+	dma_free_coherent(&device->dev, MSIC_FIFO_SIZE_BYTES, msic->fifo_virt,
+			  msic->fifo_phys);
+out_unmap_dcr:
+	/* undo dcr_map(); previously leaked on every failure after mapping */
+	dcr_unmap(msic->dcr_host, dcr_len);
+out_free_msic:
+	kfree(msic);
+out:
+
+	return -1;
+}
+
+/* OF match table: binds the driver to "ibm,axon-msic" nodes. */
+static const struct of_device_id axon_msi_device_id[] = {
+ {
+ .compatible = "ibm,axon-msic"
+ },
+ {}
+};
+
+/* Platform driver glue; there is no .remove callback, only probe and shutdown. */
+static struct platform_driver axon_msi_driver = {
+ .probe = axon_msi_probe,
+ .shutdown = axon_msi_shutdown,
+ .driver = {
+ .name = "axon-msi",
+ .of_match_table = axon_msi_device_id,
+ },
+};
+
+/* Register the platform driver; run as a subsys_initcall (see below). */
+static int __init axon_msi_init(void)
+{
+ return platform_driver_register(&axon_msi_driver);
+}
+subsys_initcall(axon_msi_init);
+
+
+#ifdef DEBUG
+/*
+ * debugfs write handler: store @val (little-endian, truncated to 32 bits by
+ * out_le32) to the ioremapped trigger word of the MSIC passed in @data.
+ */
+static int msic_set(void *data, u64 val)
+{
+ struct axon_msic *msic = data;
+ out_le32(msic->trigger, val);
+ return 0;
+}
+
+/* debugfs read handler: always reports 0; the trigger word is never read back. */
+static int msic_get(void *data, u64 *val)
+{
+ *val = 0;
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_msic, msic_get, msic_set, "%llu\n");
+
+/*
+ * DEBUG only: ioremap the first 4 bytes of the node's first "reg" resource
+ * as the MSIC trigger word and expose it through a debugfs file named
+ * "msic_<nid>" in arch_debugfs_dir. Failures are logged via pr_devel() and
+ * otherwise ignored.
+ */
+void axon_msi_debug_setup(struct device_node *dn, struct axon_msic *msic)
+{
+	/*
+	 * "msic_" plus any int ("-2147483648" is 11 chars) plus NUL fits.
+	 * The previous 8-byte buffer truncated three-digit node ids, which
+	 * could make two nodes map to the same file name.
+	 */
+	char name[24];
+	struct resource res;
+
+	if (of_address_to_resource(dn, 0, &res)) {
+		pr_devel("axon_msi: couldn't get reg property\n");
+		return;
+	}
+
+	msic->trigger = ioremap(res.start, 0x4);
+	if (!msic->trigger) {
+		pr_devel("axon_msi: ioremap failed\n");
+		return;
+	}
+
+	snprintf(name, sizeof(name), "msic_%d", of_node_to_nid(dn));
+
+	debugfs_create_file(name, 0600, arch_debugfs_dir, msic, &fops_msic);
+}
+#endif /* DEBUG */
diff --git a/arch/powerpc/platforms/cell/cbe_powerbutton.c b/arch/powerpc/platforms/cell/cbe_powerbutton.c
new file mode 100644
index 000000000..a3ee39748
--- /dev/null
+++ b/arch/powerpc/platforms/cell/cbe_powerbutton.c
@@ -0,0 +1,105 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * driver for powerbutton on IBM cell blades
+ *
+ * (C) Copyright IBM Corp. 2005-2008
+ *
+ * Author: Christian Krafft <krafft@de.ibm.com>
+ */
+
+#include <linux/input.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <asm/pmi.h>
+
+static struct input_dev *button_dev;
+static struct platform_device *button_pdev;
+
+/*
+ * PMI callback for power-button messages: report a KEY_POWER press
+ * immediately followed by a release on button_dev.
+ * Any other message type is treated as a bug.
+ */
+static void cbe_powerbutton_handle_pmi(pmi_message_t pmi_msg)
+{
+ BUG_ON(pmi_msg.type != PMI_TYPE_POWER_BUTTON);
+
+ input_report_key(button_dev, KEY_POWER, 1);
+ input_sync(button_dev);
+ input_report_key(button_dev, KEY_POWER, 0);
+ input_sync(button_dev);
+}
+
+/* Handler descriptor registered with the PMI layer for power-button messages. */
+static struct pmi_handler cbe_pmi_handler = {
+ .type = PMI_TYPE_POWER_BUTTON,
+ .handle_pmi_message = cbe_powerbutton_handle_pmi,
+};
+
+/*
+ * Module init: on "IBM,CBPLUS-1.0" machines, create a "Power Button" input
+ * device parented to a simple "power_button" platform device and register
+ * the PMI handler that feeds it.
+ *
+ * Returns 0 on success or a negative errno. On failure everything set up so
+ * far is undone. An input device that was already registered is torn down
+ * with input_unregister_device(), because input_free_device() is only valid
+ * for devices that were never (successfully) registered.
+ */
+static int __init cbe_powerbutton_init(void)
+{
+	int ret = 0;
+	struct input_dev *dev;
+
+	if (!of_machine_is_compatible("IBM,CBPLUS-1.0")) {
+		printk(KERN_ERR "%s: Not a cell blade.\n", __func__);
+		ret = -ENODEV;
+		goto out;
+	}
+
+	dev = input_allocate_device();
+	if (!dev) {
+		ret = -ENOMEM;
+		printk(KERN_ERR "%s: Not enough memory.\n", __func__);
+		goto out;
+	}
+
+	set_bit(EV_KEY, dev->evbit);
+	set_bit(KEY_POWER, dev->keybit);
+
+	dev->name = "Power Button";
+	dev->id.bustype = BUS_HOST;
+
+	/* this makes the button look like an acpi power button
+	 * no clue whether anyone relies on that though */
+	dev->id.product = 0x02;
+	dev->phys = "LNXPWRBN/button/input0";
+
+	button_pdev = platform_device_register_simple("power_button", 0, NULL, 0);
+	if (IS_ERR(button_pdev)) {
+		ret = PTR_ERR(button_pdev);
+		goto out_free_input;
+	}
+
+	dev->dev.parent = &button_pdev->dev;
+	ret = input_register_device(dev);
+	if (ret) {
+		printk(KERN_ERR "%s: Failed to register device\n", __func__);
+		goto out_free_pdev;
+	}
+
+	button_dev = dev;
+
+	ret = pmi_register_handler(&cbe_pmi_handler);
+	if (ret) {
+		printk(KERN_ERR "%s: Failed to register with pmi.\n", __func__);
+		goto out_unregister_input;
+	}
+
+	return 0;
+
+out_unregister_input:
+	/* registered device: unregister (child before parent), never free */
+	button_dev = NULL;
+	input_unregister_device(dev);
+	platform_device_unregister(button_pdev);
+	return ret;
+
+out_free_pdev:
+	platform_device_unregister(button_pdev);
+out_free_input:
+	/* only reached while the input device is not registered */
+	input_free_device(dev);
+out:
+	return ret;
+}
+
+/*
+ * Module exit: stop PMI delivery first, then remove the input device and
+ * finally its parent platform device. The input device is registered at
+ * this point, so it must be removed with input_unregister_device();
+ * input_free_device() is only for devices that were never registered.
+ */
+static void __exit cbe_powerbutton_exit(void)
+{
+	pmi_unregister_handler(&cbe_pmi_handler);
+	input_unregister_device(button_dev);
+	platform_device_unregister(button_pdev);
+}
+
+module_init(cbe_powerbutton_init);
+module_exit(cbe_powerbutton_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Christian Krafft <krafft@de.ibm.com>");
diff --git a/arch/powerpc/platforms/cell/cbe_regs.c b/arch/powerpc/platforms/cell/cbe_regs.c
new file mode 100644
index 000000000..99b355875
--- /dev/null
+++ b/arch/powerpc/platforms/cell/cbe_regs.c
@@ -0,0 +1,298 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * cbe_regs.c
+ *
+ * Accessor routines for the various MMIO register blocks of the CBE
+ *
+ * (c) 2006 Benjamin Herrenschmidt <benh@kernel.crashing.org>, IBM Corp.
+ */
+
+#include <linux/percpu.h>
+#include <linux/types.h>
+#include <linux/export.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/pgtable.h>
+
+#include <asm/io.h>
+#include <asm/ptrace.h>
+#include <asm/cell-regs.h>
+
+/*
+ * Current implementation uses "cpu" nodes. We build our own mapping
+ * array of cpu numbers to cpu nodes locally for now to allow interrupt
+ * time code to have a fast path rather than call of_get_cpu_node(). If
+ * we implement cpu hotplug, we'll have to install an appropriate notifier
+ * in order to release references to the cpu going away
+ *
+ * One cbe_regs_map entry is filled per device-tree "cpu" node by
+ * cbe_regs_init(); cbe_regs_map_count is the number of entries in use.
+ */
+static struct cbe_regs_map
+{
+ struct device_node *cpu_node; /* the "cpu" node this entry was built from */
+ struct device_node *be_node; /* matching "be" node, NULL if none was found */
+ struct cbe_pmd_regs __iomem *pmd_regs; /* mapped by cbe_fill_regs_map() */
+ struct cbe_iic_regs __iomem *iic_regs; /* mapped by cbe_fill_regs_map() */
+ struct cbe_mic_tm_regs __iomem *mic_tm_regs; /* mapped by cbe_fill_regs_map() */
+ struct cbe_pmd_shadow_regs pmd_shadow_regs; /* stored inline, not a mapping */
+} cbe_regs_maps[MAX_CBE];
+static int cbe_regs_map_count;
+
+/* Per logical CPU lookup table, indexed by cpu number; built by cbe_regs_init(). */
+static struct cbe_thread_map
+{
+ struct device_node *cpu_node; /* result of of_get_cpu_node() for this cpu */
+ struct device_node *be_node; /* result of cbe_get_be_node() for this cpu */
+ struct cbe_regs_map *regs; /* register map shared by threads of the same cpu node */
+ unsigned int thread_id; /* thread number reported by of_get_cpu_node() */
+ unsigned int cbe_id; /* index of regs within cbe_regs_maps[] */
+} cbe_thread_map[NR_CPUS];
+
+/* cbe_local_mask[id]: cpus whose cbe_id is id (filled by cbe_regs_init()). */
+static cpumask_t cbe_local_mask[MAX_CBE] = { [0 ... MAX_CBE-1] = {CPU_BITS_NONE} };
+/* cpus whose hardware thread_id is 0 (filled by cbe_regs_init()). */
+static cpumask_t cbe_first_online_cpu = { CPU_BITS_NONE };
+
+/*
+ * Find the register map belonging to device node @np.
+ *
+ * For non-"spe" nodes, @np must be the cpu or be node recorded in one of the
+ * maps. For "spe" nodes the result is cached in np->data; on first use the
+ * tree is walked upwards to the enclosing "cpu" or "be" node and that node
+ * is looked up instead.
+ *
+ * Returns the map, or NULL if no map references the node.
+ */
+static struct cbe_regs_map *cbe_find_map(struct device_node *np)
+{
+	int i;
+	struct device_node *tmp_np;
+
+	if (!of_node_is_type(np, "spe")) {
+		for (i = 0; i < cbe_regs_map_count; i++)
+			if (cbe_regs_maps[i].cpu_node == np ||
+			    cbe_regs_maps[i].be_node == np)
+				return &cbe_regs_maps[i];
+		return NULL;
+	}
+
+	if (np->data)
+		return np->data;
+
+	/* walk up path until cpu or be node was found */
+	tmp_np = np;
+	do {
+		tmp_np = tmp_np->parent;
+		/* on a correct devicetree we won't get up to root */
+		BUG_ON(!tmp_np);
+		/*
+		 * Keep climbing only while the node is neither "cpu" nor "be".
+		 * The previous "||" was always true (no node has both types),
+		 * so the walk never stopped and ended in the BUG_ON above.
+		 */
+	} while (!of_node_is_type(tmp_np, "cpu") &&
+		 !of_node_is_type(tmp_np, "be"));
+
+	np->data = cbe_find_map(tmp_np);
+
+	return np->data;
+}
+
+/* Return the mapped PMD registers for device node @np, or NULL if unknown. */
+struct cbe_pmd_regs __iomem *cbe_get_pmd_regs(struct device_node *np)
+{
+	struct cbe_regs_map *entry = cbe_find_map(np);
+
+	return entry ? entry->pmd_regs : NULL;
+}
+EXPORT_SYMBOL_GPL(cbe_get_pmd_regs);
+
+/* Return the mapped PMD registers for logical cpu @cpu, or NULL if it has no map. */
+struct cbe_pmd_regs __iomem *cbe_get_cpu_pmd_regs(int cpu)
+{
+	struct cbe_regs_map *entry = cbe_thread_map[cpu].regs;
+
+	return entry ? entry->pmd_regs : NULL;
+}
+EXPORT_SYMBOL_GPL(cbe_get_cpu_pmd_regs);
+
+/* Return the PMD shadow register block for device node @np, or NULL if unknown. */
+struct cbe_pmd_shadow_regs *cbe_get_pmd_shadow_regs(struct device_node *np)
+{
+	struct cbe_regs_map *entry = cbe_find_map(np);
+
+	return entry ? &entry->pmd_shadow_regs : NULL;
+}
+
+/* Return the PMD shadow register block for logical cpu @cpu, or NULL if it has no map. */
+struct cbe_pmd_shadow_regs *cbe_get_cpu_pmd_shadow_regs(int cpu)
+{
+	struct cbe_regs_map *entry = cbe_thread_map[cpu].regs;
+
+	return entry ? &entry->pmd_shadow_regs : NULL;
+}
+
+/* Return the mapped IIC registers for device node @np, or NULL if unknown. */
+struct cbe_iic_regs __iomem *cbe_get_iic_regs(struct device_node *np)
+{
+	struct cbe_regs_map *entry = cbe_find_map(np);
+
+	return entry ? entry->iic_regs : NULL;
+}
+
+/* Return the mapped IIC registers for logical cpu @cpu, or NULL if it has no map. */
+struct cbe_iic_regs __iomem *cbe_get_cpu_iic_regs(int cpu)
+{
+	struct cbe_regs_map *entry = cbe_thread_map[cpu].regs;
+
+	return entry ? entry->iic_regs : NULL;
+}
+
+/* Return the mapped MIC-TM registers for device node @np, or NULL if unknown. */
+struct cbe_mic_tm_regs __iomem *cbe_get_mic_tm_regs(struct device_node *np)
+{
+	struct cbe_regs_map *entry = cbe_find_map(np);
+
+	return entry ? entry->mic_tm_regs : NULL;
+}
+
+/* Return the mapped MIC-TM registers for logical cpu @cpu, or NULL if it has no map. */
+struct cbe_mic_tm_regs __iomem *cbe_get_cpu_mic_tm_regs(int cpu)
+{
+	struct cbe_regs_map *entry = cbe_thread_map[cpu].regs;
+
+	return entry ? entry->mic_tm_regs : NULL;
+}
+EXPORT_SYMBOL_GPL(cbe_get_cpu_mic_tm_regs);
+
+/* Return the hardware thread id recorded for logical cpu @cpu. */
+u32 cbe_get_hw_thread_id(int cpu)
+{
+	const struct cbe_thread_map *thread = &cbe_thread_map[cpu];
+
+	return thread->thread_id;
+}
+EXPORT_SYMBOL_GPL(cbe_get_hw_thread_id);
+
+/* Return the CBE index (position in cbe_regs_maps[]) recorded for logical cpu @cpu. */
+u32 cbe_cpu_to_node(int cpu)
+{
+	const struct cbe_thread_map *thread = &cbe_thread_map[cpu];
+
+	return thread->cbe_id;
+}
+EXPORT_SYMBOL_GPL(cbe_cpu_to_node);
+
+/* Return the first cpu set in the local mask of CBE @node. */
+u32 cbe_node_to_cpu(int node)
+{
+	const cpumask_t *local = &cbe_local_mask[node];
+
+	return cpumask_first(local);
+}
+EXPORT_SYMBOL_GPL(cbe_node_to_cpu);
+
+/*
+ * Find the "be" node whose "cpus" property lists the cpu node of logical
+ * cpu @cpu_id.
+ *
+ * A "be" node without a "cpus" property is returned as-is (with a one-time
+ * warning). Returns NULL when no "be" node references the cpu.
+ */
+static struct device_node *__init cbe_get_be_node(int cpu_id)
+{
+	struct device_node *np;
+
+	for_each_node_by_type (np, "be") {
+		int len, i, count;
+		const phandle *cpu_handle;
+
+		cpu_handle = of_get_property(np, "cpus", &len);
+
+		/*
+		 * the CAB SLOF tree is non compliant, so we just assume
+		 * there is only one node
+		 */
+		if (WARN_ON_ONCE(!cpu_handle))
+			return np;
+
+		/*
+		 * of_get_property() reports the length in bytes; convert it
+		 * to a phandle count so we do not read past the property.
+		 */
+		count = len / sizeof(*cpu_handle);
+
+		for (i = 0; i < count; i++) {
+			struct device_node *ch_np = of_find_node_by_phandle(cpu_handle[i]);
+			struct device_node *ci_np = of_get_cpu_node(cpu_id, NULL);
+
+			/* only the pointer values are compared below */
+			of_node_put(ch_np);
+			of_node_put(ci_np);
+
+			if (ch_np == ci_np)
+				return np;
+		}
+	}
+
+	return NULL;
+}
+
+/*
+ * Map the PMD, IIC and MIC-TM register blocks for @map.
+ *
+ * With a "be" node, the blocks come from the "pervasive",
+ * "CBEA-Internal-Interrupt-Controller" and "mic-tm" nodes that are direct
+ * children of it. Without one, they are taken from address/length
+ * properties on the cpu node.
+ */
+static void __init cbe_fill_regs_map(struct cbe_regs_map *map)
+{
+	struct device_node *be = map->be_node;
+	struct device_node *np, *parent_np;
+
+	if (!be) {
+		struct device_node *cpu = map->cpu_node;
+		/* That hack must die die die ! */
+		const struct address_prop {
+			unsigned long address;
+			unsigned int len;
+		} __attribute__((packed)) *prop;
+
+		prop = of_get_property(cpu, "pervasive", NULL);
+		if (prop != NULL)
+			map->pmd_regs = ioremap(prop->address, prop->len);
+
+		prop = of_get_property(cpu, "iic", NULL);
+		if (prop != NULL)
+			map->iic_regs = ioremap(prop->address, prop->len);
+
+		prop = of_get_property(cpu, "mic-tm", NULL);
+		if (prop != NULL)
+			map->mic_tm_regs = ioremap(prop->address, prop->len);
+
+		return;
+	}
+
+	for_each_node_by_type(np, "pervasive") {
+		parent_np = of_get_parent(np);
+		if (parent_np == be)
+			map->pmd_regs = of_iomap(np, 0);
+		of_node_put(parent_np);
+	}
+
+	for_each_node_by_type(np, "CBEA-Internal-Interrupt-Controller") {
+		parent_np = of_get_parent(np);
+		if (parent_np == be)
+			map->iic_regs = of_iomap(np, 2);
+		of_node_put(parent_np);
+	}
+
+	for_each_node_by_type(np, "mic-tm") {
+		parent_np = of_get_parent(np);
+		if (parent_np == be)
+			map->mic_tm_regs = of_iomap(np, 0);
+		of_node_put(parent_np);
+	}
+}
+
+
+/*
+ * Build the cpu -> node/thread table and one register map per device-tree
+ * "cpu" node, then map that node's register blocks.
+ * Stops early (with an error message) once more than MAX_CBE cpu nodes
+ * have been seen.
+ */
+void __init cbe_regs_init(void)
+{
+ int i;
+ unsigned int thread_id;
+ struct device_node *cpu;
+
+ /* Build local fast map of CPUs */
+ for_each_possible_cpu(i) {
+ /* NOTE(review): thread_id is stored even if no cpu node was found - confirm it is always written */
+ cbe_thread_map[i].cpu_node = of_get_cpu_node(i, &thread_id);
+ cbe_thread_map[i].be_node = cbe_get_be_node(i);
+ cbe_thread_map[i].thread_id = thread_id;
+ }
+
+ /* Find maps for each device tree CPU */
+ for_each_node_by_type(cpu, "cpu") {
+ struct cbe_regs_map *map;
+ unsigned int cbe_id;
+
+ /* claim the next slot; undone below if the table is already full */
+ cbe_id = cbe_regs_map_count++;
+ map = &cbe_regs_maps[cbe_id];
+
+ if (cbe_regs_map_count > MAX_CBE) {
+ printk(KERN_ERR "cbe_regs: More BE chips than supported"
+ "!\n");
+ cbe_regs_map_count--;
+ /* drop the iterator's reference since we leave the loop early */
+ of_node_put(cpu);
+ return;
+ }
+ of_node_put(map->cpu_node);
+ map->cpu_node = of_node_get(cpu);
+
+ /* attach every logical cpu (thread) that lives on this cpu node */
+ for_each_possible_cpu(i) {
+ struct cbe_thread_map *thread = &cbe_thread_map[i];
+
+ if (thread->cpu_node == cpu) {
+ thread->regs = map;
+ thread->cbe_id = cbe_id;
+ map->be_node = thread->be_node;
+ cpumask_set_cpu(i, &cbe_local_mask[cbe_id]);
+ if(thread->thread_id == 0)
+ cpumask_set_cpu(i, &cbe_first_online_cpu);
+ }
+ }
+
+ cbe_fill_regs_map(map);
+ }
+}
+
diff --git a/arch/powerpc/platforms/cell/cbe_thermal.c b/arch/powerpc/platforms/cell/cbe_thermal.c
new file mode 100644
index 000000000..2f45428e3
--- /dev/null
+++ b/arch/powerpc/platforms/cell/cbe_thermal.c
@@ -0,0 +1,386 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * thermal support for the cell processor
+ *
+ * This module adds some sysfs attributes to cpu and spu nodes.
+ * Base for measurements are the digital thermal sensors (DTS)
+ * located on the chip.
+ * The accuracy is 2 degrees, starting from 65 up to 125 degrees celsius
+ * The attributes can be found under
+ * /sys/devices/system/cpu/cpuX/thermal
+ * /sys/devices/system/spu/spuX/thermal
+ *
+ * The following attributes are added for each node:
+ * temperature:
+ * contains the current temperature measured by the DTS
+ * throttle_begin:
+ * throttling begins when temperature is greater or equal to
+ * throttle_begin. Setting this value to 125 prevents throttling.
+ * throttle_end:
+ * throttling is being ceased, if the temperature is lower than
+ * throttle_end. Due to a delay between applying throttling and
+ * a reduced temperature this value should be less than throttle_begin.
+ * A value equal to throttle_begin provides only a very little hysteresis.
+ * throttle_full_stop:
+ * If the temperature is greater or equal to throttle_full_stop,
+ * full throttling is applied to the cpu or spu. This value should be
+ * greater than throttle_begin and throttle_end. Setting this value to
+ * 65 prevents the unit from running code at all.
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ *
+ * Author: Christian Krafft <krafft@de.ibm.com>
+ */
+
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/cpu.h>
+#include <linux/stringify.h>
+#include <asm/spu.h>
+#include <asm/io.h>
+#include <asm/cell-regs.h>
+
+#include "spu_priv1_mmio.h"
+
+#define TEMP_MIN 65
+#define TEMP_MAX 125
+
+/*
+ * Define device attribute attr_<prefix>_<name>, visible in sysfs as <name>
+ * with mode _mode, backed by <prefix>_show_<name> and <prefix>_store_<name>.
+ */
+#define DEVICE_PREFIX_ATTR(_prefix,_name,_mode) \
+struct device_attribute attr_ ## _prefix ## _ ## _name = { \
+ .attr = { .name = __stringify(_name), .mode = _mode }, \
+ .show = _prefix ## _show_ ## _name, \
+ .store = _prefix ## _store_ ## _name, \
+};
+
+/*
+ * Convert a 6-bit sensor register value to a temperature:
+ * TEMP_MIN plus two units per register step.
+ */
+static inline u8 reg_to_temp(u8 reg_value)
+{
+	u8 steps = reg_value & 0x3f;
+
+	return (steps << 1) + TEMP_MIN;
+}
+
+/*
+ * Convert a temperature to the 6-bit register encoding: half the offset
+ * from TEMP_MIN, masked to six bits.
+ */
+static inline u8 temp_to_reg(u8 temp)
+{
+	int offset = temp - TEMP_MIN;
+
+	return (offset >> 1) & 0x3f;
+}
+
+/* Return the PMD registers for the SPU that embeds @dev. */
+static struct cbe_pmd_regs __iomem *get_pmd_regs(struct device *dev)
+{
+	struct spu *owner = container_of(dev, struct spu, dev);
+
+	return cbe_get_pmd_regs(spu_devnode(owner));
+}
+
+/*
+ * Read the 64-bit register @reg and return the byte that belongs to the SPU
+ * embedding @dev, selected by the SPU's spe_id.
+ */
+static u8 spu_read_register_value(struct device *dev, union spe_reg __iomem *reg)
+{
+	struct spu *owner = container_of(dev, struct spu, dev);
+	union spe_reg snapshot;
+
+	snapshot.val = in_be64(&reg->val);
+
+	return snapshot.spe[owner->spe_id];
+}
+
+/* sysfs show: current temperature of the SPU, read from ts_ctsr1. */
+static ssize_t spu_show_temp(struct device *dev, struct device_attribute *attr,
+			char *buf)
+{
+	struct cbe_pmd_regs __iomem *regs = get_pmd_regs(dev);
+	u8 raw = spu_read_register_value(dev, &regs->ts_ctsr1);
+
+	return sprintf(buf, "%d\n", reg_to_temp(raw));
+}
+
+/*
+ * Print the temperature encoded in the 6-bit field of tm_tpr that starts at
+ * bit @pos.
+ */
+static ssize_t show_throttle(struct cbe_pmd_regs __iomem *pmd_regs, char *buf, int pos)
+{
+	u64 field = (in_be64(&pmd_regs->tm_tpr.val) >> pos) & 0x3F;
+
+	return sprintf(buf, "%d\n", reg_to_temp(field));
+}
+
+/*
+ * Parse an unsigned temperature from @buf and store its register encoding
+ * in the byte of tm_tpr that starts at bit @pos (read-modify-write).
+ *
+ * Returns @size on success, or -EINVAL if the input is not a number in
+ * [TEMP_MIN, TEMP_MAX].
+ */
+static ssize_t store_throttle(struct cbe_pmd_regs __iomem *pmd_regs, const char *buf, size_t size, int pos)
+{
+	unsigned int temp;
+	u64 tpr;
+
+	if (sscanf(buf, "%u", &temp) != 1)
+		return -EINVAL;
+	if (temp < TEMP_MIN || temp > TEMP_MAX)
+		return -EINVAL;
+
+	tpr = in_be64(&pmd_regs->tm_tpr.val);
+
+	/* clear the whole target byte, then insert the new encoding */
+	tpr &= ~(0xffull << pos);
+	tpr |= (u64)temp_to_reg(temp) << pos;
+
+	out_be64(&pmd_regs->tm_tpr.val, tpr);
+	return size;
+}
+
+/* sysfs show for the SPU throttle_end threshold: tm_tpr field at bit 0. */
+static ssize_t spu_show_throttle_end(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return show_throttle(get_pmd_regs(dev), buf, 0);
+}
+
+/* sysfs show for the SPU throttle_begin threshold: tm_tpr field at bit 8. */
+static ssize_t spu_show_throttle_begin(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return show_throttle(get_pmd_regs(dev), buf, 8);
+}
+
+/* sysfs show for the SPU throttle_full_stop threshold: tm_tpr field at bit 16. */
+static ssize_t spu_show_throttle_full_stop(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return show_throttle(get_pmd_regs(dev), buf, 16);
+}
+
+/* sysfs store for the SPU throttle_end threshold: tm_tpr byte at bit 0. */
+static ssize_t spu_store_throttle_end(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t size)
+{
+ return store_throttle(get_pmd_regs(dev), buf, size, 0);
+}
+
+/* sysfs store for the SPU throttle_begin threshold: tm_tpr byte at bit 8. */
+static ssize_t spu_store_throttle_begin(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t size)
+{
+ return store_throttle(get_pmd_regs(dev), buf, size, 8);
+}
+
+/* sysfs store for the SPU throttle_full_stop threshold: tm_tpr byte at bit 16. */
+static ssize_t spu_store_throttle_full_stop(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t size)
+{
+ return store_throttle(get_pmd_regs(dev), buf, size, 16);
+}
+
+/*
+ * Print the temperature encoded in the 6-bit field of ts_ctsr2 that starts
+ * at bit @pos, for the cpu identified by dev->id.
+ */
+static ssize_t ppe_show_temp(struct device *dev, char *buf, int pos)
+{
+	struct cbe_pmd_regs __iomem *regs = cbe_get_cpu_pmd_regs(dev->id);
+	u64 raw = in_be64(&regs->ts_ctsr2);
+
+	return sprintf(buf, "%d\n", reg_to_temp((raw >> pos) & 0x3f));
+}
+
+
+/* shows the temperature of the DTS on the PPE,
+ * located near the linear thermal sensor
+ * (ts_ctsr2 field at bit 32) */
+static ssize_t ppe_show_temp0(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return ppe_show_temp(dev, buf, 32);
+}
+
+/* shows the temperature of the second DTS on the PPE (ts_ctsr2 field at bit 0) */
+static ssize_t ppe_show_temp1(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return ppe_show_temp(dev, buf, 0);
+}
+
+/* sysfs show for the PPE throttle_end threshold: tm_tpr field at bit 32. */
+static ssize_t ppe_show_throttle_end(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return show_throttle(cbe_get_cpu_pmd_regs(dev->id), buf, 32);
+}
+
+/* sysfs show for the PPE throttle_begin threshold: tm_tpr field at bit 40. */
+static ssize_t ppe_show_throttle_begin(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return show_throttle(cbe_get_cpu_pmd_regs(dev->id), buf, 40);
+}
+
+/* sysfs show for the PPE throttle_full_stop threshold: tm_tpr field at bit 48. */
+static ssize_t ppe_show_throttle_full_stop(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return show_throttle(cbe_get_cpu_pmd_regs(dev->id), buf, 48);
+}
+
+/* sysfs store for the PPE throttle_end threshold: tm_tpr byte at bit 32. */
+static ssize_t ppe_store_throttle_end(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t size)
+{
+ return store_throttle(cbe_get_cpu_pmd_regs(dev->id), buf, size, 32);
+}
+
+/* sysfs store for the PPE throttle_begin threshold: tm_tpr byte at bit 40. */
+static ssize_t ppe_store_throttle_begin(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t size)
+{
+ return store_throttle(cbe_get_cpu_pmd_regs(dev->id), buf, size, 40);
+}
+
+/* sysfs store for the PPE throttle_full_stop threshold: tm_tpr byte at bit 48. */
+static ssize_t ppe_store_throttle_full_stop(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t size)
+{
+ return store_throttle(cbe_get_cpu_pmd_regs(dev->id), buf, size, 48);
+}
+
+
+/* Read-only (0400) SPU temperature attribute; it has no store callback. */
+static struct device_attribute attr_spu_temperature = {
+ .attr = {.name = "temperature", .mode = 0400 },
+ .show = spu_show_temp,
+};
+
+/* Read/write (0600) SPU throttle thresholds, wired to spu_show_* and spu_store_*. */
+static DEVICE_PREFIX_ATTR(spu, throttle_end, 0600);
+static DEVICE_PREFIX_ATTR(spu, throttle_begin, 0600);
+static DEVICE_PREFIX_ATTR(spu, throttle_full_stop, 0600);
+
+
+/* NULL-terminated attribute list for the SPU "thermal" group. */
+static struct attribute *spu_attributes[] = {
+ &attr_spu_temperature.attr,
+ &attr_spu_throttle_end.attr,
+ &attr_spu_throttle_begin.attr,
+ &attr_spu_throttle_full_stop.attr,
+ NULL,
+};
+
+/* Group added to spu devices by thermal_init(); appears as the "thermal" directory. */
+static const struct attribute_group spu_attribute_group = {
+ .name = "thermal",
+ .attrs = spu_attributes,
+};
+
+/* Read-only (0400) PPE temperature attributes, one per sensor field of ts_ctsr2. */
+static struct device_attribute attr_ppe_temperature0 = {
+ .attr = {.name = "temperature0", .mode = 0400 },
+ .show = ppe_show_temp0,
+};
+
+static struct device_attribute attr_ppe_temperature1 = {
+ .attr = {.name = "temperature1", .mode = 0400 },
+ .show = ppe_show_temp1,
+};
+
+/* Read/write (0600) PPE throttle thresholds, wired to ppe_show_* and ppe_store_*. */
+static DEVICE_PREFIX_ATTR(ppe, throttle_end, 0600);
+static DEVICE_PREFIX_ATTR(ppe, throttle_begin, 0600);
+static DEVICE_PREFIX_ATTR(ppe, throttle_full_stop, 0600);
+
+/* NULL-terminated attribute list for the PPE "thermal" group. */
+static struct attribute *ppe_attributes[] = {
+ &attr_ppe_temperature0.attr,
+ &attr_ppe_temperature1.attr,
+ &attr_ppe_throttle_end.attr,
+ &attr_ppe_throttle_begin.attr,
+ &attr_ppe_throttle_full_stop.attr,
+ NULL,
+};
+
+/* Group added to cpu devices by thermal_init(); appears as the "thermal" directory. */
+static struct attribute_group ppe_attribute_group = {
+ .name = "thermal",
+ .attrs = ppe_attributes,
+};
+
+/*
+ * initialize throttling with default values
+ *
+ * Writes the same default TPR, STR and CR register values to the PMD
+ * registers of every possible cpu. Returns 0 on success, or -EINVAL if a
+ * cpu has no device or no PMD register mapping.
+ */
+static int __init init_default_values(void)
+{
+ int cpu;
+ struct cbe_pmd_regs __iomem *pmd_regs;
+ struct device *dev;
+ union ppe_spe_reg tpr;
+ union spe_reg str1;
+ u64 str2;
+ union spe_reg cr1;
+ u64 cr2;
+
+ /* TPR defaults */
+ /* ppe
+ * 1F - no full stop
+ * 08 - dynamic throttling starts if over 80 degrees
+ * 03 - dynamic throttling ceases if below 70 degrees */
+ tpr.ppe = 0x1F0803;
+ /* spe
+ * 10 - full stopped when over 96 degrees
+ * 08 - dynamic throttling starts if over 80 degrees
+ * 03 - dynamic throttling ceases if below 70 degrees
+ */
+ tpr.spe = 0x100803;
+
+ /* STR defaults */
+ /* str1
+ * 10 - stop 16 of 32 cycles
+ */
+ str1.val = 0x1010101010101010ull;
+ /* str2
+ * 10 - stop 16 of 32 cycles
+ */
+ str2 = 0x10;
+
+ /* CR defaults */
+ /* cr1
+ * 4 - normal operation
+ */
+ cr1.val = 0x0404040404040404ull;
+ /* cr2
+ * 4 - normal operation
+ */
+ cr2 = 0x04;
+
+ for_each_possible_cpu (cpu) {
+ pr_debug("processing cpu %d\n", cpu);
+ dev = get_cpu_device(cpu);
+
+ if (!dev) {
+ pr_info("invalid dev pointer for cbe_thermal\n");
+ return -EINVAL;
+ }
+
+ /* the register block is looked up by dev->id rather than by cpu */
+ pmd_regs = cbe_get_cpu_pmd_regs(dev->id);
+
+ if (!pmd_regs) {
+ pr_info("invalid CBE regs pointer for cbe_thermal\n");
+ return -EINVAL;
+ }
+
+ out_be64(&pmd_regs->tm_str2, str2);
+ out_be64(&pmd_regs->tm_str1.val, str1.val);
+ out_be64(&pmd_regs->tm_tpr.val, tpr.val);
+ out_be64(&pmd_regs->tm_cr1.val, cr1.val);
+ out_be64(&pmd_regs->tm_cr2, cr2);
+ }
+
+ return 0;
+}
+
+
+/*
+ * Module init: program the default throttling values and, only if that
+ * succeeded, add the "thermal" attribute groups to spu and cpu devices.
+ * Returns the result of init_default_values().
+ */
+static int __init thermal_init(void)
+{
+	int rc = init_default_values();
+
+	if (rc != 0)
+		return rc;
+
+	spu_add_dev_attr_group(&spu_attribute_group);
+	cpu_add_dev_attr_group(&ppe_attribute_group);
+
+	return 0;
+}
+module_init(thermal_init);
+
+/* Module exit: remove the "thermal" attribute groups added by thermal_init(). */
+static void __exit thermal_exit(void)
+{
+ spu_remove_dev_attr_group(&spu_attribute_group);
+ cpu_remove_dev_attr_group(&ppe_attribute_group);
+}
+module_exit(thermal_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Christian Krafft <krafft@de.ibm.com>");
+
diff --git a/arch/powerpc/platforms/cell/cell.h b/arch/powerpc/platforms/cell/cell.h
new file mode 100644
index 000000000..d5142e905
--- /dev/null
+++ b/arch/powerpc/platforms/cell/cell.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Cell Platform common data structures
+ *
+ * Copyright 2015, Daniel Axtens, IBM Corporation
+ */
+
+#ifndef CELL_H
+#define CELL_H
+
+#include <asm/pci-bridge.h>
+
+/* PCI controller operations for the Cell platform; declared here, defined elsewhere. */
+extern struct pci_controller_ops cell_pci_controller_ops;
+
+#endif
diff --git a/arch/powerpc/platforms/cell/cpufreq_spudemand.c b/arch/powerpc/platforms/cell/cpufreq_spudemand.c
new file mode 100644
index 000000000..ca7849e11
--- /dev/null
+++ b/arch/powerpc/platforms/cell/cpufreq_spudemand.c
@@ -0,0 +1,133 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * spu aware cpufreq governor for the cell processor
+ *
+ * © Copyright IBM Corporation 2006-2008
+ *
+ * Author: Christian Krafft <krafft@de.ibm.com>
+ */
+
+#include <linux/cpufreq.h>
+#include <linux/sched.h>
+#include <linux/sched/loadavg.h>
+#include <linux/module.h>
+#include <linux/timer.h>
+#include <linux/workqueue.h>
+#include <linux/atomic.h>
+#include <asm/machdep.h>
+#include <asm/spu.h>
+
+#define POLL_TIME 100000 /* in µs */
+#define EXP 753 /* exp(-1) in fixed-point */
+
+/* Per-CPU bookkeeping of the spudemand governor. */
+struct spu_gov_info_struct {
+ unsigned long busy_spus; /* running busy-SPU load, fixed-point (scaled by FIXED_1) */
+ struct cpufreq_policy *policy; /* set in spu_gov_start(), reset to NULL in spu_gov_stop() */
+ struct delayed_work work; /* periodic work item, handler is spu_gov_work() */
+ unsigned int poll_int; /* polling interval in µs */
+};
+static DEFINE_PER_CPU(struct spu_gov_info_struct, spu_gov_info);
+
+/*
+ * Update info->busy_spus via calc_load() from the number of SPUs currently
+ * busy on the policy CPU's node, and return the policy's maximum frequency
+ * scaled by that (fixed-point) load.
+ */
+static int calc_freq(struct spu_gov_info_struct *info)
+{
+	int this_cpu = info->policy->cpu;
+	int nbusy = atomic_read(&cbe_spu_info[cpu_to_node(this_cpu)].busy_spus);
+
+	info->busy_spus = calc_load(info->busy_spus, EXP, nbusy * FIXED_1);
+	pr_debug("cpu %d: busy_spus=%d, info->busy_spus=%ld\n",
+		 this_cpu, nbusy, info->busy_spus);
+
+	return info->policy->max * info->busy_spus / FIXED_1;
+}
+
+/*
+ * Delayed-work handler: compute a new target frequency, pass it to the
+ * cpufreq driver and queue this work item again on the policy CPU after
+ * poll_int microseconds.
+ */
+static void spu_gov_work(struct work_struct *work)
+{
+	struct spu_gov_info_struct *gov =
+		container_of(work, struct spu_gov_info_struct, work.work);
+	unsigned long freq;
+	int ticks;
+
+	/* after cancel_delayed_work_sync we unset info->policy */
+	BUG_ON(!gov->policy);
+
+	freq = calc_freq(gov);
+	__cpufreq_driver_target(gov->policy, freq, CPUFREQ_RELATION_H);
+
+	ticks = usecs_to_jiffies(gov->poll_int);
+	schedule_delayed_work_on(gov->policy->cpu, &gov->work, ticks);
+}
+
+/*
+ * Initialise the deferrable work item of @info and queue its first run on
+ * the policy CPU, poll_int microseconds from now.
+ */
+static void spu_gov_init_work(struct spu_gov_info_struct *info)
+{
+	int ticks;
+
+	INIT_DEFERRABLE_WORK(&info->work, spu_gov_work);
+
+	ticks = usecs_to_jiffies(info->poll_int);
+	schedule_delayed_work_on(info->policy->cpu, &info->work, ticks);
+}
+
+/* Cancel the governor's delayed work via cancel_delayed_work_sync(). */
+static void spu_gov_cancel_work(struct spu_gov_info_struct *info)
+{
+ cancel_delayed_work_sync(&info->work);
+}
+
+/*
+ * Governor ->start callback.
+ *
+ * Rejects an offline policy CPU or a policy whose current frequency is 0
+ * with -EINVAL.  Otherwise attaches @policy to the per-CPU governor data of
+ * every CPU in policy->cpus, sets the polling interval of the policy CPU and
+ * schedules the first work run.  Returns 0 on success.
+ */
+static int spu_gov_start(struct cpufreq_policy *policy)
+{
+	unsigned int cpu = policy->cpu;
+	struct spu_gov_info_struct *gov;
+	int sibling;
+
+	if (!cpu_online(cpu)) {
+		printk(KERN_ERR "cpu %d is not online\n", cpu);
+		return -EINVAL;
+	}
+
+	if (policy->cur == 0) {
+		printk(KERN_ERR "no cpu specified in policy\n");
+		return -EINVAL;
+	}
+
+	/* initialize spu_gov_info for all affected cpus */
+	for_each_cpu(sibling, policy->cpus)
+		per_cpu(spu_gov_info, sibling).policy = policy;
+
+	gov = &per_cpu(spu_gov_info, cpu);
+	gov->poll_int = POLL_TIME;
+
+	/* setup timer */
+	spu_gov_init_work(gov);
+
+	return 0;
+}
+
+/*
+ * Governor ->stop callback: cancel the policy CPU's work item first, then
+ * detach the policy from every CPU in policy->cpus.
+ */
+static void spu_gov_stop(struct cpufreq_policy *policy)
+{
+	int sibling;
+
+	/* cancel timer */
+	spu_gov_cancel_work(&per_cpu(spu_gov_info, policy->cpu));
+
+	/* clean spu_gov_info for all affected cpus */
+	for_each_cpu(sibling, policy->cpus)
+		per_cpu(spu_gov_info, sibling).policy = NULL;
+}
+
+/* The "spudemand" cpufreq governor and its module init/exit hooks. */
+static struct cpufreq_governor spu_governor = {
+ .name = "spudemand",
+ .start = spu_gov_start,
+ .stop = spu_gov_stop,
+ .owner = THIS_MODULE,
+};
+cpufreq_governor_init(spu_governor);
+cpufreq_governor_exit(spu_governor);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Christian Krafft <krafft@de.ibm.com>");
diff --git a/arch/powerpc/platforms/cell/interrupt.c b/arch/powerpc/platforms/cell/interrupt.c
new file mode 100644
index 000000000..03ee8152e
--- /dev/null
+++ b/arch/powerpc/platforms/cell/interrupt.c
@@ -0,0 +1,390 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Cell Internal Interrupt Controller
+ *
+ * Copyright (C) 2006 Benjamin Herrenschmidt (benh@kernel.crashing.org)
+ * IBM, Corp.
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ *
+ * Author: Arnd Bergmann <arndb@de.ibm.com>
+ *
+ * TODO:
+ * - Fix various assumptions related to HW CPU numbers vs. linux CPU numbers
+ * vs node numbers in the setup code
+ * - Implement proper handling of maxcpus=1/2 (that is, routing of irqs from
+ * a non-active node to the active node)
+ */
+
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
+#include <linux/export.h>
+#include <linux/percpu.h>
+#include <linux/types.h>
+#include <linux/ioport.h>
+#include <linux/kernel_stat.h>
+#include <linux/pgtable.h>
+#include <linux/of_address.h>
+
+#include <asm/io.h>
+#include <asm/ptrace.h>
+#include <asm/machdep.h>
+#include <asm/cell-regs.h>
+
+#include "interrupt.h"
+
+/* Per-CPU state of the internal interrupt controller (one per HW thread). */
+struct iic {
+ struct cbe_iic_thread_regs __iomem *regs; /* ioremap()ed in init_one_iic() */
+ u8 target_id; /* computed from the HW cpu number in init_one_iic() */
+ u8 eoi_stack[16]; /* priorities pushed by iic_get_irq(), popped by iic_eoi() */
+ int eoi_ptr; /* index of the current top of eoi_stack */
+ struct device_node *node; /* device-tree node, reference taken with of_node_get() */
+};
+
+static DEFINE_PER_CPU(struct iic, cpu_iic);
+#define IIC_NODE_COUNT 2
+static struct irq_domain *iic_host; /* created in iic_init_IRQ() */
+
+/*
+ * Convert the "pending" bits read from the IIC into a hw irq number:
+ * IPIs map to IIC_IRQ_TYPE_IPI plus the top nibble of the priority, all
+ * other sources to (node, class, unit).
+ */
+static irq_hw_number_t iic_pending_to_hwnum(struct cbe_iic_pending_bits bits)
+{
+	unsigned char src_unit, src_node, irq_class;
+
+	/* Decode IPIs */
+	if (bits.flags & CBE_IIC_IRQ_IPI)
+		return IIC_IRQ_TYPE_IPI | (bits.prio >> 4);
+
+	src_unit = bits.source & 0xf;
+	src_node = bits.source >> 4;
+	irq_class = bits.class & 3;
+
+	return (src_node << IIC_IRQ_NODE_SHIFT) | (irq_class << 4) | src_unit;
+}
+
+/* irq_mask callback shared by iic_chip and iic_ioexc_chip; does nothing. */
+static void iic_mask(struct irq_data *d)
+{
+}
+
+/* irq_unmask callback shared by iic_chip and iic_ioexc_chip; does nothing. */
+static void iic_unmask(struct irq_data *d)
+{
+}
+
+/*
+ * End-of-interrupt for iic_chip: pop one level off this CPU's EOI stack and
+ * write the priority found there back to the IIC priority register.
+ */
+static void iic_eoi(struct irq_data *d)
+{
+	struct iic *iic = this_cpu_ptr(&cpu_iic);
+
+	/*
+	 * Catch an unbalanced EOI before the pop: popping with eoi_ptr < 1
+	 * would index in front of eoi_stack[] and feed that value to the
+	 * hardware.
+	 */
+	BUG_ON(iic->eoi_ptr < 1);
+	out_be64(&iic->regs->prio, iic->eoi_stack[--iic->eoi_ptr]);
+}
+
+/* irq_chip used for IPIs and normal IIC interrupts (see iic_host_map()). */
+static struct irq_chip iic_chip = {
+ .name = "CELL-IIC",
+ .irq_mask = iic_mask,
+ .irq_unmask = iic_unmask,
+ .irq_eoi = iic_eoi,
+};
+
+
+/* irq_eoi callback of iic_ioexc_chip; does nothing. */
+static void iic_ioexc_eoi(struct irq_data *d)
+{
+}
+
+/*
+ * Chained handler for the IO exception cascade interrupt.
+ *
+ * Repeatedly reads the node's iic_is status register until it reads zero.
+ * For every set bit the interrupt "base | n" is dispatched in iic_host, where
+ * n counts from the most significant bit (n = 0 is 0x8000000000000000).
+ * Bits in IIC_ISR_EDGE_MASK are written back before dispatching, all others
+ * afterwards.  Finally the parent chip's irq_eoi is invoked.
+ */
+static void iic_ioexc_cascade(struct irq_desc *desc)
+{
+ struct irq_chip *chip = irq_desc_get_chip(desc);
+ struct cbe_iic_regs __iomem *node_iic =
+ (void __iomem *)irq_desc_get_handler_data(desc); /* stored by setup_iic() */
+ unsigned int irq = irq_desc_get_irq(desc);
+ /* NOTE(review): derives the hw number base from the Linux irq number; presumably relies on both sharing the upper bits - confirm */
+ unsigned int base = (irq & 0xffffff00) | IIC_IRQ_TYPE_IOEXC;
+ unsigned long bits, ack;
+ int cascade; /* bit index counted from the MSB of the status word */
+
+ for (;;) {
+ bits = in_be64(&node_iic->iic_is);
+ if (bits == 0)
+ break;
+ /* pre-ack edge interrupts */
+ ack = bits & IIC_ISR_EDGE_MASK;
+ if (ack)
+ out_be64(&node_iic->iic_is, ack);
+ /* handle them */
+ for (cascade = 63; cascade >= 0; cascade--)
+ if (bits & (0x8000000000000000UL >> cascade))
+ generic_handle_domain_irq(iic_host,
+ base | cascade);
+ /* post-ack level interrupts */
+ ack = bits & ~IIC_ISR_EDGE_MASK;
+ if (ack)
+ out_be64(&node_iic->iic_is, ack);
+ }
+ chip->irq_eoi(&desc->irq_data);
+}
+
+
+/* irq_chip used for IO exception interrupts (see iic_host_map()). */
+static struct irq_chip iic_ioexc_chip = {
+ .name = "CELL-IOEX",
+ .irq_mask = iic_mask,
+ .irq_unmask = iic_unmask,
+ .irq_eoi = iic_ioexc_eoi,
+};
+
+/*
+ * Get an IRQ number from the pending state register of the IIC.
+ *
+ * Reads this CPU's pending_destr register, translates a valid entry to a
+ * Linux irq number through iic_host and pushes the entry's priority on the
+ * per-CPU EOI stack that iic_eoi() unwinds.  Returns 0 when nothing valid is
+ * pending or when the hardware number has no mapping.
+ */
+static unsigned int iic_get_irq(void)
+{
+	struct cbe_iic_pending_bits pending;
+	struct iic *iic;
+	unsigned int virq;
+
+	iic = this_cpu_ptr(&cpu_iic);
+	*(unsigned long *) &pending =
+		in_be64((u64 __iomem *) &iic->regs->pending_destr);
+	if (!(pending.flags & CBE_IIC_IRQ_VALID))
+		return 0;
+	virq = irq_linear_revmap(iic_host, iic_pending_to_hwnum(pending));
+	if (!virq)
+		return 0;
+	/*
+	 * eoi_stack[] has 16 slots (indices 0..15).  Check for overflow
+	 * before the push, so a full stack is reported instead of first
+	 * storing past the end of the array.
+	 */
+	BUG_ON(iic->eoi_ptr >= 15);
+	iic->eoi_stack[++iic->eoi_ptr] = pending.prio;
+	return virq;
+}
+
+/* Write 0xff to the priority register of the calling CPU's IIC. */
+void iic_setup_cpu(void)
+{
+ out_be64(&this_cpu_ptr(&cpu_iic)->regs->prio, 0xff);
+}
+
+/* Return the target id stored for @cpu by init_one_iic(). */
+u8 iic_get_target_id(int cpu)
+{
+ return per_cpu(cpu_iic, cpu).target_id;
+}
+
+EXPORT_SYMBOL_GPL(iic_get_target_id);
+
+#ifdef CONFIG_SMP
+
+/*
+ * Use the highest interrupt priorities for IPI: message number msg maps to
+ * hw irq IIC_IRQ_TYPE_IPI + (0xf - msg).
+ */
+static inline int iic_msg_to_irq(int msg)
+{
+ return IIC_IRQ_TYPE_IPI + 0xf - msg;
+}
+
+/*
+ * Send IPI message @msg to @cpu by writing (0xf - msg) << 4 to the
+ * "generate" register of that CPU's IIC.
+ */
+void iic_message_pass(int cpu, int msg)
+{
+ out_be64(&per_cpu(cpu_iic, cpu).regs->generate, (0xf - msg) << 4);
+}
+
+/*
+ * Map the hw irq belonging to IPI message @msg and hand it to
+ * smp_request_message_ipi(); the mapping is disposed again when that call
+ * returns non-zero.
+ */
+static void iic_request_ipi(int msg)
+{
+	int virq = irq_create_mapping(iic_host, iic_msg_to_irq(msg));
+
+	if (virq == 0) {
+		printk(KERN_ERR
+		       "iic: failed to map IPI %s\n", smp_ipi_name[msg]);
+		return;
+	}
+
+	/*
+	 * If smp_request_message_ipi encounters an error it will notify
+	 * the error. If a message is not needed it will return non-zero.
+	 */
+	if (smp_request_message_ipi(virq, msg) != 0)
+		irq_dispose_mapping(virq);
+}
+
+/* Request the four IPI messages used on this platform, one after another. */
+void iic_request_IPIs(void)
+{
+ iic_request_ipi(PPC_MSG_CALL_FUNCTION);
+ iic_request_ipi(PPC_MSG_RESCHEDULE);
+ iic_request_ipi(PPC_MSG_TICK_BROADCAST);
+ iic_request_ipi(PPC_MSG_NMI_IPI);
+}
+
+#endif /* CONFIG_SMP */
+
+
+/*
+ * irq_domain .match callback: non-zero when @node is compatible with
+ * "IBM,CBEA-Internal-Interrupt-Controller".  @h and @bus_token are unused.
+ */
+static int iic_host_match(struct irq_domain *h, struct device_node *node,
+ enum irq_domain_bus_token bus_token)
+{
+ return of_device_is_compatible(node,
+ "IBM,CBEA-Internal-Interrupt-Controller");
+}
+
+/*
+ * irq_domain .map callback: pick chip and flow handler for @virq from the
+ * type bits of the hw number @hw.  Always returns 0.
+ */
+static int iic_host_map(struct irq_domain *h, unsigned int virq,
+			irq_hw_number_t hw)
+{
+	irq_hw_number_t type = hw & IIC_IRQ_TYPE_MASK;
+
+	if (type == IIC_IRQ_TYPE_IPI)
+		irq_set_chip_and_handler(virq, &iic_chip, handle_percpu_irq);
+	else if (type == IIC_IRQ_TYPE_IOEXC)
+		irq_set_chip_and_handler(virq, &iic_ioexc_chip,
+					 handle_edge_eoi_irq);
+	else
+		irq_set_chip_and_handler(virq, &iic_chip, handle_edge_eoi_irq);
+
+	return 0;
+}
+
+/*
+ * irq_domain .xlate callback: decode a one-cell interrupt specifier laid out
+ * as node[31:24] ext[23:16] class[15:8] unit[7:0] into a hw irq number.
+ *
+ * Returns -ENODEV if @ct is not a CBEA IIC node or does not use exactly one
+ * interrupt cell, -EINVAL for a node number above 1, and 0 on success.
+ */
+static int iic_host_xlate(struct irq_domain *h, struct device_node *ct,
+			  const u32 *intspec, unsigned int intsize,
+			  irq_hw_number_t *out_hwirq, unsigned int *out_flags)
+{
+	unsigned int spec, src_node, exc, src_unit, irq_class;
+	const u32 *cells;
+
+	if (!of_device_is_compatible(ct,
+				     "IBM,CBEA-Internal-Interrupt-Controller"))
+		return -ENODEV;
+	if (intsize != 1)
+		return -ENODEV;
+	cells = of_get_property(ct, "#interrupt-cells", NULL);
+	if (!cells || *cells != 1)
+		return -ENODEV;
+
+	spec = intspec[0];
+	src_node = spec >> 24;
+	exc = (spec >> 16) & 0xff;
+	irq_class = (spec >> 8) & 0xff;
+	src_unit = spec & 0xff;
+
+	/* Check if node is in supported range */
+	if (src_node > 1)
+		return -EINVAL;
+
+	/* Build up interrupt number, special case for IO exceptions */
+	*out_hwirq = (src_node << IIC_IRQ_NODE_SHIFT);
+	if (src_unit == IIC_UNIT_IIC && irq_class == 1) {
+		*out_hwirq |= IIC_IRQ_TYPE_IOEXC | exc;
+	} else {
+		*out_hwirq |= IIC_IRQ_TYPE_NORMAL |
+			(irq_class << IIC_IRQ_CLASS_SHIFT) | src_unit;
+	}
+
+	/* Dummy flags, ignored by iic code */
+	*out_flags = IRQ_TYPE_EDGE_RISING;
+
+	return 0;
+}
+
+/* Domain callbacks passed to irq_domain_add_linear() in iic_init_IRQ(). */
+static const struct irq_domain_ops iic_host_ops = {
+ .match = iic_host_match,
+ .map = iic_host_map,
+ .xlate = iic_host_xlate,
+};
+
+/*
+ * Set up the per-CPU IIC state for HW thread @hw_cpu: map its registers at
+ * @addr, compute the target id, seed the bottom of the EOI stack with 0xff,
+ * keep a reference on @node and write 0 to the priority register.
+ */
+static void __init init_one_iic(unsigned int hw_cpu, unsigned long addr,
+				struct device_node *node)
+{
+	/* XXX FIXME: should locate the linux CPU number from the HW cpu
+	 * number properly. We are lucky for now
+	 */
+	struct iic *iic = &per_cpu(cpu_iic, hw_cpu);
+	u8 thread_unit = (hw_cpu & 1) ? 0xf : 0xe;
+
+	iic->regs = ioremap(addr, sizeof(struct cbe_iic_thread_regs));
+	BUG_ON(!iic->regs);
+
+	iic->target_id = ((hw_cpu & 2) << 3) | thread_unit;
+	iic->eoi_stack[0] = 0xff;
+	iic->node = of_node_get(node);
+	out_be64(&iic->regs->prio, 0);
+
+	printk(KERN_INFO "IIC for CPU %d target id 0x%x : %pOF\n",
+	       hw_cpu, iic->target_id, node);
+}
+
+/*
+ * Discover every "interrupt-controller" node compatible with
+ * "IBM,CBEA-Internal-Interrupt-Controller", initialise the two per-thread
+ * IICs it describes and install the IO exception cascade for its node.
+ *
+ * Returns 0 when at least one controller was found.  Returns -ENODEV when
+ * none was found, or as soon as a matching node lacks the
+ * "ibm,interrupt-server-ranges" property or its two register ranges.
+ */
+static int __init setup_iic(void)
+{
+ struct device_node *dn;
+ struct resource r0, r1;
+ unsigned int node, cascade, found = 0;
+ struct cbe_iic_regs __iomem *node_iic;
+ const u32 *np; /* np[0] and np[1] are used as the two HW cpu numbers */
+
+ for_each_node_by_name(dn, "interrupt-controller") {
+ if (!of_device_is_compatible(dn,
+ "IBM,CBEA-Internal-Interrupt-Controller"))
+ continue;
+ np = of_get_property(dn, "ibm,interrupt-server-ranges", NULL);
+ if (np == NULL) {
+ printk(KERN_WARNING "IIC: CPU association not found\n");
+ /* leaving the iterator early: drop its reference on dn */
+ of_node_put(dn);
+ return -ENODEV;
+ }
+ if (of_address_to_resource(dn, 0, &r0) ||
+ of_address_to_resource(dn, 1, &r1)) {
+ printk(KERN_WARNING "IIC: Can't resolve addresses\n");
+ /* leaving the iterator early: drop its reference on dn */
+ of_node_put(dn);
+ return -ENODEV;
+ }
+ found++;
+ init_one_iic(np[0], r0.start, dn);
+ init_one_iic(np[1], r1.start, dn);
+
+ /* Setup cascade for IO exceptions. XXX cleanup tricks to get
+ * node vs CPU etc...
+ * Note that we configure the IIC_IRR here with a hard coded
+ * priority of 1. We might want to improve that later.
+ */
+ node = np[0] >> 1;
+ node_iic = cbe_get_cpu_iic_regs(np[0]);
+ /* "cascade" first holds the hw irq number, then the mapped Linux irq */
+ cascade = node << IIC_IRQ_NODE_SHIFT;
+ cascade |= 1 << IIC_IRQ_CLASS_SHIFT;
+ cascade |= IIC_UNIT_IIC;
+ cascade = irq_create_mapping(iic_host, cascade);
+ if (!cascade)
+ continue;
+ /*
+ * irq_data is a generic pointer that gets passed back
+ * to us later, so the forced cast is fine.
+ */
+ irq_set_handler_data(cascade, (void __force *)node_iic);
+ irq_set_chained_handler(cascade, iic_ioexc_cascade);
+ out_be64(&node_iic->iic_ir,
+ (1 << 12) /* priority */ |
+ (node << 4) /* dest node */ |
+ IIC_UNIT_THREAD_0 /* route them to thread 0 */);
+ /* Flush pending (make sure it triggers if there is
+ * anything pending
+ */
+ out_be64(&node_iic->iic_is, 0xfffffffffffffffful);
+ }
+
+ if (found)
+ return 0;
+ else
+ return -ENODEV;
+}
+
+/*
+ * Boot-time IIC initialisation: create the linear irq domain and make it the
+ * default host, discover the controllers (panics if setup_iic() fails),
+ * install iic_get_irq() as ppc_md.get_irq and set up the calling CPU.
+ */
+void __init iic_init_IRQ(void)
+{
+ /* Setup an irq host data structure */
+ iic_host = irq_domain_add_linear(NULL, IIC_SOURCE_COUNT, &iic_host_ops,
+ NULL);
+ BUG_ON(iic_host == NULL);
+ irq_set_default_host(iic_host);
+
+ /* Discover and initialize iics */
+ if (setup_iic() < 0)
+ panic("IIC: Failed to initialize !\n");
+
+ /* Set master interrupt handling function */
+ ppc_md.get_irq = iic_get_irq;
+
+ /* Enable on current CPU */
+ iic_setup_cpu();
+}
+
+/*
+ * Program the iic_ir routing register of the IIC belonging to @cpu with
+ * @priority, destination node cpu >> 1 and destination unit PT_0 (when
+ * @thread is 0) or PT_1 (any other value).
+ */
+void iic_set_interrupt_routing(int cpu, int thread, int priority)
+{
+	struct cbe_iic_regs __iomem *regs = cbe_get_cpu_iic_regs(cpu);
+	int node = cpu >> 1;
+	u64 route;
+
+	/* Set which node and thread will handle the next interrupt */
+	route = CBE_IIC_IR_PRIO(priority) | CBE_IIC_IR_DEST_NODE(node);
+	if (thread != 0)
+		route |= CBE_IIC_IR_DEST_UNIT(CBE_IIC_IR_PT_1);
+	else
+		route |= CBE_IIC_IR_DEST_UNIT(CBE_IIC_IR_PT_0);
+
+	out_be64(&regs->iic_ir, route);
+}
diff --git a/arch/powerpc/platforms/cell/interrupt.h b/arch/powerpc/platforms/cell/interrupt.h
new file mode 100644
index 000000000..a47902248
--- /dev/null
+++ b/arch/powerpc/platforms/cell/interrupt.h
@@ -0,0 +1,90 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ASM_CELL_PIC_H
+#define ASM_CELL_PIC_H
+#ifdef __KERNEL__
+/*
+ * Mapping of IIC pending bits into per-node interrupt numbers.
+ *
+ * Interrupt numbers are in the range 0...0x1ff where the top bit
+ * (0x100) represents the source node. Only 2 nodes are supported with
+ * the current code though it's trivial to extend that if necessary using
+ * higher level bits
+ *
+ * The bottom 8 bits are split into 2 type bits and 6 data bits that
+ * depend on the type:
+ *
+ * 00 (0x00 | data) : normal interrupt. data is (class << 4) | source
+ * 01 (0x40 | data) : IO exception. data is the exception number as
+ * defined by bit numbers in IIC_SR
+ * 10 (0x80 | data) : IPI. data is the IPI number (obtained from the priority)
+ * and node is always 0 (IPIs are per-cpu, their source is
+ * not relevant)
+ * 11 (0xc0 | data) : reserved
+ *
+ * In addition, interrupt number 0x80000000 is defined as always invalid
+ * (that is, the node field is expected to never extend to more than 23 bits)
+ *
+ */
+
+/* Constants describing the layout of IIC hw irq numbers and IIC units. */
+enum {
+ IIC_IRQ_INVALID = 0x80000000u,
+ IIC_IRQ_NODE_MASK = 0x100,
+ IIC_IRQ_NODE_SHIFT = 8,
+ IIC_IRQ_MAX = 0x1ff,
+ IIC_IRQ_TYPE_MASK = 0xc0,
+ IIC_IRQ_TYPE_NORMAL = 0x00,
+ IIC_IRQ_TYPE_IOEXC = 0x40,
+ IIC_IRQ_TYPE_IPI = 0x80,
+ IIC_IRQ_CLASS_SHIFT = 4,
+ IIC_IRQ_CLASS_0 = 0x00,
+ IIC_IRQ_CLASS_1 = 0x10,
+ IIC_IRQ_CLASS_2 = 0x20,
+ IIC_SOURCE_COUNT = 0x200, /* IIC_IRQ_MAX + 1 */
+
+ /* Here are defined the various source/dest units. Avoid using those
+ * definitions if you can, they are mostly here for reference
+ */
+ IIC_UNIT_SPU_0 = 0x4,
+ IIC_UNIT_SPU_1 = 0x7,
+ IIC_UNIT_SPU_2 = 0x3,
+ IIC_UNIT_SPU_3 = 0x8,
+ IIC_UNIT_SPU_4 = 0x2,
+ IIC_UNIT_SPU_5 = 0x9,
+ IIC_UNIT_SPU_6 = 0x1,
+ IIC_UNIT_SPU_7 = 0xa,
+ IIC_UNIT_IOC_0 = 0x0,
+ IIC_UNIT_IOC_1 = 0xb,
+ IIC_UNIT_THREAD_0 = 0xe, /* target only */
+ IIC_UNIT_THREAD_1 = 0xf, /* target only */
+ IIC_UNIT_IIC = 0xe, /* source only (IO exceptions); same value as IIC_UNIT_THREAD_0 */
+
+ /* Base numbers for the external interrupts */
+ IIC_IRQ_EXT_IOIF0 =
+ IIC_IRQ_TYPE_NORMAL | IIC_IRQ_CLASS_2 | IIC_UNIT_IOC_0,
+ IIC_IRQ_EXT_IOIF1 =
+ IIC_IRQ_TYPE_NORMAL | IIC_IRQ_CLASS_2 | IIC_UNIT_IOC_1,
+
+ /* Base numbers for the IIC_ISR interrupts */
+ IIC_IRQ_IOEX_TMI = IIC_IRQ_TYPE_IOEXC | IIC_IRQ_CLASS_1 | 63,
+ IIC_IRQ_IOEX_PMI = IIC_IRQ_TYPE_IOEXC | IIC_IRQ_CLASS_1 | 62,
+ IIC_IRQ_IOEX_ATI = IIC_IRQ_TYPE_IOEXC | IIC_IRQ_CLASS_1 | 61,
+ IIC_IRQ_IOEX_MATBFI = IIC_IRQ_TYPE_IOEXC | IIC_IRQ_CLASS_1 | 60,
+ IIC_IRQ_IOEX_ELDI = IIC_IRQ_TYPE_IOEXC | IIC_IRQ_CLASS_1 | 59,
+
+ /* Which bits in IIC_ISR are edge sensitive */
+ IIC_ISR_EDGE_MASK = 0x4ul,
+};
+
+extern void iic_init_IRQ(void);
+extern void iic_message_pass(int cpu, int msg);
+extern void iic_request_IPIs(void);
+extern void iic_setup_cpu(void);
+
+extern u8 iic_get_target_id(int cpu);
+
+extern void spider_init_IRQ(void);
+
+extern void iic_set_interrupt_routing(int cpu, int thread, int priority);
+
+#endif
+#endif /* ASM_CELL_PIC_H */
diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c
new file mode 100644
index 000000000..1202a69b0
--- /dev/null
+++ b/arch/powerpc/platforms/cell/iommu.c
@@ -0,0 +1,1094 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * IOMMU implementation for Cell Broadband Processor Architecture
+ *
+ * (C) Copyright IBM Corporation 2006-2008
+ *
+ * Author: Jeremy Kerr <jk@ozlabs.org>
+ */
+
+#undef DEBUG
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/irqdomain.h>
+#include <linux/notifier.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/memblock.h>
+
+#include <asm/prom.h>
+#include <asm/iommu.h>
+#include <asm/machdep.h>
+#include <asm/pci-bridge.h>
+#include <asm/udbg.h>
+#include <asm/firmware.h>
+#include <asm/cell-regs.h>
+
+#include "cell.h"
+#include "interrupt.h"
+
+/* Define CELL_IOMMU_REAL_UNMAP to actually unmap non-used pages
+ * instead of leaving them mapped to some dummy page. This can be
+ * enabled once the appropriate workarounds for spider bugs have
+ * been enabled
+ */
+#define CELL_IOMMU_REAL_UNMAP
+
+/* Define CELL_IOMMU_STRICT_PROTECTION to enforce protection of
+ * IO PTEs based on the transfer direction. That can be enabled
+ * once spider-net has been fixed to pass the correct direction
+ * to the DMA mapping functions
+ */
+#define CELL_IOMMU_STRICT_PROTECTION
+
+
+#define NR_IOMMUS 2
+
+/* IOC mmap registers */
+#define IOC_Reg_Size 0x2000
+
+#define IOC_IOPT_CacheInvd 0x908
+#define IOC_IOPT_CacheInvd_NE_Mask 0xffe0000000000000ul
+#define IOC_IOPT_CacheInvd_IOPTE_Mask 0x000003fffffffff8ul
+#define IOC_IOPT_CacheInvd_Busy 0x0000000000000001ul
+
+#define IOC_IOST_Origin 0x918
+#define IOC_IOST_Origin_E 0x8000000000000000ul
+#define IOC_IOST_Origin_HW 0x0000000000000800ul
+#define IOC_IOST_Origin_HL 0x0000000000000400ul
+
+#define IOC_IO_ExcpStat 0x920
+#define IOC_IO_ExcpStat_V 0x8000000000000000ul
+#define IOC_IO_ExcpStat_SPF_Mask 0x6000000000000000ul
+#define IOC_IO_ExcpStat_SPF_S 0x6000000000000000ul
+#define IOC_IO_ExcpStat_SPF_P 0x2000000000000000ul
+#define IOC_IO_ExcpStat_ADDR_Mask 0x00000007fffff000ul
+#define IOC_IO_ExcpStat_RW_Mask 0x0000000000000800ul
+#define IOC_IO_ExcpStat_IOID_Mask 0x00000000000007fful
+
+#define IOC_IO_ExcpMask 0x928
+#define IOC_IO_ExcpMask_SFE 0x4000000000000000ul
+#define IOC_IO_ExcpMask_PFE 0x2000000000000000ul
+
+#define IOC_IOCmd_Offset 0x1000
+
+#define IOC_IOCmd_Cfg 0xc00
+#define IOC_IOCmd_Cfg_TE 0x0000800000000000ul
+
+
+/* Segment table entries */
+#define IOSTE_V 0x8000000000000000ul /* valid */
+#define IOSTE_H 0x4000000000000000ul /* cache hint */
+#define IOSTE_PT_Base_RPN_Mask 0x3ffffffffffff000ul /* base RPN of IOPT */
+#define IOSTE_NPPT_Mask 0x0000000000000fe0ul /* no. pages in IOPT */
+#define IOSTE_PS_Mask 0x0000000000000007ul /* page size */
+#define IOSTE_PS_4K 0x0000000000000001ul /* - 4kB */
+#define IOSTE_PS_64K 0x0000000000000003ul /* - 64kB */
+#define IOSTE_PS_1M 0x0000000000000005ul /* - 1MB */
+#define IOSTE_PS_16M 0x0000000000000007ul /* - 16MB */
+
+
+/* IOMMU sizing */
+#define IO_SEGMENT_SHIFT 28
+#define IO_PAGENO_BITS(shift) (IO_SEGMENT_SHIFT - (shift))
+
+/* The high bit needs to be set on every DMA address */
+#define SPIDER_DMA_OFFSET 0x80000000ul
+
+/* One DMA window of a cbe_iommu, wrapping the generic iommu_table. */
+struct iommu_window {
+ struct list_head list; /* link in cbe_iommu.windows */
+ struct cbe_iommu *iommu; /* owning iommu */
+ unsigned long offset; /* window offset as passed to cell_iommu_setup_window() */
+ unsigned long size; /* window size as passed to cell_iommu_setup_window() */
+ unsigned int ioid; /* value of the "ioid" property, ORed into every IO PTE */
+ struct iommu_table table; /* tce_build_cell()/tce_free_cell() get back here via container_of */
+};
+
+#define NAMESIZE 8
+/* State of one IOMMU (one per node, see the iommus[] array). */
+struct cbe_iommu {
+ int nid; /* node id, matched by cell_iommu_for_node() */
+ char name[NAMESIZE]; /* passed as the name to request_irq() */
+ void __iomem *xlate_regs; /* ioremap()ed IOC translation registers */
+ void __iomem *cmd_regs; /* xlate_regs + IOC_IOCmd_Offset */
+ unsigned long *stab; /* segment table */
+ unsigned long *ptab; /* IO page table */
+ void *pad_page; /* page mapped at the first IOMMU page of a window at offset 0 */
+ struct list_head windows; /* list of struct iommu_window */
+};
+
+/* Static array of iommus, one per node
+ * each contains a list of windows, keyed from dma_window property
+ * - on bus setup, look for a matching window, or create one
+ * - on dev setup, assign iommu_table ptr
+ */
+static struct cbe_iommu iommus[NR_IOMMUS];
+static int cbe_nr_iommus;
+
+/*
+ * Invalidate the IOMMU's cached copies of @n_ptes IO PTEs starting at @pte.
+ * The range is processed in chunks; for each chunk the command is written to
+ * the IOC_IOPT_CacheInvd register and the Busy bit is polled until it clears.
+ */
+static void invalidate_tce_cache(struct cbe_iommu *iommu, unsigned long *pte,
+				 long n_ptes)
+{
+	u64 __iomem *invd_reg = iommu->xlate_regs + IOC_IOPT_CacheInvd;
+	long batch;
+	u64 cmd;
+
+	for (; n_ptes > 0; n_ptes -= batch, pte += batch) {
+		/* we can invalidate up to 1 << 11 PTEs at once */
+		batch = min(n_ptes, 1l << 11);
+
+		cmd = ((batch /*- 1*/) << 53) & IOC_IOPT_CacheInvd_NE_Mask;
+		cmd |= __pa(pte) & IOC_IOPT_CacheInvd_IOPTE_Mask;
+		cmd |= IOC_IOPT_CacheInvd_Busy;
+
+		out_be64(invd_reg, cmd);
+		while (in_be64(invd_reg) & IOC_IOPT_CacheInvd_Busy)
+			;
+	}
+}
+
+/*
+ * iommu_table_ops .set callback: fill @npages IO PTEs, starting at table
+ * index @index, with translations for the kernel addresses beginning at
+ * @uaddr (advancing by one IOMMU page per entry), then invalidate the
+ * IOMMU's cached copies of those entries.  Always returns 0.
+ */
+static int tce_build_cell(struct iommu_table *tbl, long index, long npages,
+ unsigned long uaddr, enum dma_data_direction direction,
+ unsigned long attrs)
+{
+ int i;
+ unsigned long *io_pte, base_pte;
+ struct iommu_window *window =
+ container_of(tbl, struct iommu_window, table);
+
+ /* implementing proper protection causes problems with the spidernet
+ * driver - check mapping directions later, but allow read & write by
+ * default for now.*/
+#ifdef CELL_IOMMU_STRICT_PROTECTION
+ /* to avoid referencing a global, we use a trick here to setup the
+ * protection bit. "prot" is setup to be 3 fields of 4 bits appended
+ * together for each of the 3 supported direction values. It is then
+ * shifted left so that the fields matching the desired direction
+ * lands on the appropriate bits, and other bits are masked out.
+ */
+ const unsigned long prot = 0xc48;
+ base_pte =
+ ((prot << (52 + 4 * direction)) &
+ (CBE_IOPTE_PP_W | CBE_IOPTE_PP_R)) |
+ CBE_IOPTE_M | CBE_IOPTE_SO_RW |
+ (window->ioid & CBE_IOPTE_IOID_Mask);
+#else
+ base_pte = CBE_IOPTE_PP_W | CBE_IOPTE_PP_R | CBE_IOPTE_M |
+ CBE_IOPTE_SO_RW | (window->ioid & CBE_IOPTE_IOID_Mask);
+#endif
+ /* weakly ordered mappings drop the SO_RW bits again */
+ if (unlikely(attrs & DMA_ATTR_WEAK_ORDERING))
+ base_pte &= ~CBE_IOPTE_SO_RW;
+
+ /* it_base is the page table address; it_offset is the index of its first entry */
+ io_pte = (unsigned long *)tbl->it_base + (index - tbl->it_offset);
+
+ for (i = 0; i < npages; i++, uaddr += (1 << tbl->it_page_shift))
+ io_pte[i] = base_pte | (__pa(uaddr) & CBE_IOPTE_RPN_Mask);
+
+ /* barrier between the PTE stores and the cache invalidation below */
+ mb();
+
+ invalidate_tce_cache(window->iommu, io_pte, npages);
+
+ pr_debug("tce_build_cell(index=%lx,n=%lx,dir=%d,base_pte=%lx)\n",
+ index, npages, direction, base_pte);
+ return 0;
+}
+
+/*
+ * iommu_table_ops .clear callback: overwrite @npages IO PTEs starting at
+ * table index @index - with 0 when CELL_IOMMU_REAL_UNMAP is defined, else
+ * with a read-only mapping of the iommu's pad page - and invalidate the
+ * IOMMU's cached copies of them.
+ */
+static void tce_free_cell(struct iommu_table *tbl, long index, long npages)
+{
+	struct iommu_window *win =
+		container_of(tbl, struct iommu_window, table);
+	unsigned long *first, *slot, fill;
+	long left;
+
+	pr_debug("tce_free_cell(index=%lx,n=%lx)\n", index, npages);
+
+#ifdef CELL_IOMMU_REAL_UNMAP
+	fill = 0;
+#else
+	/* spider bridge does PCI reads after freeing - insert a mapping
+	 * to a scratch page instead of an invalid entry */
+	fill = CBE_IOPTE_PP_R | CBE_IOPTE_M | CBE_IOPTE_SO_RW |
+		__pa(win->iommu->pad_page) |
+		(win->ioid & CBE_IOPTE_IOID_Mask);
+#endif
+
+	first = (unsigned long *)tbl->it_base + (index - tbl->it_offset);
+
+	slot = first;
+	for (left = npages; left > 0; left--)
+		*slot++ = fill;
+
+	mb();
+
+	invalidate_tce_cache(win->iommu, first, npages);
+}
+
+/*
+ * Handler for the IOC exception interrupt: read the exception status
+ * register, log its decoded fields, then write the value back with the V bit
+ * cleared.  Always returns IRQ_HANDLED.
+ */
+static irqreturn_t ioc_interrupt(int irq, void *data)
+{
+	struct cbe_iommu *iommu = data;
+	unsigned long stat = in_be64(iommu->xlate_regs + IOC_IO_ExcpStat);
+	unsigned long spf = stat & IOC_IO_ExcpStat_SPF_Mask;
+	char seg_flag = (spf == IOC_IO_ExcpStat_SPF_S) ? 'S' : ' ';
+	char page_flag = (spf == IOC_IO_ExcpStat_SPF_P) ? 'P' : ' ';
+	const char *access = (stat & IOC_IO_ExcpStat_RW_Mask) ? "Read" : "Write";
+
+	/* Might want to rate limit it */
+	printk(KERN_ERR "iommu: DMA exception 0x%016lx\n", stat);
+	printk(KERN_ERR " V=%d, SPF=[%c%c], RW=%s, IOID=0x%04x\n",
+	       !!(stat & IOC_IO_ExcpStat_V), seg_flag, page_flag, access,
+	       (unsigned int)(stat & IOC_IO_ExcpStat_IOID_Mask));
+	printk(KERN_ERR " page=0x%016lx\n",
+	       stat & IOC_IO_ExcpStat_ADDR_Mask);
+
+	/* clear interrupt */
+	stat &= ~IOC_IO_ExcpStat_V;
+	out_be64(iommu->xlate_regs + IOC_IO_ExcpStat, stat);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Look up the base address of the IOC registers for node @nid and store it
+ * in *@base (which is set to 0 first).
+ *
+ * An "ioc" node on that NUMA node is tried first; otherwise the
+ * "ioc-translation" property of a cpu node whose "node-id" equals @nid is
+ * used.  Returns 0 on success, -ENODEV if neither exists.
+ */
+static int __init cell_iommu_find_ioc(int nid, unsigned long *base)
+{
+	struct device_node *np;
+	struct resource res;
+
+	*base = 0;
+
+	/* First look for new style /be nodes */
+	for_each_node_by_name(np, "ioc") {
+		if (of_node_to_nid(np) != nid)
+			continue;
+		if (of_address_to_resource(np, 0, &res) == 0) {
+			*base = res.start;
+			of_node_put(np);
+			return 0;
+		}
+		printk(KERN_ERR "iommu: can't get address for %pOF\n",
+		       np);
+	}
+
+	/* Ok, let's try the old way */
+	for_each_node_by_type(np, "cpu") {
+		const unsigned int *nidp;
+		const unsigned long *xlate;
+
+		nidp = of_get_property(np, "node-id", NULL);
+		if (!nidp || *nidp != nid)
+			continue;
+
+		xlate = of_get_property(np, "ioc-translation", NULL);
+		if (!xlate)
+			continue;
+
+		*base = *xlate;
+		of_node_put(np);
+		return 0;
+	}
+
+	return -ENODEV;
+}
+
+/*
+ * Allocate and zero the segment table of @iommu.  It gets one entry per
+ * segment up to the higher end of the two regions [dbase, dbase + dsize) and
+ * [fbase, fbase + fsize).
+ */
+static void __init cell_iommu_setup_stab(struct cbe_iommu *iommu,
+				unsigned long dbase, unsigned long dsize,
+				unsigned long fbase, unsigned long fsize)
+{
+	unsigned long top, nsegs, bytes;
+	struct page *pg;
+
+	top = max(dbase + dsize, fbase + fsize);
+	nsegs = top >> IO_SEGMENT_SHIFT;
+
+	pr_debug("%s: iommu[%d]: segments: %lu\n",
+			__func__, iommu->nid, nsegs);
+
+	/* set up the segment table */
+	bytes = nsegs * sizeof(unsigned long);
+	pg = alloc_pages_node(iommu->nid, GFP_KERNEL, get_order(bytes));
+	BUG_ON(!pg);
+
+	iommu->stab = page_address(pg);
+	memset(iommu->stab, 0, bytes);
+}
+
+/*
+ * Allocate and zero an IO page table covering [@base, @base + @size) with
+ * pages of 1 << @page_shift bytes, and point the segment table entries of
+ * @iommu for that range at it.  Segments inside the gap
+ * [@gap_base, @gap_base + @gap_size) are left untouched.
+ *
+ * @page_shift must be 12, 16, 20 or 24; anything else hits BUG().
+ * Returns the kernel address of the new page table.
+ */
+static unsigned long *__init cell_iommu_alloc_ptab(struct cbe_iommu *iommu,
+ unsigned long base, unsigned long size, unsigned long gap_base,
+ unsigned long gap_size, unsigned long page_shift)
+{
+ struct page *page;
+ int i;
+ unsigned long reg, segments, pages_per_segment, ptab_size,
+ n_pte_pages, start_seg, *ptab;
+
+ start_seg = base >> IO_SEGMENT_SHIFT;
+ segments = size >> IO_SEGMENT_SHIFT;
+ pages_per_segment = 1ull << IO_PAGENO_BITS(page_shift);
+ /* PTEs for each segment must start on a 4K boundary */
+ pages_per_segment = max(pages_per_segment,
+ (1 << 12) / sizeof(unsigned long));
+
+ ptab_size = segments * pages_per_segment * sizeof(unsigned long);
+ pr_debug("%s: iommu[%d]: ptab_size: %lu, order: %d\n", __func__,
+ iommu->nid, ptab_size, get_order(ptab_size));
+ page = alloc_pages_node(iommu->nid, GFP_KERNEL, get_order(ptab_size));
+ BUG_ON(!page);
+
+ ptab = page_address(page);
+ memset(ptab, 0, ptab_size);
+
+ /* number of 4K pages needed for a page table */
+ n_pte_pages = (pages_per_segment * sizeof(unsigned long)) >> 12;
+
+ pr_debug("%s: iommu[%d]: stab at %p, ptab at %p, n_pte_pages: %lu\n",
+ __func__, iommu->nid, iommu->stab, ptab,
+ n_pte_pages);
+
+ /* initialise the STEs */
+ /* (n_pte_pages - 1) << 5 lands in the field described by IOSTE_NPPT_Mask */
+ reg = IOSTE_V | ((n_pte_pages - 1) << 5);
+
+ switch (page_shift) {
+ case 12: reg |= IOSTE_PS_4K; break;
+ case 16: reg |= IOSTE_PS_64K; break;
+ case 20: reg |= IOSTE_PS_1M; break;
+ case 24: reg |= IOSTE_PS_16M; break;
+ default: BUG();
+ }
+
+ /* from here on the gap is expressed in segments, not bytes */
+ gap_base = gap_base >> IO_SEGMENT_SHIFT;
+ gap_size = gap_size >> IO_SEGMENT_SHIFT;
+
+ pr_debug("Setting up IOMMU stab:\n");
+ for (i = start_seg; i < (start_seg + segments); i++) {
+ if (i >= gap_base && i < (gap_base + gap_size)) {
+ pr_debug("\toverlap at %d, skipping\n", i);
+ continue;
+ }
+ /* each segment gets its own slice of n_pte_pages 4K pages within ptab */
+ iommu->stab[i] = reg | (__pa(ptab) + (n_pte_pages << 12) *
+ (i - start_seg));
+ pr_debug("\t[%d] 0x%016lx\n", i, iommu->stab[i]);
+ }
+
+ return ptab;
+}
+
+/*
+ * Map the IOC registers of @iommu, hook up its exception interrupt, load the
+ * segment table origin and switch on IO translation.
+ *
+ * Panics when the IOC register base cannot be found; a failed irq mapping or
+ * request_irq() hits BUG_ON().
+ * NOTE(review): the ioremap() result is used without a NULL check.
+ */
+static void __init cell_iommu_enable_hardware(struct cbe_iommu *iommu)
+{
+ int ret;
+ unsigned long reg, xlate_base;
+ unsigned int virq;
+
+ if (cell_iommu_find_ioc(iommu->nid, &xlate_base))
+ panic("%s: missing IOC register mappings for node %d\n",
+ __func__, iommu->nid);
+
+ iommu->xlate_regs = ioremap(xlate_base, IOC_Reg_Size);
+ iommu->cmd_regs = iommu->xlate_regs + IOC_IOCmd_Offset;
+
+ /* ensure that the STEs have updated */
+ mb();
+
+ /* setup interrupts for the iommu. */
+ /* clear the V bit of the exception status, then write the PFE and SFE bits to the exception mask register */
+ reg = in_be64(iommu->xlate_regs + IOC_IO_ExcpStat);
+ out_be64(iommu->xlate_regs + IOC_IO_ExcpStat,
+ reg & ~IOC_IO_ExcpStat_V);
+ out_be64(iommu->xlate_regs + IOC_IO_ExcpMask,
+ IOC_IO_ExcpMask_PFE | IOC_IO_ExcpMask_SFE);
+
+ /* hw irq: the ATI IO exception of this iommu's node, mapped in the default domain */
+ virq = irq_create_mapping(NULL,
+ IIC_IRQ_IOEX_ATI | (iommu->nid << IIC_IRQ_NODE_SHIFT));
+ BUG_ON(!virq);
+
+ ret = request_irq(virq, ioc_interrupt, 0, iommu->name, iommu);
+ BUG_ON(ret);
+
+ /* set the IOC segment table origin register (and turn on the iommu) */
+ reg = IOC_IOST_Origin_E | __pa(iommu->stab) | IOC_IOST_Origin_HW;
+ out_be64(iommu->xlate_regs + IOC_IOST_Origin, reg);
+ /* read the register back; the value is discarded */
+ in_be64(iommu->xlate_regs + IOC_IOST_Origin);
+
+ /* turn on IO translation */
+ reg = in_be64(iommu->cmd_regs + IOC_IOCmd_Cfg) | IOC_IOCmd_Cfg_TE;
+ out_be64(iommu->cmd_regs + IOC_IOCmd_Cfg, reg);
+}
+
+/*
+ * Set up @iommu for a single window [@base, @base + @size): allocate the
+ * segment table and a 4K-page IO page table (no second region, no gap), then
+ * enable the hardware.
+ */
+static void __init cell_iommu_setup_hardware(struct cbe_iommu *iommu,
+ unsigned long base, unsigned long size)
+{
+ cell_iommu_setup_stab(iommu, base, size, 0, 0);
+ iommu->ptab = cell_iommu_alloc_ptab(iommu, base, size, 0, 0,
+ IOMMU_PAGE_SHIFT_4K);
+ cell_iommu_enable_hardware(iommu);
+}
+
+#if 0/* Unused for now */
+/*
+ * Return the window of @iommu whose offset and size both equal the given
+ * values, or NULL if there is none.
+ */
+static struct iommu_window *find_window(struct cbe_iommu *iommu,
+ unsigned long offset, unsigned long size)
+{
+ struct iommu_window *window;
+
+ /* todo: check for overlapping (but not equal) windows */
+
+ list_for_each_entry(window, &(iommu->windows), list) {
+ if (window->offset == offset && window->size == size)
+ return window;
+ }
+
+ return NULL;
+}
+#endif
+
+/*
+ * Return the value of the "ioid" property of @np.  When the property is
+ * missing a warning is logged and 0 is returned.
+ */
+static inline u32 cell_iommu_get_ioid(struct device_node *np)
+{
+	const u32 *prop = of_get_property(np, "ioid", NULL);
+
+	if (prop)
+		return *prop;
+
+	printk(KERN_WARNING "iommu: missing ioid for %pOF using 0\n",
+	       np);
+	return 0;
+}
+
+/* iommu_table callbacks installed on every window's table. */
+static struct iommu_table_ops cell_iommu_ops = {
+ .set = tce_build_cell,
+ .clear = tce_free_cell
+};
+
+/*
+ * Create a DMA window of @size bytes at @offset on @iommu for device node
+ * @np, initialise its iommu_table (4K IOMMU pages, backed by iommu->ptab) and
+ * add it to the iommu's window list.  @pte_offset is added to the table's
+ * it_offset.
+ *
+ * For a window at offset 0 a zeroed pad page is additionally allocated,
+ * entry 0 of the table is marked as used and mapped to that page.
+ *
+ * Allocation failures hit BUG_ON(); a failing iommu_init_table() panics.
+ * Returns the new window.
+ */
+static struct iommu_window * __init
+cell_iommu_setup_window(struct cbe_iommu *iommu, struct device_node *np,
+ unsigned long offset, unsigned long size,
+ unsigned long pte_offset)
+{
+ struct iommu_window *window;
+ struct page *page;
+ u32 ioid;
+
+ ioid = cell_iommu_get_ioid(np);
+
+ window = kzalloc_node(sizeof(*window), GFP_KERNEL, iommu->nid);
+ BUG_ON(window == NULL);
+
+ window->offset = offset;
+ window->size = size;
+ window->ioid = ioid;
+ window->iommu = iommu;
+
+ window->table.it_blocksize = 16;
+ window->table.it_base = (unsigned long)iommu->ptab;
+ window->table.it_index = iommu->nid;
+ window->table.it_page_shift = IOMMU_PAGE_SHIFT_4K;
+ window->table.it_offset =
+ (offset >> window->table.it_page_shift) + pte_offset;
+ window->table.it_size = size >> window->table.it_page_shift;
+ window->table.it_ops = &cell_iommu_ops;
+
+ if (!iommu_init_table(&window->table, iommu->nid, 0, 0))
+ panic("Failed to initialize iommu table");
+
+ pr_debug("\tioid %d\n", window->ioid);
+ pr_debug("\tblocksize %ld\n", window->table.it_blocksize);
+ pr_debug("\tbase 0x%016lx\n", window->table.it_base);
+ pr_debug("\toffset 0x%lx\n", window->table.it_offset);
+ pr_debug("\tsize %ld\n", window->table.it_size);
+
+ list_add(&window->list, &iommu->windows);
+
+ /* only a window starting at offset 0 needs the pad page set up below */
+ if (offset != 0)
+ return window;
+
+ /* We need to map and reserve the first IOMMU page since it's used
+ * by the spider workaround. In theory, we only need to do that when
+ * running on spider but it doesn't really matter.
+ *
+ * This code also assumes that we have a window that starts at 0,
+ * which is the case on all spider based blades.
+ */
+ page = alloc_pages_node(iommu->nid, GFP_KERNEL, 0);
+ BUG_ON(!page);
+ iommu->pad_page = page_address(page);
+ clear_page(iommu->pad_page);
+
+ /* mark entry 0 as in use in the table's bitmap, then map it to the pad page */
+ __set_bit(0, window->table.it_map);
+ tce_build_cell(&window->table, window->table.it_offset, 1,
+ (unsigned long)iommu->pad_page, DMA_TO_DEVICE, 0);
+
+ return window;
+}
+
+/*
+ * Find the registered IOMMU that belongs to NUMA node @nid.
+ * Returns NULL when no entry of iommus[] carries that node id.
+ */
+static struct cbe_iommu *cell_iommu_for_node(int nid)
+{
+	int idx = 0;
+
+	while (idx < cbe_nr_iommus) {
+		struct cbe_iommu *candidate = &iommus[idx];
+
+		if (candidate->nid == nid)
+			return candidate;
+		idx++;
+	}
+
+	return NULL;
+}
+
+/* DMA offset handed to devices when the IOMMU is not enabled (direct DMA). */
+static unsigned long cell_dma_nommu_offset;
+
+/* Bus address at which the fixed mapping starts; added to the per-device
+ * fixed address in cell_dma_dev_setup().
+ */
+static unsigned long dma_iommu_fixed_base;
+/* Set once cell_iommu_init() has installed the IOMMU DMA ops. */
+static bool cell_iommu_enabled;
+
+/* iommu_fixed_is_weak is set if booted with iommu_fixed=weak, or when a
+ * "pcie-endpoint" node exists and iommu_fixed=strong was not requested
+ * (see setup_iommu_fixed()).
+ */
+bool iommu_fixed_is_weak;
+
+/*
+ * Return the iommu_table @dev should use, or NULL (after logging an error)
+ * when the device's node has no IOMMU or that IOMMU has no window.
+ *
+ * The current implementation always picks the first window of the node's
+ * IOMMU. Something smarter could be done later, though it may never be
+ * necessary.
+ */
+static struct iommu_table *cell_get_iommu_table(struct device *dev)
+{
+	int nid = dev_to_node(dev);
+	struct cbe_iommu *iommu = cell_iommu_for_node(nid);
+	struct iommu_window *first;
+
+	if (iommu != NULL && !list_empty(&iommu->windows)) {
+		first = list_entry(iommu->windows.next, struct iommu_window,
+				   list);
+		return &first->table;
+	}
+
+	dev_err(dev, "iommu: missing iommu for %pOF (node %d)\n",
+		dev->of_node, nid);
+	return NULL;
+}
+
+static u64 cell_iommu_get_fixed_address(struct device *dev);
+
+/*
+ * Per-device DMA setup.
+ *
+ * Without the IOMMU the device simply gets the direct-DMA offset. With the
+ * IOMMU enabled, a device that has a fixed address gets an offset into the
+ * fixed mapping, and every device is attached to its node's iommu_table.
+ */
+static void cell_dma_dev_setup(struct device *dev)
+{
+	u64 fixed;
+
+	if (!cell_iommu_enabled) {
+		dev->archdata.dma_offset = cell_dma_nommu_offset;
+		return;
+	}
+
+	fixed = cell_iommu_get_fixed_address(dev);
+	if (fixed != OF_BAD_ADDR)
+		dev->archdata.dma_offset = fixed + dma_iommu_fixed_base;
+
+	set_iommu_table_base(dev, cell_get_iommu_table(dev));
+}
+
+/* PCI controller hook: run the common DMA setup on the struct device
+ * embedded in @dev.
+ */
+static void cell_pci_dma_dev_setup(struct pci_dev *dev)
+{
+ cell_dma_dev_setup(&dev->dev);
+}
+
+/*
+ * Bus notifier callback. Only device addition is acted upon: the new
+ * device gets the IOMMU DMA ops when the IOMMU is enabled, followed by the
+ * common per-device DMA setup. Always returns 0.
+ */
+static int cell_of_bus_notify(struct notifier_block *nb, unsigned long action,
+			      void *data)
+{
+	struct device *dev = data;
+
+	if (action == BUS_NOTIFY_ADD_DEVICE) {
+		if (cell_iommu_enabled)
+			dev->dma_ops = &dma_iommu_ops;
+		cell_dma_dev_setup(dev);
+	}
+
+	return 0;
+}
+
+/* Registered on platform_bus_type by cell_iommu_init(). */
+static struct notifier_block cell_of_bus_notifier = {
+ .notifier_call = cell_of_bus_notify
+};
+
+/*
+ * Fetch the DMA window of @np into *@base and *@size.
+ *
+ * Returns 0 when the node has an "ibm,dma-window" property. Otherwise the
+ * outputs are set to the hard-coded default window (base 0, size 2GB) and
+ * -ENODEV is returned.
+ */
+static int __init cell_iommu_get_window(struct device_node *np,
+					 unsigned long *base,
+					 unsigned long *size)
+{
+	const __be32 *prop = of_get_property(np, "ibm,dma-window", NULL);
+	unsigned long unused_index;
+
+	if (prop != NULL) {
+		/* The index cell is parsed but not needed here. */
+		of_parse_dma_window(np, prop, &unused_index, base, size);
+		return 0;
+	}
+
+	/* No property: hard code the window */
+	*base = 0;
+	*size = 0x80000000u;
+	return -ENODEV;
+}
+
+/*
+ * Claim the next free slot of iommus[] for device node @np and initialise
+ * its basic fields (node id, name, empty window list).
+ *
+ * Returns the new entry, or NULL (after logging an error) when the NUMA
+ * node of @np cannot be determined or all NR_IOMMUS slots are taken.
+ */
+static struct cbe_iommu * __init cell_iommu_alloc(struct device_node *np)
+{
+	struct cbe_iommu *iommu;
+	int slot;
+	int nid = of_node_to_nid(np);
+
+	if (nid < 0) {
+		printk(KERN_ERR "iommu: failed to get node for %pOF\n",
+		       np);
+		return NULL;
+	}
+	pr_debug("iommu: setting up iommu for node %d (%pOF)\n",
+		 nid, np);
+
+	/* XXX todo: if several windows on the same IOMMU ever become
+	 * possible (they are not today), this is the place to check whether
+	 * the iommu for that node has already been set up. Getting the size
+	 * right might be a problem though, so that is ignored for now.
+	 * Multiple window support could arguably be dropped altogether since
+	 * the cell iommu supports per-page ioids.
+	 */
+
+	if (cbe_nr_iommus >= NR_IOMMUS) {
+		printk(KERN_ERR "iommu: too many IOMMUs detected ! (%pOF)\n",
+		       np);
+		return NULL;
+	}
+
+	/* Take the next slot and fill in the base fields */
+	slot = cbe_nr_iommus;
+	cbe_nr_iommus++;
+
+	iommu = &iommus[slot];
+	iommu->stab = NULL;
+	iommu->nid = nid;
+	snprintf(iommu->name, sizeof(iommu->name), "iommu%d", slot);
+	INIT_LIST_HEAD(&iommu->windows);
+
+	return iommu;
+}
+
+/*
+ * Fully set up the IOMMU described by @np with a single dynamic window:
+ * allocate the bookkeeping entry, obtain the DMA window, program the
+ * hardware and create the iommu_table.
+ *
+ * @offset is a byte offset; it is passed on in units of 4K IOMMU pages.
+ * Nothing is done when no IOMMU entry can be allocated.
+ */
+static void __init cell_iommu_init_one(struct device_node *np,
+				       unsigned long offset)
+{
+	unsigned long win_base, win_size;
+	struct cbe_iommu *iommu = cell_iommu_alloc(np);
+
+	if (iommu == NULL)
+		return;
+
+	/* The default window is used when the node does not provide one */
+	cell_iommu_get_window(np, &win_base, &win_size);
+
+	pr_debug("\ttranslating window 0x%lx...0x%lx\n",
+		 win_base, win_base + win_size - 1);
+
+	cell_iommu_setup_hardware(iommu, win_base, win_size);
+
+	cell_iommu_setup_window(iommu, np, win_base, win_size,
+				offset >> IOMMU_PAGE_SHIFT_4K);
+}
+
+/*
+ * Turn IOC translation off on every online node: clear the IOST origin
+ * register and the TE bit of the IOC command configuration register.
+ * Nodes without an IOC, or whose registers cannot be mapped, are skipped.
+ */
+static void __init cell_disable_iommus(void)
+{
+ int node;
+ unsigned long base, val;
+ void __iomem *xregs, *cregs;
+
+ /* Make sure IOC translation is disabled on all nodes */
+ for_each_online_node(node) {
+ if (cell_iommu_find_ioc(node, &base))
+ continue;
+ xregs = ioremap(base, IOC_Reg_Size);
+ if (xregs == NULL)
+ continue;
+ cregs = xregs + IOC_IOCmd_Offset;
+
+ pr_debug("iommu: cleaning up iommu on node %d\n", node);
+
+ /* Each write is followed by a read-back of the same register whose
+ * value is discarded -- presumably to make sure the write has reached
+ * the hardware before continuing (TODO confirm).
+ */
+ out_be64(xregs + IOC_IOST_Origin, 0);
+ (void)in_be64(xregs + IOC_IOST_Origin);
+ val = in_be64(cregs + IOC_IOCmd_Cfg);
+ val &= ~IOC_IOCmd_Cfg_TE;
+ out_be64(cregs + IOC_IOCmd_Cfg, val);
+ (void)in_be64(cregs + IOC_IOCmd_Cfg);
+
+ iounmap(xregs);
+ }
+}
+
+/*
+ * Set up direct (non-translated) DMA.
+ *
+ * Disables IOC translation, works out the direct DMA offset (the spider
+ * offset on machines without an axon, plus the base of the firmware DMA
+ * window) and installs the PCI dma_dev_setup hook when that offset is
+ * non-zero.
+ *
+ * Returns 0 on success. Returns -ENODEV when a DMA window was found but is
+ * smaller than the end of DRAM; cell_iommu_init() then goes on to enable
+ * the IOMMU instead.
+ */
+static int __init cell_iommu_init_disabled(void)
+{
+ struct device_node *np = NULL;
+ unsigned long base = 0, size;
+
+ /* When no iommu is present, we use direct DMA ops */
+
+ /* First make sure all IOC translation is turned off */
+ cell_disable_iommus();
+
+ /* If we have no Axon, we set up the spider DMA magic offset */
+ np = of_find_node_by_name(NULL, "axon");
+ if (!np)
+ cell_dma_nommu_offset = SPIDER_DMA_OFFSET;
+ of_node_put(np);
+
+ /* Now we need to check to see where the memory is mapped
+ * in PCI space. We assume that all busses use the same dma
+ * window which is always the case so far on Cell, thus we
+ * pick up the first pci-internal node we can find and check
+ * the DMA window from there.
+ */
+ /* Only nodes directly below the root are considered. np is left
+ * non-NULL only when a loop was left through "break", i.e. when a
+ * window was found and "size" has been filled in.
+ */
+ for_each_node_by_name(np, "axon") {
+ if (np->parent == NULL || np->parent->parent != NULL)
+ continue;
+ if (cell_iommu_get_window(np, &base, &size) == 0)
+ break;
+ }
+ if (np == NULL) {
+ for_each_node_by_name(np, "pci-internal") {
+ if (np->parent == NULL || np->parent->parent != NULL)
+ continue;
+ if (cell_iommu_get_window(np, &base, &size) == 0)
+ break;
+ }
+ }
+ /* From here on np is only compared against NULL, never dereferenced */
+ of_node_put(np);
+
+ /* If we found a DMA window, we check if it's big enough to enclose
+ * all of physical memory. If not, we force enable IOMMU
+ */
+ if (np && size < memblock_end_of_DRAM()) {
+ printk(KERN_WARNING "iommu: force-enabled, dma window"
+ " (%ldMB) smaller than total memory (%lldMB)\n",
+ size >> 20, memblock_end_of_DRAM() >> 20);
+ return -ENODEV;
+ }
+
+ cell_dma_nommu_offset += base;
+
+ if (cell_dma_nommu_offset != 0)
+ cell_pci_controller_ops.dma_dev_setup = cell_pci_dma_dev_setup;
+
+ printk("iommu: disabled, direct DMA offset is 0x%lx\n",
+ cell_dma_nommu_offset);
+
+ return 0;
+}
+
+/*
+ * Fixed IOMMU mapping support
+ *
+ * This code adds support for setting up a fixed IOMMU mapping on certain
+ * cell machines. For 64-bit devices this avoids the performance overhead of
+ * mapping and unmapping pages at runtime. 32-bit devices are unable to use
+ * the fixed mapping.
+ *
+ * The fixed mapping is established at boot, and maps all of physical memory
+ * 1:1 into device space at some offset. On machines with < 30 GB of memory
+ * we setup the fixed mapping immediately above the normal IOMMU window.
+ *
+ * For example a machine with 4GB of memory would end up with the normal
+ * IOMMU window from 0-2GB and the fixed mapping window from 2GB to 6GB. In
+ * this case a 64-bit device wishing to DMA to 1GB would be told to DMA to
+ * 3GB, plus any offset required by firmware. The firmware offset is encoded
+ * in the "dma-ranges" property.
+ *
+ * On machines with 30GB or more of memory, we are unable to place the fixed
+ * mapping above the normal IOMMU window as we would run out of address space.
+ * Instead we move the normal IOMMU window to coincide with the hash page
+ * table; this region does not need to be part of the fixed mapping as no
+ * device should ever be DMA'ing to it. We then set up the fixed mapping
+ * from 0 to 32GB.
+ */
+
+/*
+ * Work out the device-side base address of the fixed mapping for @dev.
+ *
+ * Walks up from the device's OF node to the first ancestor that carries a
+ * non-empty "dma-ranges" property, then picks the largest range whose
+ * translated parent address is 0.
+ *
+ * Returns the child (device) address of that range, or OF_BAD_ADDR when
+ * the device has no OF node, no ancestor has a usable "dma-ranges", or no
+ * range starts at parent address 0.
+ */
+static u64 cell_iommu_get_fixed_address(struct device *dev)
+{
+	u64 cpu_addr, size, best_size, dev_addr = OF_BAD_ADDR;
+	struct device_node *np;
+	const u32 *ranges = NULL;
+	int i, len, best, naddr, nsize, pna, range_size;
+
+	/* We can be called for platform devices that have no of_node */
+	np = of_node_get(dev->of_node);
+	if (!np)
+		goto out;
+
+	while (1) {
+		/* Cell counts of the child side are read before moving up */
+		naddr = of_n_addr_cells(np);
+		nsize = of_n_size_cells(np);
+		np = of_get_next_parent(np);
+		if (!np)
+			break;
+
+		ranges = of_get_property(np, "dma-ranges", &len);
+
+		/* Ignore empty ranges, they imply no translation required */
+		if (ranges && len > 0)
+			break;
+	}
+
+	/*
+	 * np is NULL when the walk ran past the root without finding a
+	 * non-empty property. "ranges" may then still point at an empty
+	 * one, so it must not be used, and np must not be queried.
+	 */
+	if (!np || !ranges) {
+		dev_dbg(dev, "iommu: no dma-ranges found\n");
+		goto out;
+	}
+
+	len /= sizeof(u32);
+
+	pna = of_n_addr_cells(np);
+	range_size = naddr + nsize + pna;
+
+	/* dma-ranges format:
+	 * child addr	: naddr cells
+	 * parent addr	: pna cells
+	 * size		: nsize cells
+	 *
+	 * Only complete entries are examined: a trailing partial entry in
+	 * a malformed property is ignored instead of being read past the
+	 * end of the property.
+	 */
+	for (i = 0, best = -1, best_size = 0; i + range_size <= len;
+	     i += range_size) {
+		cpu_addr = of_translate_dma_address(np, ranges + i + naddr);
+		size = of_read_number(ranges + i + naddr + pna, nsize);
+
+		if (cpu_addr == 0 && size > best_size) {
+			best = i;
+			best_size = size;
+		}
+	}
+
+	if (best >= 0) {
+		dev_addr = of_read_number(ranges + best, naddr);
+	} else
+		dev_dbg(dev, "iommu: no suitable range found!\n");
+
+out:
+	of_node_put(np);
+
+	return dev_addr;
+}
+
+/*
+ * The IOMMU bypass (fixed mapping) is only offered to devices that ask for
+ * a full 64-bit DMA mask and for which a fixed address can be found.
+ */
+static bool cell_pci_iommu_bypass_supported(struct pci_dev *pdev, u64 mask)
+{
+	if (mask != DMA_BIT_MASK(64))
+		return false;
+
+	return cell_iommu_get_fixed_address(&pdev->dev) != OF_BAD_ADDR;
+}
+
+/*
+ * Write the page table entry for the 16MB page containing @addr.
+ *
+ * @ptab:     start of the page table
+ * @base_pte: attribute bits to combine with the page's real page number
+ *
+ * The entry is located by first skipping one 4K (1 << 12 bytes) chunk of
+ * the table per IO segment, then indexing by the 16MB page number within
+ * the segment.
+ */
+static void __init insert_16M_pte(unsigned long addr, unsigned long *ptab,
+				  unsigned long base_pte)
+{
+	unsigned long segment = addr >> IO_SEGMENT_SHIFT;
+	unsigned long offset = (addr >> 24) - (segment << IO_PAGENO_BITS(24));
+
+	ptab += segment * (1 << 12) / sizeof(unsigned long);
+
+	pr_debug("iommu: addr %lx ptab %p segment %lx offset %lx\n",
+		 addr, ptab, segment, offset);
+
+	ptab[offset] = base_pte | (__pa(addr) & CBE_IOPTE_RPN_Mask);
+}
+
+/*
+ * Build the page table of the fixed mapping with 16MB pages.
+ *
+ * @dbase/@dsize: dynamic window, left untouched wherever it overlaps
+ * @fbase/@fsize: bus range covered by the fixed mapping
+ *
+ * Also records @fbase in dma_iommu_fixed_base. Entries are strongly
+ * ordered unless iommu_fixed_is_weak is set.
+ */
+static void __init cell_iommu_setup_fixed_ptab(struct cbe_iommu *iommu,
+	struct device_node *np, unsigned long dbase, unsigned long dsize,
+	unsigned long fbase, unsigned long fsize)
+{
+	const unsigned long step = 1 << 24;
+	unsigned long *ptab;
+	unsigned long pte_flags, mem_off, bus_addr;
+
+	ptab = cell_iommu_alloc_ptab(iommu, fbase, fsize, dbase, dsize, 24);
+
+	dma_iommu_fixed_base = fbase;
+
+	pr_debug("iommu: mapping 0x%lx pages from 0x%lx\n", fsize, fbase);
+
+	pte_flags = CBE_IOPTE_PP_W | CBE_IOPTE_PP_R | CBE_IOPTE_M |
+		    (cell_iommu_get_ioid(np) & CBE_IOPTE_IOID_Mask);
+
+	if (!iommu_fixed_is_weak) {
+		pr_info("IOMMU: Using strong ordering for fixed mapping\n");
+		pte_flags |= CBE_IOPTE_SO_RW;
+	} else {
+		pr_info("IOMMU: Using weak ordering for fixed mapping\n");
+	}
+
+	for (mem_off = 0; mem_off < fsize; mem_off += step) {
+		bus_addr = mem_off + fbase;
+
+		/* Pages inside the dynamic region are not ours to map */
+		if (bus_addr < dbase || bus_addr >= (dbase + dsize))
+			insert_16M_pte(mem_off, ptab, pte_flags);
+		else
+			pr_debug("iommu: fixed/dynamic overlap, skipping\n");
+	}
+
+	mb();
+}
+
+/*
+ * Try to set up the fixed IOMMU mapping next to the dynamic window on
+ * every axon node.
+ *
+ * Returns 0 when the mapping has been established and the PCI bypass hook
+ * installed. Returns -1 when the machine does not qualify (no axon node, no
+ * "dma-ranges" property anywhere, or no usable hash table window on large
+ * memory machines); no IOMMU has been allocated in that case.
+ */
+static int __init cell_iommu_fixed_mapping_init(void)
+{
+	unsigned long dbase, dsize, fbase, fsize, hbase, hend;
+	struct cbe_iommu *iommu;
+	struct device_node *np;
+
+	/* The fixed mapping is only supported on axon machines */
+	np = of_find_node_by_name(NULL, "axon");
+	of_node_put(np);
+
+	/* np is only tested for NULL here, never dereferenced */
+	if (!np) {
+		pr_debug("iommu: fixed mapping disabled, no axons found\n");
+		return -1;
+	}
+
+	/* We must have dma-ranges properties for fixed mapping to work */
+	np = of_find_node_with_property(NULL, "dma-ranges");
+	of_node_put(np);
+
+	if (!np) {
+		pr_debug("iommu: no dma-ranges found, no fixed mapping\n");
+		return -1;
+	}
+
+	/* The default setup is to have the fixed mapping sit after the
+	 * dynamic region, so find the top of the largest IOMMU window
+	 * on any axon, then add the size of RAM and that's our max value.
+	 * If that is > 32GB we have to do other shenanigans.
+	 */
+	fbase = 0;
+	for_each_node_by_name(np, "axon") {
+		cell_iommu_get_window(np, &dbase, &dsize);
+		fbase = max(fbase, dbase + dsize);
+	}
+
+	fbase = ALIGN(fbase, 1 << IO_SEGMENT_SHIFT);
+	fsize = memblock_phys_mem_size();
+
+	if ((fbase + fsize) <= 0x800000000ul)
+		hbase = 0; /* use the device tree window */
+	else {
+		/* If we're over 32 GB we need to cheat. We can't map all of
+		 * RAM with the fixed mapping, and also fit the dynamic
+		 * region. So try to place the dynamic region where the hash
+		 * table sits, drivers never need to DMA to it, we don't
+		 * need a fixed mapping for that area.
+		 */
+		if (!htab_address) {
+			pr_debug("iommu: htab is NULL, on LPAR? Huh?\n");
+			return -1;
+		}
+		hbase = __pa(htab_address);
+		hend = hbase + htab_size_bytes;
+
+		/* The window must start and end on a segment boundary */
+		if ((hbase != ALIGN(hbase, 1 << IO_SEGMENT_SHIFT)) ||
+		    (hend != ALIGN(hend, 1 << IO_SEGMENT_SHIFT))) {
+			pr_debug("iommu: hash window not segment aligned\n");
+			return -1;
+		}
+
+		/* Check the hash window fits inside the real DMA window */
+		for_each_node_by_name(np, "axon") {
+			cell_iommu_get_window(np, &dbase, &dsize);
+
+			if (hbase < dbase || (hend > (dbase + dsize))) {
+				pr_debug("iommu: hash window doesn't fit in "
+					 "real DMA window\n");
+				/* Leaving the iterator early: drop its ref */
+				of_node_put(np);
+				return -1;
+			}
+		}
+
+		fbase = 0;
+	}
+
+	/* Setup the dynamic regions */
+	for_each_node_by_name(np, "axon") {
+		iommu = cell_iommu_alloc(np);
+		BUG_ON(!iommu);
+
+		if (hbase == 0)
+			cell_iommu_get_window(np, &dbase, &dsize);
+		else {
+			/* Dynamic window sits on top of the hash table */
+			dbase = hbase;
+			dsize = htab_size_bytes;
+		}
+
+		printk(KERN_DEBUG "iommu: node %d, dynamic window 0x%lx-0x%lx "
+			"fixed window 0x%lx-0x%lx\n", iommu->nid, dbase,
+			dbase + dsize, fbase, fbase + fsize);
+
+		cell_iommu_setup_stab(iommu, dbase, dsize, fbase, fsize);
+		iommu->ptab = cell_iommu_alloc_ptab(iommu, dbase, dsize, 0, 0,
+						    IOMMU_PAGE_SHIFT_4K);
+		cell_iommu_setup_fixed_ptab(iommu, np, dbase, dsize,
+					     fbase, fsize);
+		cell_iommu_enable_hardware(iommu);
+		cell_iommu_setup_window(iommu, np, dbase, dsize, 0);
+	}
+
+	cell_pci_controller_ops.iommu_bypass_supported =
+		cell_pci_iommu_bypass_supported;
+	return 0;
+}
+
+/* Non-zero when the kernel was booted with iommu_fixed=off. */
+static int iommu_fixed_disabled;
+
+/*
+ * Parse the "iommu_fixed=" boot option.
+ *
+ *   off    - do not use the fixed mapping
+ *   weak   - use weak ordering for the fixed mapping
+ *   strong - keep strong ordering even where weak would be the default
+ *
+ * Always returns 1.
+ */
+static int __init setup_iommu_fixed(char *str)
+{
+	struct device_node *pciep;
+	bool want_weak;
+
+	if (!strcmp(str, "off"))
+		iommu_fixed_disabled = 1;
+
+	/* A pcie-endpoint in the device tree is taken to mean a triblade
+	 * or a CAB, where the fixed mapping defaults to weak ordering,
+	 * unless strong ordering was explicitly requested.
+	 */
+	pciep = of_find_node_by_type(NULL, "pcie-endpoint");
+
+	want_weak = !strcmp(str, "weak") ||
+		    (pciep != NULL && strcmp(str, "strong") != 0);
+	if (want_weak)
+		iommu_fixed_is_weak = true;
+
+	of_node_put(pciep);
+
+	return 1;
+}
+__setup("iommu_fixed=", setup_iommu_fixed);
+
+/*
+ * Arch initcall: choose between direct DMA, the fixed mapping and plain
+ * dynamic IOMMU windows, then register the platform bus notifier so that
+ * devices added later get the matching DMA setup. Always returns 0.
+ */
+static int __init cell_iommu_init(void)
+{
+	struct device_node *np;
+	bool try_direct;
+
+	/* With the IOMMU switched off, or with little enough RAM not to
+	 * need it, a direct mapping is attempted first.
+	 *
+	 * Note: should we make sure we have the IOMMU actually disabled ?
+	 */
+	try_direct = iommu_is_off ||
+		(!iommu_force_on && memblock_end_of_DRAM() <= 0x80000000ull);
+	if (try_direct && cell_iommu_init_disabled() == 0)
+		goto bail;
+
+	/* Setup various callbacks */
+	cell_pci_controller_ops.dma_dev_setup = cell_pci_dma_dev_setup;
+
+	if (!iommu_fixed_disabled && cell_iommu_fixed_mapping_init() == 0)
+		goto done;
+
+	/* One iommu per toplevel /axon node */
+	for_each_node_by_name(np, "axon") {
+		if (np->parent != NULL && np->parent->parent == NULL)
+			cell_iommu_init_one(np, 0);
+	}
+
+	/* One iommu per toplevel /pci-internal node, for old
+	 * hardware/firmware
+	 */
+	for_each_node_by_name(np, "pci-internal") {
+		if (np->parent != NULL && np->parent->parent == NULL)
+			cell_iommu_init_one(np, SPIDER_DMA_OFFSET);
+	}
+done:
+	/* Setup default PCI iommu ops */
+	set_pci_dma_ops(&dma_iommu_ops);
+	cell_iommu_enabled = true;
+bail:
+	/* Register callbacks on OF platform device addition/removal
+	 * to handle linking them to the right DMA operations
+	 */
+	bus_register_notifier(&platform_bus_type, &cell_of_bus_notifier);
+
+	return 0;
+}
+machine_arch_initcall(cell, cell_iommu_init);
diff --git a/arch/powerpc/platforms/cell/pervasive.c b/arch/powerpc/platforms/cell/pervasive.c
new file mode 100644
index 000000000..58d967ee3
--- /dev/null
+++ b/arch/powerpc/platforms/cell/pervasive.c
@@ -0,0 +1,125 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * CBE Pervasive Monitor and Debug
+ *
+ * (C) Copyright IBM Corporation 2005
+ *
+ * Authors: Maximino Aguilar (maguilar@us.ibm.com)
+ * Michael N. Day (mnday@us.ibm.com)
+ */
+
+#undef DEBUG
+
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/percpu.h>
+#include <linux/types.h>
+#include <linux/kallsyms.h>
+#include <linux/pgtable.h>
+
+#include <asm/io.h>
+#include <asm/machdep.h>
+#include <asm/reg.h>
+#include <asm/cell-regs.h>
+#include <asm/cpu_has_feature.h>
+
+#include "pervasive.h"
+#include "ras.h"
+
+/*
+ * Idle routine, installed as ppc_md.power_save by cbe_pervasive_init().
+ *
+ * Enables the wake-up sources for the current hardware thread in the
+ * thread switch control register, drops to low thread priority and then
+ * disables the thread until an interrupt arrives.
+ */
+static void cbe_power_save(void)
+{
+ unsigned long ctrl, thread_switch_control;
+
+ /* Ensure our interrupt state is properly tracked */
+ if (!prep_irq_for_idle())
+ return;
+
+ ctrl = mfspr(SPRN_CTRLF);
+
+ /* Enable DEC and EE interrupt request */
+ thread_switch_control = mfspr(SPRN_TSC_CELL);
+ thread_switch_control |= TSC_CELL_EE_ENABLE | TSC_CELL_EE_BOOST;
+
+ /* The decrementer enable bit depends on which thread we run on */
+ switch (ctrl & CTRL_CT) {
+ case CTRL_CT0:
+ thread_switch_control |= TSC_CELL_DEC_ENABLE_0;
+ break;
+ case CTRL_CT1:
+ thread_switch_control |= TSC_CELL_DEC_ENABLE_1;
+ break;
+ default:
+ printk(KERN_WARNING "%s: unknown configuration\n",
+ __func__);
+ break;
+ }
+ mtspr(SPRN_TSC_CELL, thread_switch_control);
+
+ /*
+ * go into low thread priority, medium priority will be
+ * restored for us after wake-up.
+ */
+ HMT_low();
+
+ /*
+ * atomically disable thread execution and runlatch.
+ * External and Decrementer exceptions are still handled when the
+ * thread is disabled but now enter in cbe_system_reset_exception()
+ */
+ ctrl &= ~(CTRL_RUNLATCH | CTRL_TE);
+ mtspr(SPRN_CTRLT, ctrl);
+
+ /* Re-enable interrupts in MSR */
+ __hard_irq_enable();
+}
+
+/*
+ * System reset hook, installed as ppc_md.system_reset_exception.
+ *
+ * Dispatches on the wake-up reason found in regs->msr. Returns 1 when the
+ * event has been dealt with here and 0 when a real system reset should be
+ * performed; for a wake-up reason of SRR1_WAKEMT the result of
+ * cbe_sysreset_hack() is returned.
+ */
+static int cbe_system_reset_exception(struct pt_regs *regs)
+{
+	int handled = 1;
+
+	switch (regs->msr & SRR1_WAKEMASK) {
+	case SRR1_WAKEDEC:
+		set_dec(1);
+		break;
+	case SRR1_WAKEEE:
+		/*
+		 * Nothing to do in this NMI context: the interrupt is taken
+		 * as a regular exception once interrupts are re-enabled.
+		 */
+		break;
+	case SRR1_WAKEMT:
+		handled = cbe_sysreset_hack();
+		break;
+#ifdef CONFIG_CBE_RAS
+	case SRR1_WAKESYSERR:
+		cbe_system_error_exception(regs);
+		break;
+	case SRR1_WAKETHERM:
+		cbe_thermal_exception(regs);
+		break;
+#endif /* CONFIG_CBE_RAS */
+	default:
+		/* do system reset */
+		handled = 0;
+		break;
+	}
+
+	return handled;
+}
+
+/*
+ * On CPUs with the PAUSE_ZERO feature, set the Pause(0) control bit in the
+ * PMCR of every possible CPU that has PMD registers and install the Cell
+ * power-save and system-reset hooks. Does nothing on other CPUs.
+ */
+void __init cbe_pervasive_init(void)
+{
+	int cpu;
+
+	if (!cpu_has_feature(CPU_FTR_PAUSE_ZERO))
+		return;
+
+	for_each_possible_cpu(cpu) {
+		struct cbe_pmd_regs __iomem *regs = cbe_get_cpu_pmd_regs(cpu);
+		u64 pmcr;
+
+		if (regs) {
+			/* Enable Pause(0) control bit */
+			pmcr = in_be64(&regs->pmcr);
+			out_be64(&regs->pmcr,
+				 pmcr | CBE_PMD_PAUSE_ZERO_CONTROL);
+		}
+	}
+
+	ppc_md.power_save = cbe_power_save;
+	ppc_md.system_reset_exception = cbe_system_reset_exception;
+}
diff --git a/arch/powerpc/platforms/cell/pervasive.h b/arch/powerpc/platforms/cell/pervasive.h
new file mode 100644
index 000000000..0da74ab10
--- /dev/null
+++ b/arch/powerpc/platforms/cell/pervasive.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Cell Pervasive Monitor and Debug interface and HW structures
+ *
+ * (C) Copyright IBM Corporation 2005
+ *
+ * Authors: Maximino Aguilar (maguilar@us.ibm.com)
+ * David J. Erb (djerb@us.ibm.com)
+ */
+
+
+#ifndef PERVASIVE_H
+#define PERVASIVE_H
+
+/* Installs the Cell power-save and system-reset hooks (see pervasive.c). */
+extern void cbe_pervasive_init(void);
+
+/*
+ * cbe_sysreset_hack() is called from the system reset handler for the
+ * SRR1_WAKEMT wake-up reason; its return value becomes the handler's result.
+ */
+#ifdef CONFIG_PPC_IBM_CELL_RESETBUTTON
+extern int cbe_sysreset_hack(void);
+#else
+/* Stub used when reset button support is not configured: always returns 1. */
+static inline int cbe_sysreset_hack(void)
+{
+ return 1;
+}
+#endif /* CONFIG_PPC_IBM_CELL_RESETBUTTON */
+
+#endif
diff --git a/arch/powerpc/platforms/cell/pmu.c b/arch/powerpc/platforms/cell/pmu.c
new file mode 100644
index 000000000..b207a7f99
--- /dev/null
+++ b/arch/powerpc/platforms/cell/pmu.c
@@ -0,0 +1,412 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Cell Broadband Engine Performance Monitor
+ *
+ * (C) Copyright IBM Corporation 2001,2006
+ *
+ * Author:
+ * David Erb (djerb@us.ibm.com)
+ * Kevin Corry (kevcorry@us.ibm.com)
+ */
+
+#include <linux/interrupt.h>
+#include <linux/irqdomain.h>
+#include <linux/types.h>
+#include <linux/export.h>
+#include <asm/io.h>
+#include <asm/irq_regs.h>
+#include <asm/machdep.h>
+#include <asm/pmc.h>
+#include <asm/reg.h>
+#include <asm/spu.h>
+#include <asm/cell-regs.h>
+
+#include "interrupt.h"
+
+/*
+ * When writing to write-only mmio addresses, save a shadow copy. All of the
+ * registers are 32-bit, but stored in the upper-half of a 64-bit field in
+ * pmd_regs.
+ */
+
+/*
+ * NOTE: all three macros below expect a variable named "cpu" to be in
+ * scope at the point of use; it selects whose registers are accessed.
+ */
+
+/* Write @x to the upper 32 bits of register @reg and remember it in the
+ * shadow copy. @x is evaluated exactly once.
+ */
+#define WRITE_WO_MMIO(reg, x) \
+ do { \
+ u32 _x = (x); \
+ struct cbe_pmd_regs __iomem *pmd_regs; \
+ struct cbe_pmd_shadow_regs *shadow_regs; \
+ pmd_regs = cbe_get_cpu_pmd_regs(cpu); \
+ shadow_regs = cbe_get_cpu_pmd_shadow_regs(cpu); \
+ out_be64(&(pmd_regs->reg), (((u64)_x) << 32)); \
+ shadow_regs->reg = _x; \
+ } while (0)
+
+/* Store the shadow copy of @reg in @val; the hardware is not touched. */
+#define READ_SHADOW_REG(val, reg) \
+ do { \
+ struct cbe_pmd_shadow_regs *shadow_regs; \
+ shadow_regs = cbe_get_cpu_pmd_shadow_regs(cpu); \
+ (val) = shadow_regs->reg; \
+ } while (0)
+
+/* Read register @reg from the hardware and store its upper 32 bits in @val. */
+#define READ_MMIO_UPPER32(val, reg) \
+ do { \
+ struct cbe_pmd_regs __iomem *pmd_regs; \
+ pmd_regs = cbe_get_cpu_pmd_regs(cpu); \
+ (val) = (u32)(in_be64(&pmd_regs->reg) >> 32); \
+ } while (0)
+
+/*
+ * Physical counter registers.
+ * Each physical counter can act as one 32-bit counter or two 16-bit counters.
+ */
+
+/*
+ * Read physical counter @phys_ctr of @cpu.
+ *
+ * A value written while the PMU was disabled has only been latched; in
+ * that case the shadow copy is returned, otherwise the counter is read
+ * from the hardware. Returns 0 for an out-of-range counter number.
+ */
+u32 cbe_read_phys_ctr(u32 cpu, u32 phys_ctr)
+{
+	u32 latched, val = 0;
+
+	if (phys_ctr >= NR_PHYS_CTRS)
+		return val;
+
+	READ_SHADOW_REG(latched, counter_value_in_latch);
+
+	/* Read the latch or the actual counter, whichever is newer. */
+	if (latched & (1 << phys_ctr))
+		READ_SHADOW_REG(val, pm_ctr[phys_ctr]);
+	else
+		READ_MMIO_UPPER32(val, pm_ctr[phys_ctr]);
+
+	return val;
+}
+EXPORT_SYMBOL_GPL(cbe_read_phys_ctr);
+
+/*
+ * Write @val to physical counter @phys_ctr of @cpu. Out-of-range counter
+ * numbers are ignored.
+ */
+void cbe_write_phys_ctr(u32 cpu, u32 phys_ctr, u32 val)
+{
+	struct cbe_pmd_shadow_regs *shadow;
+	u32 pm_ctrl;
+
+	if (phys_ctr >= NR_PHYS_CTRS)
+		return;
+
+	/* A counter write only reaches a hardware latch. The value is not
+	 * propagated to the counter itself until the performance monitor
+	 * is enabled.
+	 */
+	WRITE_WO_MMIO(pm_ctr[phys_ctr], val);
+
+	pm_ctrl = cbe_read_pm(cpu, pm_control);
+	if (!(pm_ctrl & CBE_PM_ENABLE_PERF_MON)) {
+		/* PMU is off: remember that the latch holds the newest value */
+		shadow = cbe_get_cpu_pmd_shadow_regs(cpu);
+		shadow->counter_value_in_latch |= (1 << phys_ctr);
+		return;
+	}
+
+	/* The counters are already running, so pm_control has to be
+	 * rewritten to "re-enable" the PMU.
+	 */
+	cbe_write_pm(cpu, pm_control, pm_ctrl);
+}
+EXPORT_SYMBOL_GPL(cbe_write_phys_ctr);
+
+/*
+ * "Logical" counter registers.
+ * These will read/write 16-bits or 32-bits depending on the
+ * current size of the counter. Counters 4 - 7 are always 16-bit.
+ */
+
+/*
+ * Read logical counter @ctr of @cpu.
+ *
+ * When the underlying physical counter is in 16-bit mode, logical counters
+ * below NR_PHYS_CTRS live in its upper half and the others in its lower
+ * half; otherwise the whole 32-bit value is returned.
+ */
+u32 cbe_read_ctr(u32 cpu, u32 ctr)
+{
+	u32 phys_ctr = ctr & (NR_PHYS_CTRS - 1);
+	u32 raw = cbe_read_phys_ctr(cpu, phys_ctr);
+
+	if (cbe_get_ctr_size(cpu, phys_ctr) != 16)
+		return raw;
+
+	if (ctr < NR_PHYS_CTRS)
+		return raw >> 16;
+
+	return raw & 0xffff;
+}
+EXPORT_SYMBOL_GPL(cbe_read_ctr);
+
+/*
+ * Write @val to logical counter @ctr of @cpu.
+ *
+ * In 16-bit mode only the half of the physical counter that belongs to
+ * @ctr is replaced; the other half keeps its current value.
+ */
+void cbe_write_ctr(u32 cpu, u32 ctr, u32 val)
+{
+	u32 phys_ctr = ctr & (NR_PHYS_CTRS - 1);
+
+	if (cbe_get_ctr_size(cpu, phys_ctr) == 16) {
+		u32 current_val = cbe_read_phys_ctr(cpu, phys_ctr);
+		bool upper_half = ctr < NR_PHYS_CTRS;
+
+		val = upper_half ?
+			(val << 16) | (current_val & 0xffff) :
+			(val & 0xffff) | (current_val & 0xffff0000);
+	}
+
+	cbe_write_phys_ctr(cpu, phys_ctr, val);
+}
+EXPORT_SYMBOL_GPL(cbe_write_ctr);
+
+/*
+ * Counter-control registers.
+ * Each "logical" counter has a corresponding control register.
+ */
+
+/*
+ * Return the shadow copy of the control register of logical counter @ctr,
+ * or 0 when @ctr is out of range.
+ */
+u32 cbe_read_pm07_control(u32 cpu, u32 ctr)
+{
+	u32 ctl_val = 0;
+
+	if (ctr >= NR_CTRS)
+		return ctl_val;
+
+	READ_SHADOW_REG(ctl_val, pm07_control[ctr]);
+	return ctl_val;
+}
+EXPORT_SYMBOL_GPL(cbe_read_pm07_control);
+
+/*
+ * Write @val to the control register of logical counter @ctr and update
+ * its shadow copy. Out-of-range counter numbers are ignored.
+ */
+void cbe_write_pm07_control(u32 cpu, u32 ctr, u32 val)
+{
+	if (ctr >= NR_CTRS)
+		return;
+
+	WRITE_WO_MMIO(pm07_control[ctr], val);
+}
+EXPORT_SYMBOL_GPL(cbe_write_pm07_control);
+
+/*
+ * Other PMU control registers. Most of these are write-only.
+ */
+
+/*
+ * Read PMU control register @reg of @cpu.
+ *
+ * trace_address, pm_status and pm_interval are read from the hardware; all
+ * other registers are served from the shadow copy kept by WRITE_WO_MMIO().
+ */
+u32 cbe_read_pm(u32 cpu, enum pm_reg_name reg)
+{
+	u32 val = 0;
+
+	switch (reg) {
+	/* Registers read back from the hardware */
+	case trace_address:
+		READ_MMIO_UPPER32(val, trace_address);
+		break;
+
+	case pm_status:
+		READ_MMIO_UPPER32(val, pm_status);
+		break;
+
+	case pm_interval:
+		READ_MMIO_UPPER32(val, pm_interval);
+		break;
+
+	/* Registers served from the shadow copy */
+	case group_control:
+		READ_SHADOW_REG(val, group_control);
+		break;
+
+	case debug_bus_control:
+		READ_SHADOW_REG(val, debug_bus_control);
+		break;
+
+	case ext_tr_timer:
+		READ_SHADOW_REG(val, ext_tr_timer);
+		break;
+
+	case pm_control:
+		READ_SHADOW_REG(val, pm_control);
+		break;
+
+	case pm_start_stop:
+		READ_SHADOW_REG(val, pm_start_stop);
+		break;
+	}
+
+	return val;
+}
+EXPORT_SYMBOL_GPL(cbe_read_pm);
+
+/*
+ * Write @val to PMU control register @reg of @cpu. Every register goes
+ * through WRITE_WO_MMIO(), so the shadow copy is updated as well -- also
+ * for the registers that cbe_read_pm() reads back from the hardware.
+ */
+void cbe_write_pm(u32 cpu, enum pm_reg_name reg, u32 val)
+{
+ switch (reg) {
+ case group_control:
+ WRITE_WO_MMIO(group_control, val);
+ break;
+
+ case debug_bus_control:
+ WRITE_WO_MMIO(debug_bus_control, val);
+ break;
+
+ case trace_address:
+ WRITE_WO_MMIO(trace_address, val);
+ break;
+
+ case ext_tr_timer:
+ WRITE_WO_MMIO(ext_tr_timer, val);
+ break;
+
+ case pm_status:
+ WRITE_WO_MMIO(pm_status, val);
+ break;
+
+ case pm_control:
+ WRITE_WO_MMIO(pm_control, val);
+ break;
+
+ case pm_interval:
+ WRITE_WO_MMIO(pm_interval, val);
+ break;
+
+ case pm_start_stop:
+ WRITE_WO_MMIO(pm_start_stop, val);
+ break;
+ }
+}
+EXPORT_SYMBOL_GPL(cbe_write_pm);
+
+/*
+ * Get/set the size of a physical counter to either 16 or 32 bits.
+ */
+
+/*
+ * Return the width in bits (16 or 32) of physical counter @phys_ctr as
+ * configured in pm_control, or 0 when @phys_ctr is out of range.
+ */
+u32 cbe_get_ctr_size(u32 cpu, u32 phys_ctr)
+{
+	u32 pm_ctrl;
+
+	if (phys_ctr >= NR_PHYS_CTRS)
+		return 0;
+
+	pm_ctrl = cbe_read_pm(cpu, pm_control);
+	if (pm_ctrl & CBE_PM_16BIT_CTR(phys_ctr))
+		return 16;
+
+	return 32;
+}
+EXPORT_SYMBOL_GPL(cbe_get_ctr_size);
+
+/*
+ * Switch physical counter @phys_ctr between 16-bit and 32-bit mode.
+ *
+ * Any @ctr_size other than 16 or 32 leaves the mode bit unchanged, but
+ * pm_control is still written back. Out-of-range counters are ignored.
+ */
+void cbe_set_ctr_size(u32 cpu, u32 phys_ctr, u32 ctr_size)
+{
+	u32 pm_ctrl;
+
+	if (phys_ctr >= NR_PHYS_CTRS)
+		return;
+
+	pm_ctrl = cbe_read_pm(cpu, pm_control);
+
+	if (ctr_size == 16)
+		pm_ctrl |= CBE_PM_16BIT_CTR(phys_ctr);
+	else if (ctr_size == 32)
+		pm_ctrl &= ~CBE_PM_16BIT_CTR(phys_ctr);
+
+	cbe_write_pm(cpu, pm_control, pm_ctrl);
+}
+EXPORT_SYMBOL_GPL(cbe_set_ctr_size);
+
+/*
+ * Enable/disable the entire performance monitoring unit.
+ * When we enable the PMU, all pending writes to counters get committed.
+ */
+
+/*
+ * Enable the performance monitor of @cpu. The "value still in latch"
+ * bookkeeping is reset first, since enabling commits pending counter
+ * writes.
+ */
+void cbe_enable_pm(u32 cpu)
+{
+	struct cbe_pmd_shadow_regs *shadow = cbe_get_cpu_pmd_shadow_regs(cpu);
+	u32 ctrl;
+
+	shadow->counter_value_in_latch = 0;
+
+	ctrl = cbe_read_pm(cpu, pm_control);
+	ctrl |= CBE_PM_ENABLE_PERF_MON;
+	cbe_write_pm(cpu, pm_control, ctrl);
+}
+EXPORT_SYMBOL_GPL(cbe_enable_pm);
+
+/* Disable the performance monitor of @cpu by clearing its enable bit. */
+void cbe_disable_pm(u32 cpu)
+{
+	u32 ctrl = cbe_read_pm(cpu, pm_control);
+
+	ctrl &= ~CBE_PM_ENABLE_PERF_MON;
+	cbe_write_pm(cpu, pm_control, ctrl);
+}
+EXPORT_SYMBOL_GPL(cbe_disable_pm);
+
+/*
+ * Reading from the trace_buffer.
+ * The trace buffer is two 64-bit registers. Reading from
+ * the second half automatically increments the trace_address.
+ */
+
+/*
+ * Read one 128-bit trace buffer entry of @cpu into @buf[0] and @buf[1].
+ * The halves must be read in this order: reading the second half is what
+ * advances the trace address.
+ */
+void cbe_read_trace_buffer(u32 cpu, u64 *buf)
+{
+	struct cbe_pmd_regs __iomem *pmd_regs = cbe_get_cpu_pmd_regs(cpu);
+
+	buf[0] = in_be64(&pmd_regs->trace_buffer_0_63);
+	buf[1] = in_be64(&pmd_regs->trace_buffer_64_127);
+}
+EXPORT_SYMBOL_GPL(cbe_read_trace_buffer);
+
+/*
+ * Enabling/disabling interrupts for the entire performance monitoring unit.
+ */
+
+/*
+ * Return the pending PMU interrupt bits of @cpu. pm_status is read from
+ * the hardware, which clears the bits as a side effect.
+ */
+u32 cbe_get_and_clear_pm_interrupts(u32 cpu)
+{
+ /* Reading pm_status clears the interrupt bits. */
+ return cbe_read_pm(cpu, pm_status);
+}
+EXPORT_SYMBOL_GPL(cbe_get_and_clear_pm_interrupts);
+
+/*
+ * Route the next PMU interrupt of @cpu to @thread and, for a non-zero
+ * @mask, enable the corresponding interrupt bits in pm_status.
+ */
+void cbe_enable_pm_interrupts(u32 cpu, u32 thread, u32 mask)
+{
+	/* Set which node and thread will handle the next interrupt. */
+	iic_set_interrupt_routing(cpu, thread, 0);
+
+	if (!mask)
+		return;
+
+	/* Enable the interrupt bits in the pm_status register. */
+	cbe_write_pm(cpu, pm_status, mask);
+}
+EXPORT_SYMBOL_GPL(cbe_enable_pm_interrupts);
+
+/* Clear any pending PMU interrupt bits of @cpu, then write 0 to pm_status
+ * so that no interrupt bits remain enabled.
+ */
+void cbe_disable_pm_interrupts(u32 cpu)
+{
+ cbe_get_and_clear_pm_interrupts(cpu);
+ cbe_write_pm(cpu, pm_status, 0);
+}
+EXPORT_SYMBOL_GPL(cbe_disable_pm_interrupts);
+
+/* PMU interrupt handler: hand the interrupted register state to perf_irq. */
+static irqreturn_t cbe_pm_irq(int irq, void *dev_id)
+{
+ perf_irq(get_irq_regs());
+ return IRQ_HANDLED;
+}
+
+/*
+ * Arch initcall: map and request the PMU interrupt of every online node.
+ *
+ * Returns 0 on success, -EINVAL when an interrupt cannot be mapped, or the
+ * error returned by request_irq(). Interrupts requested for earlier nodes
+ * are not released when a later node fails.
+ */
+static int __init cbe_init_pm_irq(void)
+{
+	unsigned int irq;
+	int rc, node;
+
+	for_each_online_node(node) {
+		/* The node number is encoded in the hardware irq number */
+		irq = irq_create_mapping(NULL, IIC_IRQ_IOEX_PMI |
+					       (node << IIC_IRQ_NODE_SHIFT));
+		if (!irq) {
+			printk(KERN_ERR
+			       "ERROR: Unable to allocate irq for node %d\n",
+			       node);
+			return -EINVAL;
+		}
+
+		rc = request_irq(irq, cbe_pm_irq,
+				 0, "cbe-pmu-0", NULL);
+		if (rc) {
+			printk(KERN_ERR
+			       "ERROR: Request for irq on node %d failed\n",
+			       node);
+			return rc;
+		}
+	}
+
+	return 0;
+}
+machine_arch_initcall(cell, cbe_init_pm_irq);
+
+/*
+ * Wait until any PMU interrupt handler of @node that is currently running
+ * has finished. Logs a warning and returns when the interrupt has no
+ * mapping.
+ */
+void cbe_sync_irq(int node)
+{
+	int hwirq = IIC_IRQ_IOEX_PMI | (node << IIC_IRQ_NODE_SHIFT);
+	unsigned int irq = irq_find_mapping(NULL, hwirq);
+
+	if (irq) {
+		synchronize_irq(irq);
+		return;
+	}
+
+	/* irq is 0 here, so that is the value the message reports */
+	printk(KERN_WARNING "ERROR, unable to get existing irq %d "
+	       "for node %d\n", irq, node);
+}
+EXPORT_SYMBOL_GPL(cbe_sync_irq);
+
diff --git a/arch/powerpc/platforms/cell/ras.c b/arch/powerpc/platforms/cell/ras.c
new file mode 100644
index 000000000..f6b879265
--- /dev/null
+++ b/arch/powerpc/platforms/cell/ras.c
@@ -0,0 +1,352 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2006-2008, IBM Corporation.
+ */
+
+#undef DEBUG
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/smp.h>
+#include <linux/reboot.h>
+#include <linux/kexec.h>
+#include <linux/crash_dump.h>
+#include <linux/of.h>
+
+#include <asm/kexec.h>
+#include <asm/reg.h>
+#include <asm/io.h>
+#include <asm/machdep.h>
+#include <asm/rtas.h>
+#include <asm/cell-regs.h>
+
+#include "ras.h"
+#include "pervasive.h"
+
+/*
+ * Print the global fault isolation registers (FIRs) of @cpu, followed by
+ * the IOC FIR when the IIC register block is available. Does nothing if
+ * the pervasive (PMD) register block cannot be found.
+ */
+static void dump_fir(int cpu)
+{
+	struct cbe_pmd_regs __iomem *pregs = cbe_get_cpu_pmd_regs(cpu);
+	struct cbe_iic_regs __iomem *iregs = cbe_get_cpu_iic_regs(cpu);
+
+	if (pregs == NULL)
+		return;
+
+	/* Todo: do some nicer parsing of bits and based on them go down
+	 * to other sub-units FIRs and not only IIC
+	 */
+	printk(KERN_ERR "Global Checkstop FIR : 0x%016llx\n",
+	       in_be64(&pregs->checkstop_fir));
+	/* The recoverable FIR has its own register; do not re-read checkstop */
+	printk(KERN_ERR "Global Recoverable FIR : 0x%016llx\n",
+	       in_be64(&pregs->recoverable_fir));
+	printk(KERN_ERR "Global MachineCheck FIR : 0x%016llx\n",
+	       in_be64(&pregs->spec_att_mchk_fir));
+
+	if (iregs == NULL)
+		return;
+	printk(KERN_ERR "IOC FIR : 0x%016llx\n",
+	       in_be64(&iregs->ioc_fir));
+
+}
+
+DEFINE_INTERRUPT_HANDLER(cbe_system_error_exception)
+{
+ int cpu = smp_processor_id();
+
+ printk(KERN_ERR "System Error Interrupt on CPU %d !\n", cpu);
+ dump_fir(cpu);
+ dump_stack();
+}
+
+/*
+ * Maintenance interrupt: no handling exists yet, so only report it
+ * together with a backtrace.
+ */
+DEFINE_INTERRUPT_HANDLER(cbe_maintenance_exception)
+{
+	printk(KERN_ERR "Unhandled Maintenance interrupt on CPU %d !\n",
+	       smp_processor_id());
+	dump_stack();
+}
+
+/*
+ * Thermal interrupt: no handling exists yet, so only report it
+ * together with a backtrace.
+ */
+DEFINE_INTERRUPT_HANDLER(cbe_thermal_exception)
+{
+	printk(KERN_ERR "Unhandled Thermal interrupt on CPU %d !\n",
+	       smp_processor_id());
+	dump_stack();
+}
+
+/*
+ * Machine check hook: report the event and dump the FIRs of the current
+ * cpu. Always returns 0 since no recovery is implemented.
+ */
+static int cbe_machine_check_handler(struct pt_regs *regs)
+{
+	const int this_cpu = smp_processor_id();
+
+	printk(KERN_ERR "Machine Check Interrupt on CPU %d !\n", this_cpu);
+	dump_fir(this_cpu);
+
+	return 0;
+}
+
+/* One PTCAL buffer handed to RTAS; entries are kept on ptcal_list. */
+struct ptcal_area {
+ struct list_head list; /* link in ptcal_list */
+ int nid; /* node id used for the allocation and the RTAS calls */
+ int order; /* page allocation order of @pages */
+ struct page *pages; /* pages backing the PTCAL area */
+};
+
+static LIST_HEAD(ptcal_list);
+
+static int ptcal_start_tok, ptcal_stop_tok;
+
+/*
+ * Allocate a PTCAL area of 2^@order pages on node @nid and ask RTAS to
+ * start PTCAL there. On success the area is added to ptcal_list.
+ *
+ * Returns 0 on success, -ENOMEM if either allocation fails, or -EIO if
+ * the RTAS start call reports an error.
+ */
+static int __init cbe_ptcal_enable_on_node(int nid, int order)
+{
+ struct ptcal_area *area;
+ int ret = -ENOMEM;
+ unsigned long addr;
+
+ /*
+ * In a kdump kernel, issue a stop call first; presumably the crashed
+ * kernel may have left PTCAL running - TODO confirm.
+ */
+ if (is_kdump_kernel())
+ rtas_call(ptcal_stop_tok, 1, 1, NULL, nid);
+
+ area = kmalloc(sizeof(*area), GFP_KERNEL);
+ if (!area)
+ goto out_err;
+
+ area->nid = nid;
+ area->order = order;
+ /* __GFP_THISNODE: the pages must come from this node, no fallback */
+ area->pages = __alloc_pages_node(area->nid,
+ GFP_KERNEL|__GFP_THISNODE,
+ area->order);
+
+ if (!area->pages) {
+ printk(KERN_WARNING "%s: no page on node %d\n",
+ __func__, area->nid);
+ goto out_free_area;
+ }
+
+ /*
+ * We move the ptcal area to the middle of the allocated
+ * page, in order to avoid prefetches in memcpy and similar
+ * functions stepping on it.
+ */
+ addr = __pa(page_address(area->pages)) + (PAGE_SIZE >> 1);
+ printk(KERN_DEBUG "%s: enabling PTCAL on node %d address=0x%016lx\n",
+ __func__, area->nid, addr);
+
+ /* The physical address is passed to RTAS as two 32-bit halves */
+ ret = -EIO;
+ if (rtas_call(ptcal_start_tok, 3, 1, NULL, area->nid,
+ (unsigned int)(addr >> 32),
+ (unsigned int)(addr & 0xffffffff))) {
+ printk(KERN_ERR "%s: error enabling PTCAL on node %d!\n",
+ __func__, nid);
+ goto out_free_pages;
+ }
+
+ list_add(&area->list, &ptcal_list);
+
+ return 0;
+
+ /* Error unwinding: release in reverse order of acquisition */
+out_free_pages:
+ __free_pages(area->pages, area->order);
+out_free_area:
+ kfree(area);
+out_err:
+ return ret;
+}
+
+/*
+ * Enable PTCAL on every node described by the device tree.
+ *
+ * The area size comes from the "ibm,cbe-ptcal-size" property of /rtas.
+ * Nodes are taken from "mic-tm" device nodes when any exist, otherwise
+ * from the "node-id" property of "cpu" nodes.
+ *
+ * Returns 0 if at least one candidate node was found, -ENODEV otherwise.
+ * The result of the per-node enable call is not checked here.
+ */
+static int __init cbe_ptcal_enable(void)
+{
+ const u32 *size;
+ struct device_node *np;
+ int order, found_mic = 0;
+
+ np = of_find_node_by_path("/rtas");
+ if (!np)
+ return -ENODEV;
+
+ size = of_get_property(np, "ibm,cbe-ptcal-size", NULL);
+ if (!size) {
+ of_node_put(np);
+ return -ENODEV;
+ }
+
+ pr_debug("%s: enabling PTCAL, size = 0x%x\n", __func__, *size);
+ /* Read *size before dropping the reference on the /rtas node */
+ order = get_order(*size);
+ of_node_put(np);
+
+ /* support for malta device trees, with be@/mic@ nodes */
+ for_each_node_by_type(np, "mic-tm") {
+ cbe_ptcal_enable_on_node(of_node_to_nid(np), order);
+ found_mic = 1;
+ }
+
+ if (found_mic)
+ return 0;
+
+ /* support for older device tree - use cpu nodes */
+ for_each_node_by_type(np, "cpu") {
+ const u32 *nid = of_get_property(np, "node-id", NULL);
+ if (!nid) {
+ printk(KERN_ERR "%s: node %pOF is missing node-id?\n",
+ __func__, np);
+ continue;
+ }
+ cbe_ptcal_enable_on_node(*nid, order);
+ found_mic = 1;
+ }
+
+ return found_mic ? 0 : -ENODEV;
+}
+
+/*
+ * Stop PTCAL on every area in ptcal_list and free the areas.
+ *
+ * An area whose RTAS stop call fails is left on the list with its pages
+ * still allocated. Returns 0 if every stop call succeeded, -EIO if any
+ * of them failed.
+ */
+static int cbe_ptcal_disable(void)
+{
+ struct ptcal_area *area, *tmp;
+ int ret = 0;
+
+ pr_debug("%s: disabling PTCAL\n", __func__);
+
+ /* _safe variant: entries are deleted while walking the list */
+ list_for_each_entry_safe(area, tmp, &ptcal_list, list) {
+ /* disable ptcal on this node */
+ if (rtas_call(ptcal_stop_tok, 1, 1, NULL, area->nid)) {
+ printk(KERN_ERR "%s: error disabling PTCAL "
+ "on node %d!\n", __func__,
+ area->nid);
+ ret = -EIO;
+ continue;
+ }
+
+ /* ensure we can access the PTCAL area */
+ memset(page_address(area->pages), 0,
+ 1 << (area->order + PAGE_SHIFT));
+
+ /* clean up */
+ list_del(&area->list);
+ __free_pages(area->pages, area->order);
+ kfree(area);
+ }
+
+ return ret;
+}
+
+/*
+ * Reboot notifier callback: turn PTCAL off before the machine restarts.
+ *
+ * Always returns NOTIFY_DONE. The result of cbe_ptcal_disable() must not
+ * be returned directly: a negative errno has NOTIFY_STOP_MASK set when
+ * interpreted as a notifier return value and would stop the remaining
+ * reboot notifiers from running. Failures are already logged by
+ * cbe_ptcal_disable() itself.
+ */
+static int cbe_ptcal_notify_reboot(struct notifier_block *nb,
+		unsigned long code, void *data)
+{
+	cbe_ptcal_disable();
+
+	return NOTIFY_DONE;
+}
+
+/* Crash shutdown hook: stop PTCAL; the result is deliberately ignored. */
+static void cbe_ptcal_crash_shutdown(void)
+{
+	(void)cbe_ptcal_disable();
+}
+
+/* Registered by cbe_ptcal_init() so PTCAL is disabled on reboot. */
+static struct notifier_block cbe_ptcal_reboot_notifier = {
+ .notifier_call = cbe_ptcal_notify_reboot
+};
+
+#ifdef CONFIG_PPC_IBM_CELL_RESETBUTTON
+static int sysreset_hack;
+
+/*
+ * On "IBM,CBPLUS-1.0" machines, set the M8 bit in the FIR mode register
+ * of cpu 0 to enable the JTAG system-reset hack. Always returns 0.
+ */
+static int __init cbe_sysreset_init(void)
+{
+	struct cbe_pmd_regs __iomem *pmd;
+	u32 mode;
+
+	sysreset_hack = of_machine_is_compatible("IBM,CBPLUS-1.0");
+	if (sysreset_hack == 0)
+		return 0;
+
+	pmd = cbe_get_cpu_pmd_regs(0);
+	if (pmd == NULL)
+		return 0;
+
+	/* Read-modify-write: only the M8 bit is added */
+	mode = in_be32(&pmd->fir_mode_reg);
+	out_be32(&pmd->fir_mode_reg, mode | CBE_PMD_FIR_MODE_M8);
+
+	return 0;
+}
+device_initcall(cbe_sysreset_init);
+
+/*
+ * The BMC can inject user triggered system reset exceptions, but cannot
+ * set the system reset reason in srr1, so an extra register is checked.
+ *
+ * Returns 0 when the hack is active on cpu 0 and either the PMD registers
+ * are unavailable or ras_esc_0 had low bits set (which are then cleared);
+ * returns 1 in every other case.
+ */
+int cbe_sysreset_hack(void)
+{
+	struct cbe_pmd_regs __iomem *pmd;
+
+	if (!sysreset_hack || smp_processor_id() != 0)
+		return 1;
+
+	pmd = cbe_get_cpu_pmd_regs(0);
+	if (pmd == NULL)
+		return 0;
+
+	if (!(in_be64(&pmd->ras_esc_0) & 0x0000ffff))
+		return 1;
+
+	out_be64(&pmd->ras_esc_0, 0);
+	return 0;
+}
+#endif /* CONFIG_PPC_IBM_CELL_RESETBUTTON */
+
+/*
+ * Look up the RTAS PTCAL tokens, install the reboot and crash hooks that
+ * disable PTCAL again, and only then enable it.
+ *
+ * Returns -ENODEV when either RTAS service is unknown, the error of a
+ * failed hook registration, or the result of cbe_ptcal_enable().
+ */
+static int __init cbe_ptcal_init(void)
+{
+	int err;
+
+	ptcal_start_tok = rtas_function_token(RTAS_FN_IBM_CBE_START_PTCAL);
+	ptcal_stop_tok = rtas_function_token(RTAS_FN_IBM_CBE_STOP_PTCAL);
+
+	if (ptcal_start_tok == RTAS_UNKNOWN_SERVICE ||
+	    ptcal_stop_tok == RTAS_UNKNOWN_SERVICE)
+		return -ENODEV;
+
+	err = register_reboot_notifier(&cbe_ptcal_reboot_notifier);
+	if (err) {
+		printk(KERN_ERR "Can't disable PTCAL, so not enabling\n");
+		return err;
+	}
+
+	err = crash_shutdown_register(&cbe_ptcal_crash_shutdown);
+	if (err) {
+		/* Undo the reboot notifier before giving up */
+		unregister_reboot_notifier(&cbe_ptcal_reboot_notifier);
+		printk(KERN_ERR "Can't disable PTCAL, so not enabling\n");
+		return err;
+	}
+
+	return cbe_ptcal_enable();
+}
+
+arch_initcall(cbe_ptcal_init);
+
+/*
+ * Enable the system error and thermal interrupts (and their wakeup
+ * conditions) in HID0 and install the Cell machine check handler.
+ */
+void __init cbe_ras_init(void)
+{
+	unsigned long hid0 = mfspr(SPRN_HID0);
+
+	/* Thermal interrupt and wakeup condition */
+	hid0 |= HID0_CBE_THERM_INT_EN | HID0_CBE_THERM_WAKEUP;
+	/* System error interrupt and wakeup condition */
+	hid0 |= HID0_CBE_SYSERR_INT_EN | HID0_CBE_SYSERR_WAKEUP;
+	mtspr(SPRN_HID0, hid0);
+	mb();
+
+	/*
+	 * Precise mode is left as the firmware configured it; only the
+	 * handler is installed here.
+	 */
+	ppc_md.machine_check_exception = cbe_machine_check_handler;
+	mb();
+
+	/*
+	 * IOC_FIR is assumed to already forward some error conditions to
+	 * the System Error handler. Should that turn out to be wrong, this
+	 * is the place to fix it up.
+	 */
+}
diff --git a/arch/powerpc/platforms/cell/ras.h b/arch/powerpc/platforms/cell/ras.h
new file mode 100644
index 000000000..226dbd48e
--- /dev/null
+++ b/arch/powerpc/platforms/cell/ras.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef RAS_H
+#define RAS_H
+
+#include <asm/interrupt.h>
+
+DECLARE_INTERRUPT_HANDLER(cbe_system_error_exception);
+DECLARE_INTERRUPT_HANDLER(cbe_maintenance_exception);
+DECLARE_INTERRUPT_HANDLER(cbe_thermal_exception);
+
+extern void cbe_ras_init(void);
+
+#endif /* RAS_H */
diff --git a/arch/powerpc/platforms/cell/setup.c b/arch/powerpc/platforms/cell/setup.c
new file mode 100644
index 000000000..f64a1ef98
--- /dev/null
+++ b/arch/powerpc/platforms/cell/setup.c
@@ -0,0 +1,274 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * linux/arch/powerpc/platforms/cell/cell_setup.c
+ *
+ * Copyright (C) 1995 Linus Torvalds
+ * Adapted from 'alpha' version by Gary Thomas
+ * Modified by Cort Dougan (cort@cs.nmt.edu)
+ * Modified by PPC64 Team, IBM Corp
+ * Modified by Cell Team, IBM Deutschland Entwicklung GmbH
+ */
+#undef DEBUG
+
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/stddef.h>
+#include <linux/export.h>
+#include <linux/unistd.h>
+#include <linux/user.h>
+#include <linux/reboot.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/irq.h>
+#include <linux/seq_file.h>
+#include <linux/root_dev.h>
+#include <linux/console.h>
+#include <linux/mutex.h>
+#include <linux/memory_hotplug.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+
+#include <asm/mmu.h>
+#include <asm/processor.h>
+#include <asm/io.h>
+#include <asm/rtas.h>
+#include <asm/pci-bridge.h>
+#include <asm/iommu.h>
+#include <asm/dma.h>
+#include <asm/machdep.h>
+#include <asm/time.h>
+#include <asm/nvram.h>
+#include <asm/cputable.h>
+#include <asm/ppc-pci.h>
+#include <asm/irq.h>
+#include <asm/spu.h>
+#include <asm/spu_priv1.h>
+#include <asm/udbg.h>
+#include <asm/mpic.h>
+#include <asm/cell-regs.h>
+#include <asm/io-workarounds.h>
+
+#include "cell.h"
+#include "interrupt.h"
+#include "pervasive.h"
+#include "ras.h"
+
+#ifdef DEBUG
+#define DBG(fmt...) udbg_printf(fmt)
+#else
+#define DBG(fmt...)
+#endif
+
+/*
+ * Emit the "machine" line of /proc/cpuinfo using the "model" property of
+ * the device tree root. An empty model string is printed when the root
+ * node or the property is missing.
+ */
+static void cell_show_cpuinfo(struct seq_file *m)
+{
+	struct device_node *root;
+	const char *model = NULL;
+
+	root = of_find_node_by_path("/");
+	if (root)
+		model = of_get_property(root, "model", NULL);
+	/* of_get_property() returns NULL when "model" is absent */
+	seq_printf(m, "machine\t\t: CHRP %s\n", model ? model : "");
+	of_node_put(root);
+}
+
+/* Progress hook: print the hex code and the message (empty if NULL). */
+static void cell_progress(char *s, unsigned short hex)
+{
+	const char *msg = s;
+
+	if (!msg)
+		msg = "";
+
+	printk("*** %04x : %s\n", hex, msg);
+}
+
+/*
+ * PCI header fixup: on the cell machine, clear the first
+ * PCI_BRIDGE_RESOURCES resources of the device at devfn 0 directly below
+ * an Axon PCIe host bridge.
+ */
+static void cell_fixup_pcie_rootcomplex(struct pci_dev *dev)
+{
+	struct pci_controller *phb;
+	const char *model;
+	int idx;
+
+	if (!machine_is(cell))
+		return;
+
+	/* Only a direct child of the PHB, at devfn 0, is of interest */
+	if (dev->bus->self != NULL || dev->devfn != 0)
+		return;
+
+	phb = pci_bus_to_host(dev->bus);
+	if (phb == NULL)
+		return;
+
+	/* PCIe host bridges only ... */
+	if (!of_device_is_compatible(phb->dn, "pciex"))
+		return;
+
+	/* ... and among those only Axon */
+	model = of_get_property(phb->dn, "model", NULL);
+	if (model == NULL || strcmp(model, "Axon") != 0)
+		return;
+
+	for (idx = 0; idx < PCI_BRIDGE_RESOURCES; idx++) {
+		struct resource *res = &dev->resource[idx];
+
+		res->end = 0;
+		res->start = 0;
+		res->flags = 0;
+	}
+
+	printk(KERN_DEBUG "PCI: Hiding resources on Axon PCIE RC %s\n",
+	       pci_name(dev));
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, cell_fixup_pcie_rootcomplex);
+
+/*
+ * Set up a PCI host bridge through RTAS and, for a node named "pci"
+ * whose model is "Spider", register the spider IO workarounds.
+ * Returns the rtas_setup_phb() error, otherwise 0.
+ */
+static int cell_setup_phb(struct pci_controller *phb)
+{
+	struct device_node *node;
+	const char *model;
+	int err;
+
+	err = rtas_setup_phb(phb);
+	if (err)
+		return err;
+
+	phb->controller_ops = cell_pci_controller_ops;
+
+	node = phb->dn;
+	model = of_get_property(node, "model", NULL);
+
+	/* Setup workarounds for spider */
+	if (model != NULL && of_node_name_eq(node, "pci") &&
+	    strcmp(model, "Spider") == 0)
+		iowa_register_bus(phb, &spiderpci_ops, &spiderpci_iowa_init,
+				  (void *)SPIDER_PCI_REG_BASE);
+
+	return 0;
+}
+
+/* Match table passed to of_platform_bus_probe() by cell_publish_devices(). */
+static const struct of_device_id cell_bus_ids[] __initconst = {
+ { .type = "soc", },
+ { .compatible = "soc", },
+ { .type = "spider", },
+ { .type = "axon", },
+ { .type = "plb5", },
+ { .type = "plb4", },
+ { .type = "opb", },
+ { .type = "ebc", },
+ {}, /* sentinel */
+};
+
+/*
+ * Create the platform devices of the machine: the southbridge buses, the
+ * PCI host bridges found directly below the root node, and one "cbe-mic"
+ * device per online node that has MIC registers. Always returns 0.
+ */
+static int __init cell_publish_devices(void)
+{
+	struct device_node *root, *child;
+	int nid;
+
+	root = of_find_node_by_path("/");
+
+	/* Publish OF platform devices for southbridge IOs */
+	of_platform_bus_probe(NULL, cell_bus_ids, NULL);
+
+	/* On spider based blades the PCI host bridge devices have to be
+	 * created by hand
+	 */
+	for_each_child_of_node(root, child) {
+		if (of_node_is_type(child, "pci") ||
+		    of_node_is_type(child, "pciex"))
+			of_platform_device_create(child, NULL, NULL);
+	}
+
+	of_node_put(root);
+
+	/* The MIC memory controller has no device of its own, so create a
+	 * platform device for the EDAC driver to attach to.
+	 */
+	for_each_online_node(nid) {
+		if (cbe_get_cpu_mic_tm_regs(cbe_node_to_cpu(nid)) != NULL)
+			platform_device_register_simple("cbe-mic", nid,
+							NULL, 0);
+	}
+
+	return 0;
+}
+machine_subsys_initcall(cell, cell_publish_devices);
+
+/*
+ * Allocate and initialise an MPIC for every "interrupt-controller" node
+ * compatible with "CBEA,platform-open-pic".
+ */
+static void __init mpic_init_IRQ(void)
+{
+	struct device_node *node;
+
+	for_each_node_by_name(node, "interrupt-controller") {
+		struct mpic *pic;
+
+		if (!of_device_is_compatible(node, "CBEA,platform-open-pic"))
+			continue;
+
+		/* All the zero arguments are filled in from the device-tree
+		 * by the MPIC driver
+		 */
+		pic = mpic_alloc(node, 0, MPIC_SECONDARY | MPIC_NO_RESET,
+				 0, 0, " MPIC ");
+		if (pic != NULL)
+			mpic_init(pic);
+	}
+}
+
+
+/* init_IRQ hook of the cell machine: set up the IIC, spider and MPIC. */
+static void __init cell_init_irq(void)
+{
+ iic_init_IRQ();
+ spider_init_IRQ();
+ mpic_init_IRQ();
+}
+
+/* Program DABRX with both the kernel and the user bit set. */
+static void __init cell_set_dabrx(void)
+{
+	unsigned long dabrx = DABRX_KERNEL | DABRX_USER;
+
+	mtspr(SPRN_DABRX, dabrx);
+}
+
+/*
+ * setup_arch hook of the cell machine. The optional pieces (SPU ops, RAS,
+ * SMP) are compiled in only when the matching config option is set.
+ */
+static void __init cell_setup_arch(void)
+{
+#ifdef CONFIG_SPU_BASE
+ spu_priv1_ops = &spu_priv1_mmio_ops;
+ spu_management_ops = &spu_management_of_ops;
+#endif
+
+ cbe_regs_init();
+
+ cell_set_dabrx();
+
+#ifdef CONFIG_CBE_RAS
+ cbe_ras_init();
+#endif
+
+#ifdef CONFIG_SMP
+ smp_init_cell();
+#endif
+ /* init to some ~sane value until calibrate_delay() runs */
+ loops_per_jiffy = 50000000;
+
+ /* Find and initialize PCI host bridges */
+ init_pci_config_tokens();
+
+ cbe_pervasive_init();
+
+ mmio_nvram_init();
+}
+
+/*
+ * Machine probe: returns 1 (and installs the RTAS power-off handler) when
+ * the device tree is compatible with "IBM,CBEA" or "IBM,CPBW-1.0", and 0
+ * otherwise.
+ */
+static int __init cell_probe(void)
+{
+	if (of_machine_is_compatible("IBM,CBEA") ||
+	    of_machine_is_compatible("IBM,CPBW-1.0")) {
+		pm_power_off = rtas_power_off;
+		return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Machine description for Cell: restart, halt and the time-of-day hooks
+ * use the rtas_* helpers, the rest are the cell_* functions in this file.
+ */
+define_machine(cell) {
+ .name = "Cell",
+ .probe = cell_probe,
+ .setup_arch = cell_setup_arch,
+ .show_cpuinfo = cell_show_cpuinfo,
+ .restart = rtas_restart,
+ .halt = rtas_halt,
+ .get_boot_time = rtas_get_boot_time,
+ .get_rtc_time = rtas_get_rtc_time,
+ .set_rtc_time = rtas_set_rtc_time,
+ .progress = cell_progress,
+ .init_IRQ = cell_init_irq,
+ .pci_setup_phb = cell_setup_phb,
+};
+
+struct pci_controller_ops cell_pci_controller_ops;
diff --git a/arch/powerpc/platforms/cell/smp.c b/arch/powerpc/platforms/cell/smp.c
new file mode 100644
index 000000000..30394c6f8
--- /dev/null
+++ b/arch/powerpc/platforms/cell/smp.c
@@ -0,0 +1,161 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * SMP support for BPA machines.
+ *
+ * Dave Engebretsen, Peter Bergner, and
+ * Mike Corrigan {engebret|bergner|mikec}@us.ibm.com
+ *
+ * Plus various changes from other IBM teams...
+ */
+
+#undef DEBUG
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/cache.h>
+#include <linux/err.h>
+#include <linux/device.h>
+#include <linux/cpu.h>
+#include <linux/pgtable.h>
+
+#include <asm/ptrace.h>
+#include <linux/atomic.h>
+#include <asm/irq.h>
+#include <asm/page.h>
+#include <asm/io.h>
+#include <asm/smp.h>
+#include <asm/paca.h>
+#include <asm/machdep.h>
+#include <asm/cputable.h>
+#include <asm/firmware.h>
+#include <asm/rtas.h>
+#include <asm/cputhreads.h>
+#include <asm/code-patching.h>
+
+#include "interrupt.h"
+#include <asm/udbg.h>
+
+#ifdef DEBUG
+#define DBG(fmt...) udbg_printf(fmt)
+#else
+#define DBG(fmt...)
+#endif
+
+/*
+ * The Primary thread of each non-boot processor was started from the OF client
+ * interface by prom_hold_cpus and is spinning on secondary_hold_spinloop.
+ */
+static cpumask_t of_spin_map;
+
+/**
+ * smp_startup_cpu() - start the given cpu
+ *
+ * At boot time, there is nothing to do for primary threads which were
+ * started from Open Firmware. For anything else, call RTAS with the
+ * appropriate start location.
+ *
+ * Returns:
+ * 0 - failure
+ * 1 - success
+ */
+static inline int smp_startup_cpu(unsigned int lcpu)
+{
+	unsigned long entry;
+	unsigned int hw_id;
+	int token;
+	int rc;
+
+	/* Threads parked by OF in the spin loop need no RTAS call */
+	if (cpumask_test_cpu(lcpu, &of_spin_map))
+		return 1;
+
+	hw_id = get_hard_smp_processor_id(lcpu);
+
+	/*
+	 * Without an RTAS start-cpu token the cpu is presumed to be
+	 * spinning already.
+	 */
+	token = rtas_function_token(RTAS_FN_START_CPU);
+	if (token == RTAS_UNKNOWN_SERVICE)
+		return 1;
+
+	entry = __pa(ppc_function_entry(generic_secondary_smp_init));
+
+	rc = rtas_call(token, 3, 1, NULL, hw_id, entry, lcpu);
+	if (rc == 0)
+		return 1;
+
+	printk(KERN_ERR "start-cpu failed: %i\n", rc);
+	return 0;
+}
+
+/*
+ * Per-cpu setup: every cpu except the boot cpu sets up its IIC here, and
+ * all cpus get DABRX programmed.
+ */
+static void smp_cell_setup_cpu(int cpu)
+{
+	const unsigned long dabrx = DABRX_KERNEL | DABRX_USER;
+
+	if (cpu != boot_cpuid) {
+		iic_setup_cpu();
+	}
+
+	/* The default DABRX is changed so that user watchpoints work */
+	mtspr(SPRN_DABRX, dabrx);
+}
+
+/*
+ * Release cpu @nr from its hold loop. Returns -EINVAL for an out of range
+ * cpu number, -ENOENT if the cpu could not be started, 0 otherwise.
+ */
+static int smp_cell_kick_cpu(int nr)
+{
+	if (nr < 0 || nr >= nr_cpu_ids)
+		return -EINVAL;
+
+	if (smp_startup_cpu(nr) == 0)
+		return -ENOENT;
+
+	/*
+	 * The processor is spinning until its cpu_start field becomes
+	 * non-zero. Once it is set, the processor continues on to
+	 * secondary_start.
+	 */
+	paca_ptrs[nr]->cpu_start = 1;
+
+	return 0;
+}
+
+/*
+ * SMP operations installed by smp_init_cell(), which may additionally
+ * fill in the give/take timebase hooks.
+ */
+static struct smp_ops_t bpa_iic_smp_ops = {
+ .message_pass = iic_message_pass,
+ .probe = iic_request_IPIs,
+ .kick_cpu = smp_cell_kick_cpu,
+ .setup_cpu = smp_cell_setup_cpu,
+ .cpu_bootable = smp_generic_cpu_bootable,
+};
+
+/* This is called very early */
+void __init smp_init_cell(void)
+{
+	int cpu;
+
+	DBG(" -> smp_init_cell()\n");
+
+	smp_ops = &bpa_iic_smp_ops;
+
+	/*
+	 * Record which threads are still spinning in hold loops: every
+	 * present cpu without SMT, only thread 0 of each core with SMT.
+	 */
+	if (!cpu_has_feature(CPU_FTR_SMT)) {
+		cpumask_copy(&of_spin_map, cpu_present_mask);
+	} else {
+		for_each_present_cpu(cpu) {
+			if (cpu_thread_in_core(cpu) != 0)
+				continue;
+			cpumask_set_cpu(cpu, &of_spin_map);
+		}
+	}
+
+	/* The boot cpu is never in the hold loop */
+	cpumask_clear_cpu(boot_cpuid, &of_spin_map);
+
+	/* Non-lpar has additional take/give timebase */
+	if (rtas_function_token(RTAS_FN_FREEZE_TIME_BASE) !=
+	    RTAS_UNKNOWN_SERVICE) {
+		smp_ops->give_timebase = rtas_give_timebase;
+		smp_ops->take_timebase = rtas_take_timebase;
+	}
+
+	DBG(" <- smp_init_cell()\n");
+}
diff --git a/arch/powerpc/platforms/cell/spider-pci.c b/arch/powerpc/platforms/cell/spider-pci.c
new file mode 100644
index 000000000..68439445b
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spider-pci.c
@@ -0,0 +1,170 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * IO workarounds for PCI on Celleb/Cell platform
+ *
+ * (C) Copyright 2006-2007 TOSHIBA CORPORATION
+ */
+
+#undef DEBUG
+
+#include <linux/kernel.h>
+#include <linux/of_address.h>
+#include <linux/slab.h>
+#include <linux/io.h>
+
+#include <asm/ppc-pci.h>
+#include <asm/pci-bridge.h>
+#include <asm/io-workarounds.h>
+
+#define SPIDER_PCI_DISABLE_PREFETCH
+
+/* Per-bus private data stored in iowa_bus->private. */
+struct spiderpci_iowa_private {
+ void __iomem *regs; /* ioremap()ed spider PCI register block */
+};
+
+/* Perform the dummy register read for @bus, followed by an iosync(). */
+static void spiderpci_io_flush(struct iowa_bus *bus)
+{
+	struct spiderpci_iowa_private *spider = bus->private;
+
+	in_be32(spider->regs + SPIDER_PCI_DUMMY_READ);
+	iosync();
+}
+
+/*
+ * Generate spiderpci_<name>(): perform the plain __do_<name>() read, then
+ * flush the bus that owns @addr, and return the value read.
+ */
+#define SPIDER_PCI_MMIO_READ(name, ret) \
+static ret spiderpci_##name(const PCI_IO_ADDR addr) \
+{ \
+ ret val = __do_##name(addr); \
+ spiderpci_io_flush(iowa_mem_find_bus(addr)); \
+ return val; \
+}
+
+/*
+ * Generate spiderpci_<name>() for the string reads: perform the plain
+ * __do_<name>() transfer into @buf, then flush the bus that owns @addr.
+ */
+#define SPIDER_PCI_MMIO_READ_STR(name) \
+static void spiderpci_##name(const PCI_IO_ADDR addr, void *buf, \
+ unsigned long count) \
+{ \
+ __do_##name(addr, buf, count); \
+ spiderpci_io_flush(iowa_mem_find_bus(addr)); \
+}
+
+SPIDER_PCI_MMIO_READ(readb, u8)
+SPIDER_PCI_MMIO_READ(readw, u16)
+SPIDER_PCI_MMIO_READ(readl, u32)
+SPIDER_PCI_MMIO_READ(readq, u64)
+SPIDER_PCI_MMIO_READ(readw_be, u16)
+SPIDER_PCI_MMIO_READ(readl_be, u32)
+SPIDER_PCI_MMIO_READ(readq_be, u64)
+SPIDER_PCI_MMIO_READ_STR(readsb)
+SPIDER_PCI_MMIO_READ_STR(readsw)
+SPIDER_PCI_MMIO_READ_STR(readsl)
+
+/* Copy @n bytes from IO space, then flush the bus that owns @src. */
+static void spiderpci_memcpy_fromio(void *dest, const PCI_IO_ADDR src,
+				    unsigned long n)
+{
+	struct iowa_bus *owner;
+
+	__do_memcpy_fromio(dest, src, n);
+
+	owner = iowa_mem_find_bus(src);
+	spiderpci_io_flush(owner);
+}
+
+/*
+ * Chip setup for one spider PCI bridge: optionally set bit 0x8 in the
+ * PVCI control/status register (when SPIDER_PCI_DISABLE_PREFETCH is
+ * defined), then allocate and DMA-map a dummy page and program its bus
+ * address as the dummy read base.
+ *
+ * Returns 0 on success, -1 if the allocation or the DMA mapping fails.
+ */
+static int __init spiderpci_pci_setup_chip(struct pci_controller *phb,
+ void __iomem *regs)
+{
+ void *dummy_page_va;
+ dma_addr_t dummy_page_da;
+
+#ifdef SPIDER_PCI_DISABLE_PREFETCH
+ u32 val = in_be32(regs + SPIDER_PCI_VCI_CNTL_STAT);
+ pr_debug("SPIDER_IOWA:PVCI_Control_Status was 0x%08x\n", val);
+ out_be32(regs + SPIDER_PCI_VCI_CNTL_STAT, val | 0x8);
+#endif /* SPIDER_PCI_DISABLE_PREFETCH */
+
+ /* setup dummy read */
+ /*
+ * On CellBlade we cannot tell which XDR memory kmalloc() uses for
+ * dummy_page_va.
+ * For best performance the page should come from the XDR nearest
+ * to the spider-pci.
+ * That means selecting the CBE nearest to the spider-pci and
+ * allocating from its XDR, but it is not known how to do that
+ * here.
+ *
+ * Celleb does not have this problem, because it has only one XDR.
+ */
+ dummy_page_va = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!dummy_page_va) {
+ pr_err("SPIDERPCI-IOWA:Alloc dummy_page_va failed.\n");
+ return -1;
+ }
+
+ dummy_page_da = dma_map_single(phb->parent, dummy_page_va,
+ PAGE_SIZE, DMA_FROM_DEVICE);
+ if (dma_mapping_error(phb->parent, dummy_page_da)) {
+ pr_err("SPIDER-IOWA:Map dummy page filed.\n");
+ kfree(dummy_page_va);
+ return -1;
+ }
+
+ /* The page stays allocated and mapped; it is not freed on success */
+ out_be32(regs + SPIDER_PCI_DUMMY_READ_BASE, dummy_page_da);
+
+ return 0;
+}
+
+/*
+ * IO workaround init callback for a spider PCI bus.
+ *
+ * @bus:  bus being initialised; bus->private receives the private data
+ * @data: offset of the spider PCI registers from the start of resource 0
+ *        of the host bridge node, passed as a pointer-sized integer
+ *
+ * Maps the registers and runs the chip setup. Returns 0 on success and
+ * -1 on any failure, in which case everything acquired here is released
+ * and bus->private is NULL.
+ */
+int __init spiderpci_iowa_init(struct iowa_bus *bus, void *data)
+{
+	void __iomem *regs = NULL;
+	struct spiderpci_iowa_private *priv;
+	struct device_node *np = bus->phb->dn;
+	struct resource r;
+	unsigned long offset = (unsigned long)data;
+
+	pr_debug("SPIDERPCI-IOWA:Bus initialize for spider(%pOF)\n",
+		 np);
+
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv) {
+		/* Terminate the message so it is not merged with the next */
+		pr_err("SPIDERPCI-IOWA:"
+		       "Can't allocate struct spiderpci_iowa_private\n");
+		return -1;
+	}
+
+	if (of_address_to_resource(np, 0, &r)) {
+		pr_err("SPIDERPCI-IOWA:Can't get resource.\n");
+		goto error;
+	}
+
+	regs = ioremap(r.start + offset, SPIDER_PCI_REG_SIZE);
+	if (!regs) {
+		pr_err("SPIDERPCI-IOWA:ioremap failed.\n");
+		goto error;
+	}
+	priv->regs = regs;
+	bus->private = priv;
+
+	if (spiderpci_pci_setup_chip(bus->phb, regs))
+		goto error;
+
+	return 0;
+
+error:
+	kfree(priv);
+	bus->private = NULL;
+
+	/* regs is still NULL when the failure happened before ioremap() */
+	if (regs)
+		iounmap(regs);
+
+	return -1;
+}
+
+/*
+ * IO accessors registered for spider buses. Only read-side operations
+ * are overridden, each with a variant that flushes the bus afterwards.
+ */
+struct ppc_pci_io spiderpci_ops = {
+ .readb = spiderpci_readb,
+ .readw = spiderpci_readw,
+ .readl = spiderpci_readl,
+ .readq = spiderpci_readq,
+ .readw_be = spiderpci_readw_be,
+ .readl_be = spiderpci_readl_be,
+ .readq_be = spiderpci_readq_be,
+ .readsb = spiderpci_readsb,
+ .readsw = spiderpci_readsw,
+ .readsl = spiderpci_readsl,
+ .memcpy_fromio = spiderpci_memcpy_fromio,
+};
+
diff --git a/arch/powerpc/platforms/cell/spider-pic.c b/arch/powerpc/platforms/cell/spider-pic.c
new file mode 100644
index 000000000..11df737c8
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spider-pic.c
@@ -0,0 +1,344 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * External Interrupt Controller on Spider South Bridge
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ *
+ * Author: Arnd Bergmann <arndb@de.ibm.com>
+ */
+
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/ioport.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/pgtable.h>
+
+#include <asm/io.h>
+
+#include "interrupt.h"
+
+/* register layout taken from Spider spec, table 7.4-4 */
+/* Byte offsets of the interrupt controller registers from pic->regs. */
+enum {
+ TIR_DEN = 0x004, /* Detection Enable Register */
+ TIR_MSK = 0x084, /* Mask Level Register */
+ TIR_EDC = 0x0c0, /* Edge Detection Clear Register */
+ TIR_PNDA = 0x100, /* Pending Register A */
+ TIR_PNDB = 0x104, /* Pending Register B */
+ TIR_CS = 0x144, /* Current Status Register */
+ TIR_LCSA = 0x150, /* Level Current Status Register A */
+ TIR_LCSB = 0x154, /* Level Current Status Register B */
+ TIR_LCSC = 0x158, /* Level Current Status Register C */
+ TIR_LCSD = 0x15c, /* Level Current Status Register D */
+ TIR_CFGA = 0x200, /* Setting Register A0 */
+ TIR_CFGB = 0x204, /* Setting Register B0 */
+ /* 0x208 ... 0x3ff Setting Register An/Bn (8 bytes per source) */
+ TIR_PPNDA = 0x400, /* Packet Pending Register A */
+ TIR_PPNDB = 0x404, /* Packet Pending Register B */
+ TIR_PIERA = 0x408, /* Packet Output Error Register A */
+ TIR_PIERB = 0x40c, /* Packet Output Error Register B */
+ TIR_PIEN = 0x444, /* Packet Output Enable Register */
+ TIR_PIPND = 0x454, /* Packet Output Pending Register */
+ TIRDID = 0x484, /* Spider Device ID Register */
+ REISTIM = 0x500, /* Reissue Command Timeout Time Setting */
+ REISTIMEN = 0x504, /* Reissue Command Timeout Setting */
+ REISWAITEN = 0x508, /* Reissue Wait Control */
+};
+
+#define SPIDER_CHIP_COUNT 4
+#define SPIDER_SRC_COUNT 64
+#define SPIDER_IRQ_INVALID 63
+
+/* State of one spider interrupt controller. */
+struct spider_pic {
+ struct irq_domain *host; /* linear domain covering SPIDER_SRC_COUNT sources */
+ void __iomem *regs; /* ioremap()ed register block */
+ unsigned int node_id; /* node id written into each source's config register */
+};
+/* One slot per controller, filled in order of discovery. */
+static struct spider_pic spider_pics[SPIDER_CHIP_COUNT];
+
+/* Return the controller stored as chip data of interrupt @d. */
+static struct spider_pic *spider_irq_data_to_pic(struct irq_data *d)
+{
+	struct spider_pic *pic = irq_data_get_irq_chip_data(d);
+
+	return pic;
+}
+
+/* Address of the first setting register of source @src (8 bytes apart). */
+static void __iomem *spider_get_irq_config(struct spider_pic *pic,
+					   unsigned int src)
+{
+	void __iomem *first_cfg = pic->regs + TIR_CFGA;
+
+	return first_cfg + 8 * src;
+}
+
+/* Unmask a source by setting the 0x30000000 bits of its config register. */
+static void spider_unmask_irq(struct irq_data *d)
+{
+	void __iomem *cfg;
+	u32 val;
+
+	cfg = spider_get_irq_config(spider_irq_data_to_pic(d),
+				    irqd_to_hwirq(d));
+
+	val = in_be32(cfg);
+	out_be32(cfg, val | 0x30000000u);
+}
+
+/* Mask a source by clearing the 0x30000000 bits of its config register. */
+static void spider_mask_irq(struct irq_data *d)
+{
+	void __iomem *cfg;
+	u32 val;
+
+	cfg = spider_get_irq_config(spider_irq_data_to_pic(d),
+				    irqd_to_hwirq(d));
+
+	val = in_be32(cfg);
+	out_be32(cfg, val & ~0x30000000u);
+}
+
+/*
+ * Acknowledge an interrupt. Only edge interrupts need work here, and only
+ * sources 47 to 50 can be configured as edge.
+ */
+static void spider_ack_irq(struct irq_data *d)
+{
+	struct spider_pic *pic = spider_irq_data_to_pic(d);
+	unsigned int hw = irqd_to_hwirq(d);
+
+	if (irqd_is_level_type(d) || hw < 47 || hw > 50)
+		return;
+
+	/* Clear the edge detection logic of this source */
+	out_be32(pic->regs + TIR_EDC, 0x100 | (hw & 0xf));
+}
+
+/*
+ * Program the sense type of a source into its two setting registers.
+ *
+ * Returns 0 on success, or -EINVAL when the requested type is not
+ * supported for this source (anything other than none/level-high outside
+ * sources 47-50) or is not one of the types decoded below.
+ */
+static int spider_set_irq_type(struct irq_data *d, unsigned int type)
+{
+ unsigned int sense = type & IRQ_TYPE_SENSE_MASK;
+ struct spider_pic *pic = spider_irq_data_to_pic(d);
+ unsigned int hw = irqd_to_hwirq(d);
+ void __iomem *cfg = spider_get_irq_config(pic, hw);
+ u32 old_mask;
+ u32 ic;
+
+ /* Note that only level high is supported for most interrupts */
+ if (sense != IRQ_TYPE_NONE && sense != IRQ_TYPE_LEVEL_HIGH &&
+ (hw < 47 || hw > 50))
+ return -EINVAL;
+
+ /* Decode sense type */
+ switch(sense) {
+ case IRQ_TYPE_EDGE_RISING:
+ ic = 0x3;
+ break;
+ case IRQ_TYPE_EDGE_FALLING:
+ ic = 0x2;
+ break;
+ case IRQ_TYPE_LEVEL_LOW:
+ ic = 0x0;
+ break;
+ case IRQ_TYPE_LEVEL_HIGH:
+ case IRQ_TYPE_NONE:
+ ic = 0x1;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ /* Configure the source. One gross hack that was there before and
+ * that I've kept around is the priority to the BE which I set to
+ * be the same as the interrupt source number. I don't know whether
+ * that's supposed to make any kind of sense however, we'll have to
+ * decide that, but for now, I'm not changing the behaviour.
+ */
+ /* Keep the current mask bits so the masked state is unchanged */
+ old_mask = in_be32(cfg) & 0x30000000u;
+ out_be32(cfg, old_mask | (ic << 24) | (0x7 << 16) |
+ (pic->node_id << 4) | 0xe);
+ out_be32(cfg + 4, (0x2 << 16) | (hw & 0xff));
+
+ return 0;
+}
+
+/* irq_chip installed on every mapped spider interrupt by spider_host_map(). */
+static struct irq_chip spider_pic = {
+ .name = "SPIDER",
+ .irq_unmask = spider_unmask_irq,
+ .irq_mask = spider_mask_irq,
+ .irq_ack = spider_ack_irq,
+ .irq_set_type = spider_set_irq_type,
+};
+
+/*
+ * irq_domain map callback: attach the controller (the domain's host_data)
+ * and the spider irq_chip with the level flow handler to @virq, then
+ * apply the default type. Always returns 0.
+ */
+static int spider_host_map(struct irq_domain *h, unsigned int virq,
+ irq_hw_number_t hw)
+{
+ irq_set_chip_data(virq, h->host_data);
+ irq_set_chip_and_handler(virq, &spider_pic, handle_level_irq);
+
+ /* Set default irq type */
+ irq_set_irq_type(virq, IRQ_TYPE_NONE);
+
+ return 0;
+}
+
+/*
+ * irq_domain xlate callback. Only the first cell of the specifier is
+ * used; its upper bits are dropped because old device-trees encode a
+ * node number there. The type is always reported as level high.
+ */
+static int spider_host_xlate(struct irq_domain *h, struct device_node *ct,
+			     const u32 *intspec, unsigned int intsize,
+			     irq_hw_number_t *out_hwirq, unsigned int *out_flags)
+
+{
+	const u32 src = intspec[0] & 0x3f;
+
+	*out_hwirq = src;
+	*out_flags = IRQ_TYPE_LEVEL_HIGH;
+
+	return 0;
+}
+
+/* Domain operations passed to irq_domain_add_linear() in spider_init_one(). */
+static const struct irq_domain_ops spider_host_ops = {
+ .map = spider_host_map,
+ .xlate = spider_host_xlate,
+};
+
+/*
+ * Chained handler of the cascade interrupt: read the current source from
+ * TIR_CS, dispatch it unless it is SPIDER_IRQ_INVALID, then EOI the
+ * parent interrupt.
+ */
+static void spider_irq_cascade(struct irq_desc *desc)
+{
+	struct spider_pic *pic = irq_desc_get_handler_data(desc);
+	struct irq_chip *parent = irq_desc_get_chip(desc);
+	unsigned int src = in_be32(pic->regs + TIR_CS) >> 24;
+
+	if (src != SPIDER_IRQ_INVALID)
+		generic_handle_domain_irq(pic->host, src);
+
+	parent->irq_eoi(&desc->irq_data);
+}
+
+/* Hooking up the cascade is a problem. Our device-tree is crap and does
+ * not tell us, at least not in the "standard" way, which BE iic interrupt
+ * we are hooked on. We can reconstruct it from two pieces of information
+ * though: which BE node we are connected to and whether we are connected
+ * to IOIF0 or IOIF1. Right now, we really only care about the IBM cell
+ * blade and we know that its firmware gives us an interrupt-map property
+ * which is pretty strange.
+ */
+/*
+ * Find (and map) the IIC interrupt this controller cascades into and set
+ * pic->node_id on the fallback path.
+ *
+ * A regular "interrupts" property is used when present. Otherwise the
+ * first entry of the controller's "interrupt-map" is decoded by hand.
+ *
+ * Returns the virtual irq of the cascade, or 0 on failure.
+ */
+static unsigned int __init spider_find_cascade_and_node(struct spider_pic *pic)
+{
+	unsigned int virq;
+	const u32 *imap, *tmp;
+	int imaplen, intsize, unit;
+	struct device_node *iic;
+	struct device_node *of_node;
+
+	of_node = irq_domain_get_of_node(pic->host);
+
+	/* First, we check whether we have a real "interrupts" in the device
+	 * tree in case the device-tree is ever fixed
+	 */
+	virq = irq_of_parse_and_map(of_node, 0);
+	if (virq)
+		return virq;
+
+	/* Now do the horrible hacks */
+	tmp = of_get_property(of_node, "#interrupt-cells", NULL);
+	if (tmp == NULL)
+		return 0;
+	intsize = *tmp;
+	imap = of_get_property(of_node, "interrupt-map", &imaplen);
+	if (imap == NULL)
+		return 0;
+	/* The property length is in bytes, everything below counts cells */
+	imaplen /= (int)sizeof(*imap);
+	if (intsize < 0 || imaplen < (intsize + 1))
+		return 0;
+	iic = of_find_node_by_phandle(imap[intsize]);
+	if (iic == NULL)
+		return 0;
+	imap += intsize + 1;
+	imaplen -= intsize + 1;
+	tmp = of_get_property(iic, "#interrupt-cells", NULL);
+	if (tmp == NULL) {
+		of_node_put(iic);
+		return 0;
+	}
+	intsize = *tmp;
+	/* The map must still hold a complete parent interrupt specifier */
+	if (intsize < 1 || imaplen < intsize) {
+		of_node_put(iic);
+		return 0;
+	}
+	/* Assume unit is last entry of interrupt specifier */
+	unit = imap[intsize - 1];
+	/* Ok, we have a unit, now let's try to get the node */
+	tmp = of_get_property(iic, "ibm,interrupt-server-ranges", NULL);
+	if (tmp == NULL) {
+		of_node_put(iic);
+		return 0;
+	}
+	/* ugly as hell but works for now */
+	pic->node_id = (*tmp) >> 1;
+	of_node_put(iic);
+
+	/* Ok, now let's get cracking. You may ask me why I just didn't match
+	 * the iic host from the iic OF node, but that way I'm still compatible
+	 * with really really old old firmwares for which we don't have a node
+	 */
+	/* Manufacture an IIC interrupt number of class 2 */
+	virq = irq_create_mapping(NULL,
+				  (pic->node_id << IIC_IRQ_NODE_SHIFT) |
+				  (2 << IIC_IRQ_CLASS_SHIFT) |
+				  unit);
+	if (!virq)
+		printk(KERN_ERR "spider_pic: failed to map cascade !\n");
+	return virq;
+}
+
+
+/*
+ * Bring up one spider interrupt controller.
+ *
+ * @of_node: device node of the controller
+ * @chip:    index into spider_pics[] to use for it
+ * @addr:    physical address of its register block
+ *
+ * Panics if the registers cannot be mapped or the irq domain cannot be
+ * allocated. If no cascade interrupt is found the function returns with
+ * all sources disabled and interrupt detection left as it was.
+ */
+static void __init spider_init_one(struct device_node *of_node, int chip,
+ unsigned long addr)
+{
+ struct spider_pic *pic = &spider_pics[chip];
+ int i, virq;
+
+ /* Map registers */
+ pic->regs = ioremap(addr, 0x1000);
+ if (pic->regs == NULL)
+ panic("spider_pic: can't map registers !");
+
+ /* Allocate a host */
+ pic->host = irq_domain_add_linear(of_node, SPIDER_SRC_COUNT,
+ &spider_host_ops, pic);
+ if (pic->host == NULL)
+ panic("spider_pic: can't allocate irq host !");
+
+ /* Go through all sources and disable them */
+ for (i = 0; i < SPIDER_SRC_COUNT; i++) {
+ void __iomem *cfg = pic->regs + TIR_CFGA + 8 * i;
+ out_be32(cfg, in_be32(cfg) & ~0x30000000u);
+ }
+
+ /* do not mask any interrupts because of level */
+ out_be32(pic->regs + TIR_MSK, 0x0);
+
+ /* enable interrupt packets to be output */
+ out_be32(pic->regs + TIR_PIEN, in_be32(pic->regs + TIR_PIEN) | 0x1);
+
+ /* Hook up the cascade interrupt to the iic and nodeid */
+ virq = spider_find_cascade_and_node(pic);
+ if (!virq)
+ return;
+ /* Handler data must be in place before the chained handler is set */
+ irq_set_handler_data(virq, pic);
+ irq_set_chained_handler(virq, spider_irq_cascade);
+
+ printk(KERN_INFO "spider_pic: node %d, addr: 0x%lx %pOF\n",
+ pic->node_id, addr, of_node);
+
+ /* Enable the interrupt detection enable bit. Do this last! */
+ out_be32(pic->regs + TIR_DEN, in_be32(pic->regs + TIR_DEN) | 0x1);
+}
+
+/*
+ * Find and initialise every spider interrupt controller in the device
+ * tree. At most SPIDER_CHIP_COUNT controllers are set up; any further
+ * ones are reported and ignored.
+ */
+void __init spider_init_IRQ(void)
+{
+	struct resource r;
+	struct device_node *dn;
+	int chip = 0;
+
+	/* XXX node numbers are totally bogus. We _hope_ we get the device
+	 * nodes in the right order here but that's definitely not guaranteed,
+	 * we need to get the node from the device tree instead.
+	 * There is currently no proper property for it (but our whole
+	 * device-tree is bogus anyway) so all we can do is pray or maybe test
+	 * the address and deduce the node-id
+	 */
+	for_each_node_by_name(dn, "interrupt-controller") {
+		if (of_device_is_compatible(dn, "CBEA,platform-spider-pic")) {
+			if (of_address_to_resource(dn, 0, &r)) {
+				printk(KERN_WARNING "spider-pic: Failed\n");
+				continue;
+			}
+		} else if (of_device_is_compatible(dn, "sti,platform-spider-pic")
+			   && (chip < 2)) {
+			static long hard_coded_pics[] =
+				{ 0x24000008000ul, 0x34000008000ul};
+			r.start = hard_coded_pics[chip];
+		} else
+			continue;
+
+		/* spider_pics[] only has room for SPIDER_CHIP_COUNT chips */
+		if (chip >= SPIDER_CHIP_COUNT) {
+			printk(KERN_WARNING
+			       "spider-pic: too many controllers, ignoring %pOF\n",
+			       dn);
+			continue;
+		}
+		spider_init_one(dn, chip++, r.start);
+	}
+}
diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c
new file mode 100644
index 000000000..dea6f0f25
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spu_base.c
@@ -0,0 +1,790 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Low-level SPU handling
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ *
+ * Author: Arnd Bergmann <arndb@de.ibm.com>
+ */
+
+#undef DEBUG
+
+#include <linux/interrupt.h>
+#include <linux/list.h>
+#include <linux/init.h>
+#include <linux/ptrace.h>
+#include <linux/slab.h>
+#include <linux/wait.h>
+#include <linux/mm.h>
+#include <linux/io.h>
+#include <linux/mutex.h>
+#include <linux/linux_logo.h>
+#include <linux/syscore_ops.h>
+#include <linux/sysfs.h>
+#include <asm/spu.h>
+#include <asm/spu_priv1.h>
+#include <asm/spu_csa.h>
+#include <asm/xmon.h>
+#include <asm/kexec.h>
+
+/* Management operations table; init_spu_base() does nothing while it is NULL. */
+const struct spu_management_ops *spu_management_ops;
+EXPORT_SYMBOL_GPL(spu_management_ops);
+
+/* priv1 register access operations table. */
+const struct spu_priv1_ops *spu_priv1_ops;
+EXPORT_SYMBOL_GPL(spu_priv1_ops);
+
+/* Per-node SPU bookkeeping, indexed by spu->node (see create_spu()). */
+struct cbe_spu_info cbe_spu_info[MAX_NUMNODES];
+EXPORT_SYMBOL_GPL(cbe_spu_info);
+
+/*
+ * The spufs fault-handling code needs to call force_sig_fault to raise signals
+ * on DMA errors. Export it here to avoid general kernel-wide access to this
+ * function
+ */
+EXPORT_SYMBOL_GPL(force_sig_fault);
+
+/*
+ * Protects cbe_spu_info and spu->number.
+ */
+static DEFINE_SPINLOCK(spu_lock);
+
+/*
+ * List of all spus in the system.
+ *
+ * This list is iterated by callers from irq context and callers that
+ * want to sleep. Thus modifications need to be done with both
+ * spu_full_list_lock and spu_full_list_mutex held, while iterating
+ * through it requires either of these locks.
+ *
+ * In addition spu_full_list_lock protects all assignments to
+ * spu->mm.
+ */
+static LIST_HEAD(spu_full_list);
+static DEFINE_SPINLOCK(spu_full_list_lock);
+static DEFINE_MUTEX(spu_full_list_mutex);
+
+/*
+ * Invalidate all SLB entries of @spu, but only if the relocate bit is set
+ * in its MFC state register 1. The check and the register write are done
+ * under spu->register_lock with interrupts disabled.
+ */
+void spu_invalidate_slbs(struct spu *spu)
+{
+	struct spu_priv2 __iomem *regs = spu->priv2;
+	unsigned long irq_state;
+
+	spin_lock_irqsave(&spu->register_lock, irq_state);
+	if (spu_mfc_sr1_get(spu) & MFC_STATE1_RELOCATE_MASK) {
+		out_be64(&regs->slb_invalidate_all_W, 0UL);
+	}
+	spin_unlock_irqrestore(&spu->register_lock, irq_state);
+}
+EXPORT_SYMBOL_GPL(spu_invalidate_slbs);
+
+/*
+ * Called by the MM core when a segment size is changed: flush the SLBs of
+ * every SPE that is currently associated with @mm.
+ *
+ * The full SPU list is walked under spu_full_list_lock, which also guards
+ * the spu->mm assignments being compared against.
+ */
+void spu_flush_all_slbs(struct mm_struct *mm)
+{
+	unsigned long irq_state;
+	struct spu *cur;
+
+	spin_lock_irqsave(&spu_full_list_lock, irq_state);
+	list_for_each_entry(cur, &spu_full_list, full_list) {
+		if (cur->mm != mm)
+			continue;
+		spu_invalidate_slbs(cur);
+	}
+	spin_unlock_irqrestore(&spu_full_list_lock, irq_state);
+}
+
+/*
+ * Mark every CPU in the cpumask of @mm so that TLB invalidations for it are
+ * broadcast globally. This is an acknowledged hack; it is unclear whether
+ * it works properly with NR_CPUS == 1, in which case one extra bit is set.
+ */
+static inline void mm_needs_global_tlbie(struct mm_struct *mm)
+{
+	int nbits = NR_CPUS;
+
+	if (NR_CPUS <= 1)
+		nbits = NR_CPUS + 1;
+
+	/* Global TLBIE broadcast required with SPEs. */
+	bitmap_fill(cpumask_bits(mm_cpumask(mm)), nbits);
+}
+
+/*
+ * Bind @spu to @mm (or detach it when @mm is NULL). The assignment is made
+ * under spu_full_list_lock; a non-NULL mm is then flagged as needing global
+ * TLB invalidation broadcasts.
+ */
+void spu_associate_mm(struct spu *spu, struct mm_struct *mm)
+{
+	unsigned long irq_state;
+
+	spin_lock_irqsave(&spu_full_list_lock, irq_state);
+	spu->mm = mm;
+	spin_unlock_irqrestore(&spu_full_list_lock, irq_state);
+
+	if (!mm)
+		return;
+
+	mm_needs_global_tlbie(mm);
+}
+EXPORT_SYMBOL_GPL(spu_associate_mm);
+
+/*
+ * Return 1 when the 64K page size has a non-zero shift in mmu_psize_defs,
+ * 0 otherwise.
+ */
+int spu_64k_pages_available(void)
+{
+	if (mmu_psize_defs[MMU_PAGE_64K].shift == 0)
+		return 0;
+
+	return 1;
+}
+EXPORT_SYMBOL_GPL(spu_64k_pages_available);
+
+/*
+ * Restart the MFC DMA of @spu after a fault has been handled. If a context
+ * switch is pending, the restart is not issued; the fault-pending flag is
+ * set instead, followed by a full memory barrier.
+ */
+static void spu_restart_dma(struct spu *spu)
+{
+	struct spu_priv2 __iomem *regs = spu->priv2;
+
+	if (test_bit(SPU_CONTEXT_SWITCH_PENDING, &spu->flags)) {
+		set_bit(SPU_CONTEXT_FAULT_PENDING, &spu->flags);
+		mb();
+		return;
+	}
+
+	out_be64(&regs->mfc_control_RW, MFC_CNTL_RESTART_DMA_COMMAND);
+}
+
+/*
+ * Program SLB slot @slbe of @spu with the vsid/esid pair held in @slb.
+ *
+ * The register writes are order-sensitive: the slot index is selected, the
+ * esid is cleared, the vsid is written, and only then is the new esid
+ * written. Both callers in this file hold spu->register_lock.
+ */
+static inline void spu_load_slb(struct spu *spu, int slbe, struct copro_slb *slb)
+{
+ struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+ pr_debug("%s: adding SLB[%d] 0x%016llx 0x%016llx\n",
+ __func__, slbe, slb->vsid, slb->esid);
+
+ out_be64(&priv2->slb_index_W, slbe);
+ /* set invalid before writing vsid */
+ out_be64(&priv2->slb_esid_RW, 0);
+ /* now it's safe to write the vsid */
+ out_be64(&priv2->slb_vsid_RW, slb->vsid);
+ /* setting the new esid makes the entry valid again */
+ out_be64(&priv2->slb_esid_RW, slb->esid);
+}
+
+/*
+ * Handle a segment fault at effective address @ea: compute the SLB entry
+ * for spu->mm, load it into the slot named by spu->slb_replace, advance
+ * that slot index (wrapping to 0 once it reaches 8), restart the DMA and
+ * count the fault. Returns 0, or the error from copro_calculate_slb().
+ */
+static int __spu_trap_data_seg(struct spu *spu, unsigned long ea)
+{
+	struct copro_slb entry;
+	int rc;
+
+	rc = copro_calculate_slb(spu->mm, ea, &entry);
+	if (rc != 0)
+		return rc;
+
+	spu_load_slb(spu, spu->slb_replace, &entry);
+
+	/* round-robin over the replaceable slots */
+	if (++spu->slb_replace >= 8)
+		spu->slb_replace = 0;
+
+	spu_restart_dma(spu);
+	spu->stats.slb_flt++;
+
+	return 0;
+}
+
+extern int hash_page(unsigned long ea, unsigned long access,
+ unsigned long trap, unsigned long dsisr); //XXX
+/*
+ * Handle a class 1 storage (mapping) fault at effective address @ea.
+ *
+ * A PTE-not-found fault outside the user region is resolved right here
+ * through hash_page(); spu->register_lock is dropped around that call and
+ * retaken afterwards, so the caller must hold it on entry. If hash_page()
+ * succeeds the DMA is restarted. In every other case the fault is handed
+ * to spu->stop_callback(spu, 1), with the address and dsisr published in
+ * spu->class_1_dar / spu->class_1_dsisr for the duration of the callback.
+ * Always returns 0.
+ */
+static int __spu_trap_data_map(struct spu *spu, unsigned long ea, u64 dsisr)
+{
+ int ret;
+
+ pr_debug("%s, %llx, %lx\n", __func__, dsisr, ea);
+
+ /*
+ * Handle kernel space hash faults immediately. User hash
+ * faults need to be deferred to process context.
+ */
+ if ((dsisr & MFC_DSISR_PTE_NOT_FOUND) &&
+ (get_region_id(ea) != USER_REGION_ID)) {
+
+ spin_unlock(&spu->register_lock);
+ ret = hash_page(ea,
+ _PAGE_PRESENT | _PAGE_READ | _PAGE_PRIVILEGED,
+ 0x300, dsisr);
+ spin_lock(&spu->register_lock);
+
+ if (!ret) {
+ spu_restart_dma(spu);
+ return 0;
+ }
+ }
+
+ /* expose the fault details only while the callback runs */
+ spu->class_1_dar = ea;
+ spu->class_1_dsisr = dsisr;
+
+ spu->stop_callback(spu, 1);
+
+ spu->class_1_dar = 0;
+ spu->class_1_dsisr = 0;
+
+ return 0;
+}
+
+/*
+ * Fill @slb with a kernel SLB entry covering @addr. The page-size encoding
+ * comes from the linear-mapping page size when @addr lies in the linear map
+ * region, and from the virtual page size otherwise. The vsid is computed
+ * for 256M segments.
+ */
+static void __spu_kernel_slb(void *addr, struct copro_slb *slb)
+{
+	unsigned long ea = (unsigned long)addr;
+	int psize;
+	u64 llp;
+
+	psize = (get_region_id(ea) == LINEAR_MAP_REGION_ID) ?
+		mmu_linear_psize : mmu_virtual_psize;
+	llp = mmu_psize_defs[psize].sllp;
+
+	slb->vsid = (get_kernel_vsid(ea, MMU_SEGSIZE_256M) << SLB_VSID_SHIFT) |
+		SLB_VSID_KERNEL | llp;
+	slb->esid = (ea & ESID_MASK) | SLB_ESID_V;
+}
+
+/*
+ * Return non-zero if one of the first @nr_slbs entries of @slbs already
+ * covers the segment containing @new_addr, i.e. its esid matches the
+ * address in all ESID_MASK bits; return 0 otherwise.
+ */
+static inline int __slb_present(struct copro_slb *slbs, int nr_slbs,
+		void *new_addr)
+{
+	unsigned long wanted = (unsigned long)new_addr & ESID_MASK;
+	int idx;
+
+	for (idx = 0; idx < nr_slbs; idx++) {
+		if ((slbs[idx].esid & ESID_MASK) == wanted)
+			return 1;
+	}
+
+	return 0;
+}
+
+/**
+ * Setup the SPU kernel SLBs, in preparation for a context save/restore. We
+ * need to map both the context save area, and the save/restore code.
+ *
+ * Because the lscsa and code may cross segment boundaries, we check to see
+ * if mappings are required for the start and end of each range. We currently
+ * assume that the mappings are smaller than one segment - if not, something
+ * is seriously wrong.
+ *
+ * The entries are built first, then loaded into SLB slots 0..nr_slbs-1
+ * under spu->register_lock with interrupts disabled.
+ */
+void spu_setup_kernel_slbs(struct spu *spu, struct spu_lscsa *lscsa,
+ void *code, int code_size)
+{
+ /* at most one entry per address in addrs[] below */
+ struct copro_slb slbs[4];
+ int i, nr_slbs = 0;
+ /* start and end addresses of both mappings */
+ void *addrs[] = {
+ lscsa, (void *)lscsa + sizeof(*lscsa) - 1,
+ code, code + code_size - 1
+ };
+
+ /* check the set of addresses, and create a new entry in the slbs array
+ * if there isn't already a SLB for that address */
+ for (i = 0; i < ARRAY_SIZE(addrs); i++) {
+ if (__slb_present(slbs, nr_slbs, addrs[i]))
+ continue;
+
+ __spu_kernel_slb(addrs[i], &slbs[nr_slbs]);
+ nr_slbs++;
+ }
+
+ spin_lock_irq(&spu->register_lock);
+ /* Add the set of SLBs */
+ for (i = 0; i < nr_slbs; i++)
+ spu_load_slb(spu, i, &slbs[i]);
+ spin_unlock_irq(&spu->register_lock);
+}
+EXPORT_SYMBOL_GPL(spu_setup_kernel_slbs);
+
+/*
+ * Class 0 interrupt handler; @data is the struct spu.
+ *
+ * Under spu->register_lock: the masked status bits are merged into
+ * spu->class_0_pending and the MFC DAR is latched into spu->class_0_dar,
+ * spu->stop_callback(spu, 0) is invoked, both fields are reset, and the
+ * handled status bits are acknowledged. Always returns IRQ_HANDLED.
+ */
+static irqreturn_t
+spu_irq_class_0(int irq, void *data)
+{
+ struct spu *spu;
+ unsigned long stat, mask;
+
+ spu = data;
+
+ spin_lock(&spu->register_lock);
+ mask = spu_int_mask_get(spu, 0);
+ stat = spu_int_stat_get(spu, 0) & mask;
+
+ spu->class_0_pending |= stat;
+ spu->class_0_dar = spu_mfc_dar_get(spu);
+ spu->stop_callback(spu, 0);
+ spu->class_0_pending = 0;
+ spu->class_0_dar = 0;
+
+ /* acknowledge only after the callback has run */
+ spu_int_stat_clear(spu, 0, stat);
+ spin_unlock(&spu->register_lock);
+
+ return IRQ_HANDLED;
+}
+
+/*
+ * Class 1 interrupt handler (segment and storage faults); @data is the
+ * struct spu.
+ *
+ * Status, DAR and DSISR are sampled and the status acknowledged up front
+ * under spu->register_lock; the DSISR register is zeroed when a storage
+ * fault is reported. Segment faults go to __spu_trap_data_seg(), storage
+ * faults to __spu_trap_data_map(). Returns IRQ_HANDLED if any enabled
+ * status bit was set, IRQ_NONE otherwise.
+ */
+static irqreturn_t
+spu_irq_class_1(int irq, void *data)
+{
+ struct spu *spu;
+ unsigned long stat, mask, dar, dsisr;
+
+ spu = data;
+
+ /* atomically read & clear class1 status. */
+ spin_lock(&spu->register_lock);
+ mask = spu_int_mask_get(spu, 1);
+ stat = spu_int_stat_get(spu, 1) & mask;
+ dar = spu_mfc_dar_get(spu);
+ dsisr = spu_mfc_dsisr_get(spu);
+ if (stat & CLASS1_STORAGE_FAULT_INTR)
+ spu_mfc_dsisr_set(spu, 0ul);
+ spu_int_stat_clear(spu, 1, stat);
+
+ pr_debug("%s: %lx %lx %lx %lx\n", __func__, mask, stat,
+ dar, dsisr);
+
+ if (stat & CLASS1_SEGMENT_FAULT_INTR)
+ __spu_trap_data_seg(spu, dar);
+
+ /* may temporarily drop and retake register_lock */
+ if (stat & CLASS1_STORAGE_FAULT_INTR)
+ __spu_trap_data_map(spu, dar, dsisr);
+
+ spu->class_1_dsisr = 0;
+ spu->class_1_dar = 0;
+
+ spin_unlock(&spu->register_lock);
+
+ return stat ? IRQ_HANDLED : IRQ_NONE;
+}
+
+/*
+ * Class 2 interrupt handler (mailbox, stop/halt and DMA tag group
+ * events); @data is the struct spu.
+ *
+ * Under spu->register_lock: pending mailbox interrupts are masked, all
+ * enabled pending bits are acknowledged, and the matching callbacks are
+ * dispatched. Returns IRQ_HANDLED if any enabled status bit was set,
+ * IRQ_NONE otherwise.
+ */
+static irqreturn_t
+spu_irq_class_2(int irq, void *data)
+{
+ struct spu *spu;
+ unsigned long stat;
+ unsigned long mask;
+ const int mailbox_intrs =
+ CLASS2_MAILBOX_THRESHOLD_INTR | CLASS2_MAILBOX_INTR;
+
+ spu = data;
+ spin_lock(&spu->register_lock);
+ stat = spu_int_stat_get(spu, 2);
+ mask = spu_int_mask_get(spu, 2);
+ /* ignore interrupts we're not waiting for */
+ stat &= mask;
+ /* mailbox interrupts are level triggered. mask them now before
+ * acknowledging */
+ if (stat & mailbox_intrs)
+ spu_int_mask_and(spu, 2, ~(stat & mailbox_intrs));
+ /* acknowledge all interrupts before the callbacks */
+ spu_int_stat_clear(spu, 2, stat);
+
+ pr_debug("class 2 interrupt %d, %lx, %lx\n", irq, stat, mask);
+
+ if (stat & CLASS2_MAILBOX_INTR)
+ spu->ibox_callback(spu);
+
+ /* stop and halt both go to the stop callback with class 2 */
+ if (stat & CLASS2_SPU_STOP_INTR)
+ spu->stop_callback(spu, 2);
+
+ if (stat & CLASS2_SPU_HALT_INTR)
+ spu->stop_callback(spu, 2);
+
+ if (stat & CLASS2_SPU_DMA_TAG_GROUP_COMPLETE_INTR)
+ spu->mfc_callback(spu);
+
+ if (stat & CLASS2_MAILBOX_THRESHOLD_INTR)
+ spu->wbox_callback(spu);
+
+ /* counted on every invocation, even when stat is 0 */
+ spu->stats.class2_intr++;
+
+ spin_unlock(&spu->register_lock);
+
+ return stat ? IRQ_HANDLED : IRQ_NONE;
+}
+
+/*
+ * Request the class 0, 1 and 2 interrupt lines of @spu.
+ *
+ * Classes whose spu->irqs[] entry is 0 are skipped. Each line is named
+ * "speNN.C" (NN = spu->number, C = class) and registered with @spu as the
+ * handler cookie. On failure the lines requested so far are released in
+ * reverse order and the request_irq() error is returned; returns 0 on
+ * success.
+ */
+static int __init spu_request_irqs(struct spu *spu)
+{
+ int ret = 0;
+
+ if (spu->irqs[0]) {
+ snprintf(spu->irq_c0, sizeof (spu->irq_c0), "spe%02d.0",
+ spu->number);
+ ret = request_irq(spu->irqs[0], spu_irq_class_0,
+ 0, spu->irq_c0, spu);
+ if (ret)
+ goto bail0;
+ }
+ if (spu->irqs[1]) {
+ snprintf(spu->irq_c1, sizeof (spu->irq_c1), "spe%02d.1",
+ spu->number);
+ ret = request_irq(spu->irqs[1], spu_irq_class_1,
+ 0, spu->irq_c1, spu);
+ if (ret)
+ goto bail1;
+ }
+ if (spu->irqs[2]) {
+ snprintf(spu->irq_c2, sizeof (spu->irq_c2), "spe%02d.2",
+ spu->number);
+ ret = request_irq(spu->irqs[2], spu_irq_class_2,
+ 0, spu->irq_c2, spu);
+ if (ret)
+ goto bail2;
+ }
+ return 0;
+
+ /* unwind: each label frees the class requested before the failing one */
+bail2:
+ if (spu->irqs[1])
+ free_irq(spu->irqs[1], spu);
+bail1:
+ if (spu->irqs[0])
+ free_irq(spu->irqs[0], spu);
+bail0:
+ return ret;
+}
+
+/*
+ * Release the class 0, 1 and 2 interrupt lines of @spu, in that order,
+ * skipping any class whose spu->irqs[] entry is 0.
+ */
+static void spu_free_irqs(struct spu *spu)
+{
+	int class;
+
+	for (class = 0; class < 3; class++) {
+		if (!spu->irqs[class])
+			continue;
+		free_irq(spu->irqs[class], spu);
+	}
+}
+
+/*
+ * Reset the channel state of @spu through its priv2 registers.
+ *
+ * First, for every channel in zero_list, the channel is selected and the
+ * value 0 is written to its data register "count" times. Then, for every
+ * channel in count_list, the channel is selected and its channel count
+ * register is set to the listed value.
+ */
+void spu_init_channels(struct spu *spu)
+{
+ static const struct {
+ unsigned channel;
+ unsigned count;
+ } zero_list[] = {
+ { 0x00, 1, }, { 0x01, 1, }, { 0x03, 1, }, { 0x04, 1, },
+ { 0x18, 1, }, { 0x19, 1, }, { 0x1b, 1, }, { 0x1d, 1, },
+ }, count_list[] = {
+ { 0x00, 0, }, { 0x03, 0, }, { 0x04, 0, }, { 0x15, 16, },
+ { 0x17, 1, }, { 0x18, 0, }, { 0x19, 0, }, { 0x1b, 0, },
+ { 0x1c, 1, }, { 0x1d, 0, }, { 0x1e, 1, },
+ };
+ struct spu_priv2 __iomem *priv2;
+ int i;
+
+ priv2 = spu->priv2;
+
+ /* initialize all channel data to zero */
+ for (i = 0; i < ARRAY_SIZE(zero_list); i++) {
+ int count;
+
+ /* select the channel, then write its data words */
+ out_be64(&priv2->spu_chnlcntptr_RW, zero_list[i].channel);
+ for (count = 0; count < zero_list[i].count; count++)
+ out_be64(&priv2->spu_chnldata_RW, 0);
+ }
+
+ /* initialize channel counts to meaningful values */
+ for (i = 0; i < ARRAY_SIZE(count_list); i++) {
+ out_be64(&priv2->spu_chnlcntptr_RW, count_list[i].channel);
+ out_be64(&priv2->spu_chnlcnt_RW, count_list[i].count);
+ }
+}
+EXPORT_SYMBOL_GPL(spu_init_channels);
+
+/*
+ * Bus that every SPU device is attached to (see spu_create_dev()); it is
+ * registered as a system subsystem in init_spu_base().
+ */
+static struct bus_type spu_subsys = {
+ .name = "spu",
+ .dev_name = "spu",
+};
+
+/*
+ * Create the sysfs file described by @attr on every SPU device in the
+ * system. The list is walked under spu_full_list_mutex.
+ *
+ * Always returns 0: the result of device_create_file() is not checked, so
+ * a failure for an individual SPU is silently ignored.
+ */
+int spu_add_dev_attr(struct device_attribute *attr)
+{
+ struct spu *spu;
+
+ mutex_lock(&spu_full_list_mutex);
+ list_for_each_entry(spu, &spu_full_list, full_list)
+ device_create_file(&spu->dev, attr);
+ mutex_unlock(&spu_full_list_mutex);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(spu_add_dev_attr);
+
+/*
+ * Create the sysfs attribute group @attrs on every SPU device in the
+ * system, walking the list under spu_full_list_mutex.
+ *
+ * If creation fails for one SPU, the group is removed again from the SPUs
+ * that were handled before it, and the sysfs_create_group() error is
+ * returned. Returns 0 on success.
+ */
+int spu_add_dev_attr_group(const struct attribute_group *attrs)
+{
+ struct spu *spu;
+ int rc = 0;
+
+ mutex_lock(&spu_full_list_mutex);
+ list_for_each_entry(spu, &spu_full_list, full_list) {
+ rc = sysfs_create_group(&spu->dev.kobj, attrs);
+
+ /* we're in trouble here, but try unwinding anyway */
+ if (rc) {
+ printk(KERN_ERR "%s: can't create sysfs group '%s'\n",
+ __func__, attrs->name);
+
+ /* walk back over the entries before the failing one */
+ list_for_each_entry_continue_reverse(spu,
+ &spu_full_list, full_list)
+ sysfs_remove_group(&spu->dev.kobj, attrs);
+ break;
+ }
+ }
+
+ mutex_unlock(&spu_full_list_mutex);
+
+ return rc;
+}
+EXPORT_SYMBOL_GPL(spu_add_dev_attr_group);
+
+
+/*
+ * Remove the sysfs file described by @attr from every SPU device in the
+ * system. The list is walked under spu_full_list_mutex.
+ */
+void spu_remove_dev_attr(struct device_attribute *attr)
+{
+	struct spu *cur;
+
+	mutex_lock(&spu_full_list_mutex);
+	list_for_each_entry(cur, &spu_full_list, full_list) {
+		device_remove_file(&cur->dev, attr);
+	}
+	mutex_unlock(&spu_full_list_mutex);
+}
+EXPORT_SYMBOL_GPL(spu_remove_dev_attr);
+
+/*
+ * Remove the sysfs attribute group @attrs from every SPU device in the
+ * system. The list is walked under spu_full_list_mutex.
+ */
+void spu_remove_dev_attr_group(const struct attribute_group *attrs)
+{
+	struct spu *cur;
+
+	mutex_lock(&spu_full_list_mutex);
+	list_for_each_entry(cur, &spu_full_list, full_list) {
+		sysfs_remove_group(&cur->dev.kobj, attrs);
+	}
+	mutex_unlock(&spu_full_list_mutex);
+}
+EXPORT_SYMBOL_GPL(spu_remove_dev_attr_group);
+
+/*
+ * Register the struct device embedded in @spu on the "spu" bus, using
+ * spu->number as the device id, and link it to its NUMA node in sysfs.
+ * Returns 0 on success or the device_register() error.
+ *
+ * NOTE(review): the driver-core documentation asks for put_device() after
+ * a failed device_register(); here the caller frees the spu directly.
+ * Confirm whether that is acceptable for this device.
+ */
+static int __init spu_create_dev(struct spu *spu)
+{
+ int ret;
+
+ spu->dev.id = spu->number;
+ spu->dev.bus = &spu_subsys;
+ ret = device_register(&spu->dev);
+ if (ret) {
+ printk(KERN_ERR "Can't register SPU %d with sysfs\n",
+ spu->number);
+ return ret;
+ }
+
+ sysfs_add_device_to_node(&spu->dev, spu->node);
+
+ return 0;
+}
+
+/*
+ * Allocate and bring up one SPU; used as the per-SPU callback passed to
+ * spu_enumerate_spus(). @data is handed through unchanged to
+ * spu_create_spu().
+ *
+ * Steps: allocate and number the spu, let the platform create it, set up
+ * the MFC registers, request its interrupts, register its device, and add
+ * it to the per-node list and to the global SPU list. Returns 0 on
+ * success or a negative errno; on failure everything done so far is
+ * undone in reverse order.
+ */
+static int __init create_spu(void *data)
+{
+ struct spu *spu;
+ int ret;
+ static int number;
+ unsigned long flags;
+
+ ret = -ENOMEM;
+ spu = kzalloc(sizeof (*spu), GFP_KERNEL);
+ if (!spu)
+ goto out;
+
+ spu->alloc_state = SPU_FREE;
+
+ spin_lock_init(&spu->register_lock);
+ /* spu_lock protects the assignment of spu->number */
+ spin_lock(&spu_lock);
+ spu->number = number++;
+ spin_unlock(&spu_lock);
+
+ ret = spu_create_spu(spu, data);
+
+ if (ret)
+ goto out_free;
+
+ spu_mfc_sdr_setup(spu);
+ /* NOTE(review): meaning of the 0x33 bit pattern is not visible here */
+ spu_mfc_sr1_set(spu, 0x33);
+ ret = spu_request_irqs(spu);
+ if (ret)
+ goto out_destroy;
+
+ ret = spu_create_dev(spu);
+ if (ret)
+ goto out_free_irqs;
+
+ mutex_lock(&cbe_spu_info[spu->node].list_mutex);
+ list_add(&spu->cbe_list, &cbe_spu_info[spu->node].spus);
+ cbe_spu_info[spu->node].n_spus++;
+ mutex_unlock(&cbe_spu_info[spu->node].list_mutex);
+
+ /* modifying spu_full_list requires both the mutex and the spinlock */
+ mutex_lock(&spu_full_list_mutex);
+ spin_lock_irqsave(&spu_full_list_lock, flags);
+ list_add(&spu->full_list, &spu_full_list);
+ spin_unlock_irqrestore(&spu_full_list_lock, flags);
+ mutex_unlock(&spu_full_list_mutex);
+
+ spu->stats.util_state = SPU_UTIL_IDLE_LOADED;
+ spu->stats.tstamp = ktime_get_ns();
+
+ INIT_LIST_HEAD(&spu->aff_list);
+
+ /* success: ret is 0 here */
+ goto out;
+
+out_free_irqs:
+ spu_free_irqs(spu);
+out_destroy:
+ spu_destroy_spu(spu);
+out_free:
+ kfree(spu);
+out:
+ return ret;
+}
+
+/* Printable names for spu->stats.util_state, used by spu_stat_show(). */
+static const char *spu_state_names[] = {
+ "user", "system", "iowait", "idle"
+};
+
+/*
+ * Return the time, in milliseconds, that @spu has accumulated in
+ * utilization state @state.
+ *
+ * The stored counters are not updated while the spu is idle or its context
+ * is stopped, so when @state is the spu's current state the time elapsed
+ * since the last recorded timestamp is added on top.
+ */
+static unsigned long long spu_acct_time(struct spu *spu,
+		enum spu_utilization_state state)
+{
+	unsigned long long total_ns = spu->stats.times[state];
+
+	if (state == spu->stats.util_state) {
+		total_ns += ktime_get_ns() - spu->stats.tstamp;
+	}
+
+	return total_ns / NSEC_PER_MSEC;
+}
+
+
+/*
+ * sysfs "stat" attribute of an SPU device.
+ *
+ * Emits a single line: the name of the current utilization state, the
+ * accumulated user/system/iowait/idle times in milliseconds, then the
+ * voluntary and involuntary context switch counts, the slb, hash, minor
+ * and major fault counts, the class 2 interrupt count and the libassist
+ * count. Returns the number of bytes written to @buf.
+ */
+static ssize_t spu_stat_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct spu *spu = container_of(dev, struct spu, dev);
+
+	/* sysfs_emit() bounds the output to the page-sized sysfs buffer */
+	return sysfs_emit(buf, "%s %llu %llu %llu %llu "
+		      "%llu %llu %llu %llu %llu %llu %llu %llu\n",
+		spu_state_names[spu->stats.util_state],
+		spu_acct_time(spu, SPU_UTIL_USER),
+		spu_acct_time(spu, SPU_UTIL_SYSTEM),
+		spu_acct_time(spu, SPU_UTIL_IOWAIT),
+		spu_acct_time(spu, SPU_UTIL_IDLE_LOADED),
+		spu->stats.vol_ctx_switch,
+		spu->stats.invol_ctx_switch,
+		spu->stats.slb_flt,
+		spu->stats.hash_flt,
+		spu->stats.min_flt,
+		spu->stats.maj_flt,
+		spu->stats.class2_intr,
+		spu->stats.libassist);
+}
+
+static DEVICE_ATTR(stat, 0444, spu_stat_show, NULL);
+
+#ifdef CONFIG_KEXEC_CORE
+
+/*
+ * Register snapshot of one SPU, filled in by crash_kexec_stop_spus().
+ * A slot with a NULL spu pointer is unused.
+ */
+struct crash_spu_info {
+ struct spu *spu;
+ u32 saved_spu_runcntl_RW;
+ u32 saved_spu_status_R;
+ u32 saved_spu_npc_RW;
+ u64 saved_mfc_sr1_RW;
+ u64 saved_mfc_dar;
+ u64 saved_mfc_dsisr;
+};
+
+#define CRASH_NUM_SPUS 16 /* Enough for current hardware */
+/* Indexed by spu->number (see crash_register_spus()). */
+static struct crash_spu_info crash_spu_info[CRASH_NUM_SPUS];
+
+/*
+ * Crash shutdown hook (registered in crash_register_spus()): for every
+ * registered SPU, save its run control, status, npc, DAR, DSISR and SR1
+ * registers into crash_spu_info[], then clear the master run control bit
+ * in SR1 and wait briefly.
+ */
+static void crash_kexec_stop_spus(void)
+{
+ struct spu *spu;
+ int i;
+ u64 tmp;
+
+ for (i = 0; i < CRASH_NUM_SPUS; i++) {
+ /* slot never registered */
+ if (!crash_spu_info[i].spu)
+ continue;
+
+ spu = crash_spu_info[i].spu;
+
+ crash_spu_info[i].saved_spu_runcntl_RW =
+ in_be32(&spu->problem->spu_runcntl_RW);
+ crash_spu_info[i].saved_spu_status_R =
+ in_be32(&spu->problem->spu_status_R);
+ crash_spu_info[i].saved_spu_npc_RW =
+ in_be32(&spu->problem->spu_npc_RW);
+
+ crash_spu_info[i].saved_mfc_dar = spu_mfc_dar_get(spu);
+ crash_spu_info[i].saved_mfc_dsisr = spu_mfc_dsisr_get(spu);
+ tmp = spu_mfc_sr1_get(spu);
+ crash_spu_info[i].saved_mfc_sr1_RW = tmp;
+
+ /* the saved SR1 value is the one from before this change */
+ tmp &= ~MFC_STATE1_MASTER_RUN_CONTROL_MASK;
+ spu_mfc_sr1_set(spu, tmp);
+
+ __delay(200);
+ }
+}
+
+/*
+ * Record every SPU on @list in crash_spu_info[], indexed by spu->number,
+ * and register crash_kexec_stop_spus() as a crash shutdown handler.
+ *
+ * SPUs whose number does not fit into the table trigger a WARN and are
+ * skipped. A failure to register the handler is only logged.
+ */
+static void __init crash_register_spus(struct list_head *list)
+{
+	struct spu *spu;
+	int ret;
+
+	list_for_each_entry(spu, list, full_list) {
+		if (WARN_ON(spu->number >= CRASH_NUM_SPUS))
+			continue;
+
+		crash_spu_info[spu->number].spu = spu;
+	}
+
+	ret = crash_shutdown_register(&crash_kexec_stop_spus);
+	if (ret)
+		/* terminate the line so the next message is not appended */
+		printk(KERN_ERR "Could not register SPU crash handler\n");
+}
+
+#else
+/* No-op stub used when CONFIG_KEXEC_CORE is not set. */
+static inline void crash_register_spus(struct list_head *list)
+{
+}
+#endif
+
+/*
+ * Syscore shutdown hook: release the interrupts of every SPU on the full
+ * list and hand each SPU to spu_destroy_spu(). The list is walked under
+ * spu_full_list_mutex; the spu structures themselves are not freed.
+ */
+static void spu_shutdown(void)
+{
+ struct spu *spu;
+
+ mutex_lock(&spu_full_list_mutex);
+ list_for_each_entry(spu, &spu_full_list, full_list) {
+ spu_free_irqs(spu);
+ spu_destroy_spu(spu);
+ }
+ mutex_unlock(&spu_full_list_mutex);
+}
+
+/* Registered from init_spu_base(). */
+static struct syscore_ops spu_syscore_ops = {
+ .shutdown = spu_shutdown,
+};
+
+/*
+ * Device initcall that brings up the SPU base layer.
+ *
+ * The per-node list heads and mutexes are always initialised. If no
+ * management ops have been installed the function stops there and returns
+ * 0. Otherwise the "spu" subsystem is registered, all SPUs are enumerated
+ * through create_spu(), and xmon, the crash handler, the "stat" sysfs
+ * attribute, the syscore ops and the affinity setup are hooked up.
+ * Returns 0 on success or a negative errno.
+ */
+static int __init init_spu_base(void)
+{
+ int i, ret = 0;
+
+ for (i = 0; i < MAX_NUMNODES; i++) {
+ mutex_init(&cbe_spu_info[i].list_mutex);
+ INIT_LIST_HEAD(&cbe_spu_info[i].spus);
+ }
+
+ /* nothing to manage: leave with ret == 0 */
+ if (!spu_management_ops)
+ goto out;
+
+ /* create system subsystem for spus */
+ ret = subsys_system_register(&spu_subsys, NULL);
+ if (ret)
+ goto out;
+
+ ret = spu_enumerate_spus(create_spu);
+
+ if (ret < 0) {
+ printk(KERN_WARNING "%s: Error initializing spus\n",
+ __func__);
+ goto out_unregister_subsys;
+ }
+
+ /* a positive return value is passed on as the number of extra logos */
+ if (ret > 0)
+ fb_append_extra_logo(&logo_spe_clut224, ret);
+
+ mutex_lock(&spu_full_list_mutex);
+ xmon_register_spus(&spu_full_list);
+ crash_register_spus(&spu_full_list);
+ mutex_unlock(&spu_full_list_mutex);
+ spu_add_dev_attr(&dev_attr_stat);
+ register_syscore_ops(&spu_syscore_ops);
+
+ spu_init_affinity();
+
+ return 0;
+
+ out_unregister_subsys:
+ bus_unregister(&spu_subsys);
+ out:
+ return ret;
+}
+device_initcall(init_spu_base);
diff --git a/arch/powerpc/platforms/cell/spu_callbacks.c b/arch/powerpc/platforms/cell/spu_callbacks.c
new file mode 100644
index 000000000..e780c14c5
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spu_callbacks.c
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * System call callback functions for SPUs
+ */
+
+#undef DEBUG
+
+#include <linux/kallsyms.h>
+#include <linux/export.h>
+#include <linux/syscalls.h>
+
+#include <asm/spu.h>
+#include <asm/syscalls.h>
+#include <asm/unistd.h>
+
+/*
+ * This table defines the system calls that an SPU can call.
+ * It is currently a subset of the 64 bit powerpc system calls,
+ * with the exact semantics.
+ *
+ * The reasons for disabling some of the system calls are:
+ * 1. They interact with the way SPU syscalls are handled
+ * and we can't let them execute ever:
+ * restart_syscall, exit, fork, execve, ptrace, ...
+ * 2. They are deprecated and replaced by other means:
+ * uselib, pciconfig_*, sysfs, ...
+ * 3. They are somewhat interacting with the system in a way
+ * we don't want an SPU to:
+ * reboot, init_module, mount, kexec_load
+ * 4. They are optional and we can't rely on them being
+ * linked into the kernel. Unfortunately, the cond_syscall
+ * helper does not work here as it does not add the necessary
+ * opd symbols:
+ * mbind, mq_open, ipc, ...
+ */
+
+/*
+ * Table of syscall entry points, indexed by syscall number. The entries
+ * are generated by expanding __SYSCALL() for each line of
+ * <asm/syscall_table_spu.h>; the compat variant is ignored.
+ */
+static const syscall_fn spu_syscall_table[] = {
+#define __SYSCALL_WITH_COMPAT(nr, entry, compat) __SYSCALL(nr, entry)
+#define __SYSCALL(nr, entry) [nr] = (void *) entry,
+#include <asm/syscall_table_spu.h>
+};
+
+/*
+ * Execute the system call described by @s on behalf of an SPU.
+ *
+ * s->nr_ret selects the entry in spu_syscall_table and s->parm[0..5] are
+ * passed as its six arguments. Returns the syscall's result, or -ENOSYS
+ * when the number lies beyond the end of the table.
+ */
+long spu_sys_callback(struct spu_syscall_block *s)
+{
+	syscall_fn syscall;
+
+	if (s->nr_ret >= ARRAY_SIZE(spu_syscall_table)) {
+		/* newline added so the message is a complete log line */
+		pr_debug("%s: invalid syscall #%lld\n", __func__, s->nr_ret);
+		return -ENOSYS;
+	}
+
+	syscall = spu_syscall_table[s->nr_ret];
+
+	pr_debug("SPU-syscall "
+		 "%pSR:syscall%lld(%llx, %llx, %llx, %llx, %llx, %llx)\n",
+		 syscall,
+		 s->nr_ret,
+		 s->parm[0], s->parm[1], s->parm[2],
+		 s->parm[3], s->parm[4], s->parm[5]);
+
+	return syscall(s->parm[0], s->parm[1], s->parm[2],
+		       s->parm[3], s->parm[4], s->parm[5]);
+}
+EXPORT_SYMBOL_GPL(spu_sys_callback);
diff --git a/arch/powerpc/platforms/cell/spu_manage.c b/arch/powerpc/platforms/cell/spu_manage.c
new file mode 100644
index 000000000..f464a1f2e
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spu_manage.c
@@ -0,0 +1,530 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * spu management operations for of based platforms
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ * Copyright 2006 Sony Corp.
+ * (C) Copyright 2007 TOSHIBA CORPORATION
+ */
+
+#include <linux/interrupt.h>
+#include <linux/list.h>
+#include <linux/export.h>
+#include <linux/ptrace.h>
+#include <linux/wait.h>
+#include <linux/mm.h>
+#include <linux/io.h>
+#include <linux/mutex.h>
+#include <linux/device.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+
+#include <asm/spu.h>
+#include <asm/spu_priv1.h>
+#include <asm/firmware.h>
+
+#include "spufs/spufs.h"
+#include "interrupt.h"
+#include "spu_priv1_mmio.h"
+
+/*
+ * Return the device tree node stored in @spu. No additional reference is
+ * taken on the node.
+ */
+struct device_node *spu_devnode(struct spu *spu)
+{
+ return spu->devnode;
+}
+
+EXPORT_SYMBOL_GPL(spu_devnode);
+
+/*
+ * Determine the unit number of the SPE described by @spe.
+ *
+ * The id is taken from the first of these properties that exists and is
+ * exactly 4 bytes long:
+ *   "physical-id" - provided by new device trees
+ *   "unit-id"     - provided by the celleb device tree
+ *   "reg"         - legacy device trees
+ * Returns 0 when none of them qualifies.
+ */
+static u64 __init find_spu_unit_number(struct device_node *spe)
+{
+	const char * const id_props[] = { "physical-id", "unit-id", "reg" };
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(id_props); i++) {
+		const unsigned int *prop;
+		int proplen = 0;
+
+		/*
+		 * of_get_property() only writes the length when the property
+		 * exists, so the pointer must be checked before trusting it.
+		 */
+		prop = of_get_property(spe, id_props[i], &proplen);
+		if (prop && proplen == 4)
+			return (u64)*prop;
+	}
+
+	return 0;
+}
+
+/*
+ * Unmap the register areas and the local store of @spu. The priv1 area is
+ * only unmapped when the firmware does not have the LPAR feature, which
+ * mirrors the condition under which it is mapped.
+ */
+static void spu_unmap(struct spu *spu)
+{
+ if (!firmware_has_feature(FW_FEATURE_LPAR))
+ iounmap(spu->priv1);
+ iounmap(spu->priv2);
+ iounmap(spu->problem);
+ /* local store is kept as a plain pointer, hence the cast back */
+ iounmap((__force u8 __iomem *)spu->local_store);
+}
+
+/*
+ * Legacy interrupt mapping for device trees that carry an "isrc" property.
+ *
+ * The interrupt source unit is read from "isrc", combined with the node
+ * id found in the "node-id" property two levels up the tree (falling back
+ * to spu->node), and one mapping is created per interrupt class.
+ * Returns -ENODEV without an "isrc" property, -EINVAL if the class 2
+ * mapping failed, 0 otherwise.
+ */
+static int __init spu_map_interrupts_old(struct spu *spu,
+ struct device_node *np)
+{
+ unsigned int isrc;
+ const u32 *tmp;
+ int nid;
+
+ /* Get the interrupt source unit from the device-tree */
+ tmp = of_get_property(np, "isrc", NULL);
+ if (!tmp)
+ return -ENODEV;
+ isrc = tmp[0];
+
+ /* NOTE(review): assumes np has a parent and a grandparent */
+ tmp = of_get_property(np->parent->parent, "node-id", NULL);
+ if (!tmp) {
+ printk(KERN_WARNING "%s: can't find node-id\n", __func__);
+ nid = spu->node;
+ } else
+ nid = tmp[0];
+
+ /* Add the node number */
+ isrc |= nid << IIC_IRQ_NODE_SHIFT;
+
+ /* Now map interrupts of all 3 classes */
+ spu->irqs[0] = irq_create_mapping(NULL, IIC_IRQ_CLASS_0 | isrc);
+ spu->irqs[1] = irq_create_mapping(NULL, IIC_IRQ_CLASS_1 | isrc);
+ spu->irqs[2] = irq_create_mapping(NULL, IIC_IRQ_CLASS_2 | isrc);
+
+ /* Right now, we only fail if class 2 failed */
+ if (!spu->irqs[2])
+ return -EINVAL;
+
+ return 0;
+}
+
+/*
+ * Legacy helper: ioremap the region described by property @name of node
+ * @n. The property must consist of exactly one packed address/length
+ * pair; returns NULL when it is missing or has a different size.
+ */
+static void __iomem * __init spu_map_prop_old(struct spu *spu,
+					      struct device_node *n,
+					      const char *name)
+{
+	const struct address_prop {
+		unsigned long address;
+		unsigned int len;
+	} __attribute__((packed)) *region;
+	int size;
+
+	region = of_get_property(n, name, &size);
+	if (!region)
+		return NULL;
+	if (size != sizeof (struct address_prop))
+		return NULL;
+
+	return ioremap(region->address, region->len);
+}
+
+/*
+ * Legacy mapping of an SPU's register areas, driven by the "local-store",
+ * "problem", "priv2" and "priv1" properties of its device node.
+ *
+ * priv1 is only mapped when the firmware does not have the LPAR feature.
+ * Returns 0 on success and -ENODEV on any failure; once the local store
+ * has been mapped, a later failure goes through spu_unmap().
+ */
+static int __init spu_map_device_old(struct spu *spu)
+{
+ struct device_node *node = spu->devnode;
+ const char *prop;
+ int ret;
+
+ ret = -ENODEV;
+ spu->name = of_get_property(node, "name", NULL);
+ if (!spu->name)
+ goto out;
+
+ prop = of_get_property(node, "local-store", NULL);
+ if (!prop)
+ goto out;
+ /* the property starts with the physical address */
+ spu->local_store_phys = *(unsigned long *)prop;
+
+ /* we use local store as ram, not io memory */
+ spu->local_store = (void __force *)
+ spu_map_prop_old(spu, node, "local-store");
+ if (!spu->local_store)
+ goto out;
+
+ prop = of_get_property(node, "problem", NULL);
+ if (!prop)
+ goto out_unmap;
+ spu->problem_phys = *(unsigned long *)prop;
+
+ spu->problem = spu_map_prop_old(spu, node, "problem");
+ if (!spu->problem)
+ goto out_unmap;
+
+ spu->priv2 = spu_map_prop_old(spu, node, "priv2");
+ if (!spu->priv2)
+ goto out_unmap;
+
+ if (!firmware_has_feature(FW_FEATURE_LPAR)) {
+ spu->priv1 = spu_map_prop_old(spu, node, "priv1");
+ if (!spu->priv1)
+ goto out_unmap;
+ }
+
+ ret = 0;
+ goto out;
+
+out_unmap:
+ spu_unmap(spu);
+out:
+ return ret;
+}
+
+/*
+ * Map the three interrupts (classes 0..2) of @spu from the interrupt
+ * specifiers of node @np. If one of them cannot be mapped, the mappings
+ * made so far are disposed of and -EINVAL is returned; returns 0 on
+ * success.
+ */
+static int __init spu_map_interrupts(struct spu *spu, struct device_node *np)
+{
+	int i;
+
+	for (i = 0; i < 3; i++) {
+		spu->irqs[i] = irq_of_parse_and_map(np, i);
+		if (spu->irqs[i])
+			continue;
+
+		pr_debug("failed to map irq %x for spu %s\n", i, spu->name);
+
+		/* walk back from the failing index down to class 0 */
+		while (i >= 0) {
+			if (spu->irqs[i])
+				irq_dispose_mapping(spu->irqs[i]);
+			i--;
+		}
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * ioremap address resource number @nr of the SPU's device node.
+ *
+ * The mapping is stored in *@virt; when @phys is non-NULL the physical
+ * start address is stored in *@phys. Returns 0 on success, the error from
+ * of_address_to_resource(), or -EINVAL if ioremap() fails.
+ */
+static int __init spu_map_resource(struct spu *spu, int nr,
+		void __iomem** virt, unsigned long *phys)
+{
+	struct resource res = { };
+	unsigned long size;
+	int rc;
+
+	rc = of_address_to_resource(spu->devnode, nr, &res);
+	if (rc)
+		return rc;
+
+	if (phys)
+		*phys = res.start;
+
+	size = resource_size(&res);
+	*virt = ioremap(res.start, size);
+
+	return *virt ? 0 : -EINVAL;
+}
+
+/*
+ * Map the register areas of @spu from the address resources of its device
+ * node: resource 0 is the local store, 1 the problem state area, 2 priv2
+ * and 3 priv1. priv1 is only mapped when the firmware does not have the
+ * LPAR feature.
+ *
+ * Returns 0 on success, -ENODEV when the node has no "name" property, or
+ * the error from spu_map_resource(). Failures after the first mapping go
+ * through spu_unmap().
+ */
+static int __init spu_map_device(struct spu *spu)
+{
+ struct device_node *np = spu->devnode;
+ int ret = -ENODEV;
+
+ spu->name = of_get_property(np, "name", NULL);
+ if (!spu->name)
+ goto out;
+
+ ret = spu_map_resource(spu, 0, (void __iomem**)&spu->local_store,
+ &spu->local_store_phys);
+ if (ret) {
+ pr_debug("spu_new: failed to map %pOF resource 0\n",
+ np);
+ goto out;
+ }
+ ret = spu_map_resource(spu, 1, (void __iomem**)&spu->problem,
+ &spu->problem_phys);
+ if (ret) {
+ pr_debug("spu_new: failed to map %pOF resource 1\n",
+ np);
+ goto out_unmap;
+ }
+ ret = spu_map_resource(spu, 2, (void __iomem**)&spu->priv2, NULL);
+ if (ret) {
+ pr_debug("spu_new: failed to map %pOF resource 2\n",
+ np);
+ goto out_unmap;
+ }
+ /* with LPAR, ret is still 0 from the priv2 mapping above */
+ if (!firmware_has_feature(FW_FEATURE_LPAR))
+ ret = spu_map_resource(spu, 3,
+ (void __iomem**)&spu->priv1, NULL);
+ if (ret) {
+ pr_debug("spu_new: failed to map %pOF resource 3\n",
+ np);
+ goto out_unmap;
+ }
+ pr_debug("spu_new: %pOF maps:\n", np);
+ pr_debug(" local store : 0x%016lx -> 0x%p\n",
+ spu->local_store_phys, spu->local_store);
+ pr_debug(" problem state : 0x%016lx -> 0x%p\n",
+ spu->problem_phys, spu->problem);
+ pr_debug(" priv2 : 0x%p\n", spu->priv2);
+ pr_debug(" priv1 : 0x%p\n", spu->priv1);
+
+ return 0;
+
+out_unmap:
+ spu_unmap(spu);
+out:
+ pr_debug("failed to map spe %s: %d\n", spu->name, ret);
+ return ret;
+}
+
+/*
+ * Call @fn once for every device tree node of type "spe".
+ *
+ * Enumeration stops at the first node for which @fn returns non-zero; the
+ * iterator's reference on that node is dropped and the value is returned.
+ * Otherwise returns the number of nodes processed, or -ENODEV when there
+ * is no "spe" node at all.
+ */
+static int __init of_enumerate_spus(int (*fn)(void *data))
+{
+	struct device_node *node;
+	unsigned int count = 0;
+	int rc = -ENODEV;
+
+	for_each_node_by_type(node, "spe") {
+		rc = fn(node);
+		if (!rc) {
+			count++;
+			continue;
+		}
+
+		printk(KERN_WARNING "%s: Error initializing %pOFn\n",
+			__func__, node);
+		of_node_put(node);
+		break;
+	}
+
+	if (rc)
+		return rc;
+
+	return count;
+}
+
+/*
+ * Set up @spu from the device tree node passed in @data.
+ *
+ * Takes a reference on the node, determines the unit number and the NUMA
+ * node, then maps the register areas and the interrupts. For both steps
+ * the current device tree layout is tried first and the legacy layout is
+ * used as a fallback; the fallback is announced only once.
+ *
+ * Returns 0 on success or a negative errno. On failure the mappings are
+ * undone and the node reference is dropped again: the caller frees the
+ * spu without calling the destroy operation, so nothing else would ever
+ * release that reference.
+ */
+static int __init of_create_spu(struct spu *spu, void *data)
+{
+	int ret;
+	struct device_node *spe = (struct device_node *)data;
+	static int legacy_map = 0, legacy_irq = 0;
+
+	spu->devnode = of_node_get(spe);
+	spu->spe_id = find_spu_unit_number(spe);
+
+	spu->node = of_node_to_nid(spe);
+	if (spu->node >= MAX_NUMNODES) {
+		printk(KERN_WARNING "SPE %pOF on node %d ignored,"
+		       " node number too big\n", spe, spu->node);
+		printk(KERN_WARNING "Check if CONFIG_NUMA is enabled.\n");
+		ret = -ENODEV;
+		goto out_put;
+	}
+
+	ret = spu_map_device(spu);
+	if (ret) {
+		if (!legacy_map) {
+			legacy_map = 1;
+			printk(KERN_WARNING "%s: Legacy device tree found, "
+				"trying to map old style\n", __func__);
+		}
+		ret = spu_map_device_old(spu);
+		if (ret) {
+			printk(KERN_ERR "Unable to map %s\n",
+				spu->name);
+			goto out_put;
+		}
+	}
+
+	ret = spu_map_interrupts(spu, spe);
+	if (ret) {
+		if (!legacy_irq) {
+			legacy_irq = 1;
+			printk(KERN_WARNING "%s: Legacy device tree found, "
+				"trying old style irq\n", __func__);
+		}
+		ret = spu_map_interrupts_old(spu, spe);
+		if (ret) {
+			printk(KERN_ERR "%s: could not map interrupts\n",
+				spu->name);
+			goto out_unmap;
+		}
+	}
+
+	pr_debug("Using SPE %s %p %p %p %p %d\n", spu->name,
+		spu->local_store, spu->problem, spu->priv1,
+		spu->priv2, spu->number);
+	goto out;
+
+out_unmap:
+	spu_unmap(spu);
+out_put:
+	/* drop the reference taken by of_node_get() above */
+	of_node_put(spu->devnode);
+	spu->devnode = NULL;
+out:
+	return ret;
+}
+
+/*
+ * Undo of_create_spu(): unmap the register areas of @spu and drop the
+ * reference held on its device node. Always returns 0.
+ */
+static int of_destroy_spu(struct spu *spu)
+{
+ spu_unmap(spu);
+ of_node_put(spu->devnode);
+ return 0;
+}
+
+/* Enable the SPU of @ctx by invoking the context's master_start operation. */
+static void enable_spu_by_master_run(struct spu_context *ctx)
+{
+ ctx->ops->master_start(ctx);
+}
+
+/* Disable the SPU of @ctx by invoking the context's master_stop operation. */
+static void disable_spu_by_master_run(struct spu_context *ctx)
+{
+ ctx->ops->master_stop(ctx);
+}
+
+/* Hardcoded affinity idxs for qs20 */
+#define QS20_SPES_PER_BE 8
+static int qs20_reg_idxs[QS20_SPES_PER_BE] = { 0, 2, 4, 6, 7, 5, 3, 1 };
+static int qs20_reg_memory[QS20_SPES_PER_BE] = { 1, 1, 0, 0, 0, 0, 0, 0 };
+
+/*
+ * Find the SPU on node @node whose device tree "reg" property starts with
+ * the value @reg. SPUs without a "reg" property are skipped. Returns NULL
+ * when there is no match.
+ */
+static struct spu *__init spu_lookup_reg(int node, u32 reg)
+{
+	struct spu *spu;
+	const u32 *spu_reg;
+
+	list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
+		spu_reg = of_get_property(spu_devnode(spu), "reg", NULL);
+		/* of_get_property() returns NULL for a missing property */
+		if (spu_reg && *spu_reg == reg)
+			return spu;
+	}
+	return NULL;
+}
+
+/*
+ * Build the SPU affinity lists from the hard-coded qs20 tables.
+ *
+ * For every node the SPUs are looked up by "reg" value in the order given
+ * by qs20_reg_idxs and chained together through their aff_list; SPUs that
+ * cannot be found are skipped. The memory affinity flag of each SPU is
+ * taken from qs20_reg_memory.
+ */
+static void __init init_affinity_qs20_harcoded(void)
+{
+ int node, i;
+ struct spu *last_spu, *spu;
+ u32 reg;
+
+ for (node = 0; node < MAX_NUMNODES; node++) {
+ last_spu = NULL;
+ for (i = 0; i < QS20_SPES_PER_BE; i++) {
+ reg = qs20_reg_idxs[i];
+ spu = spu_lookup_reg(node, reg);
+ if (!spu)
+ continue;
+ /* NOTE(review): indexed by reg value, not by position i - confirm */
+ spu->has_mem_affinity = qs20_reg_memory[reg];
+ if (last_spu)
+ list_add_tail(&spu->aff_list,
+ &last_spu->aff_list);
+ last_spu = spu;
+ }
+ }
+}
+
+/*
+ * Return 1 if at least one device tree node of type "spe" carries a
+ * "vicinity" property, 0 otherwise.
+ */
+static int __init of_has_vicinity(void)
+{
+	struct device_node *spe;
+
+	for_each_node_by_type(spe, "spe") {
+		if (!of_property_present(spe, "vicinity"))
+			continue;
+
+		/* leaving the iterator early: drop its reference */
+		of_node_put(spe);
+		return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Return the SPU on node @cbe whose device tree node is @dn, or NULL if
+ * no SPU on that node matches.
+ */
+static struct spu *__init devnode_spu(int cbe, struct device_node *dn)
+{
+	struct spu *cur;
+
+	list_for_each_entry(cur, &cbe_spu_info[cbe].spus, cbe_list) {
+		if (spu_devnode(cur) == dn)
+			return cur;
+	}
+
+	return NULL;
+}
+
+/*
+ * Find an SPU on node @cbe that lists @target in its "vicinity" property,
+ * ignoring the SPU whose device node is @avoid. SPUs without a "vicinity"
+ * property are skipped. Returns NULL when no such SPU exists.
+ */
+static struct spu * __init
+neighbour_spu(int cbe, struct device_node *target, struct device_node *avoid)
+{
+	struct spu *spu;
+	struct device_node *spu_dn;
+	const phandle *vic_handles;
+	int lenp, i, nr;
+
+	list_for_each_entry(spu, &cbe_spu_info[cbe].spus, cbe_list) {
+		spu_dn = spu_devnode(spu);
+		if (spu_dn == avoid)
+			continue;
+
+		/*
+		 * of_get_property() leaves the length untouched when the
+		 * property is missing, so start from a known value and
+		 * check the pointer before walking the array.
+		 */
+		lenp = 0;
+		vic_handles = of_get_property(spu_dn, "vicinity", &lenp);
+		if (!vic_handles || lenp <= 0)
+			continue;
+
+		nr = lenp / sizeof(phandle);
+		for (i = 0; i < nr; i++) {
+			if (vic_handles[i] == target->phandle)
+				return spu;
+		}
+	}
+	return NULL;
+}
+
+/*
+ * Build the affinity list of the SPUs on node @cbe from the "vicinity"
+ * properties in the device tree.
+ *
+ * Starting from the first SPU of the node, each step looks at the
+ * vicinity phandles of the last SPU added and chains the next SPU behind
+ * it, never stepping back to where it came from. A neighbour that is an
+ * "spe" node is used directly; for any other neighbour the SPU on the far
+ * side of it is looked up, and both SPUs get memory affinity when that
+ * neighbour is named "mic-tm".
+ */
+static void __init init_affinity_node(int cbe)
+{
+	struct spu *spu, *last_spu;
+	struct device_node *vic_dn, *last_spu_dn;
+	phandle avoid_ph;
+	const phandle *vic_handles;
+	int lenp, i, nr, added, is_spe;
+
+	/* no SPUs on this node: there is no first entry to start from */
+	if (list_empty(&cbe_spu_info[cbe].spus))
+		return;
+
+	last_spu = list_first_entry(&cbe_spu_info[cbe].spus, struct spu,
+								cbe_list);
+	avoid_ph = 0;
+	for (added = 1; added < cbe_spu_info[cbe].n_spus; added++) {
+		last_spu_dn = spu_devnode(last_spu);
+
+		/* the length is only valid when the property exists */
+		lenp = 0;
+		vic_handles = of_get_property(last_spu_dn, "vicinity", &lenp);
+		nr = (vic_handles && lenp > 0) ? lenp / sizeof(phandle) : 0;
+
+		/*
+		 * Walk through each phandle in vicinity property of the spu
+		 * (typically two vicinity phandles per spe node)
+		 */
+		for (i = 0; i < nr; i++) {
+			if (vic_handles[i] == avoid_ph)
+				continue;
+
+			vic_dn = of_find_node_by_phandle(vic_handles[i]);
+			if (!vic_dn)
+				continue;
+
+			/*
+			 * "mic-tm" and "bif0" nodes do not have vicinity
+			 * property. So we need to find the spe which has
+			 * vic_dn as neighbour, but skipping the one we came
+			 * from (last_spu_dn)
+			 */
+			is_spe = of_node_name_eq(vic_dn, "spe");
+			if (is_spe)
+				spu = devnode_spu(cbe, vic_dn);
+			else
+				spu = neighbour_spu(cbe, vic_dn, last_spu_dn);
+
+			if (!spu) {
+				/* drop the lookup reference before moving on */
+				of_node_put(vic_dn);
+				continue;
+			}
+
+			if (is_spe) {
+				avoid_ph = last_spu_dn->phandle;
+			} else {
+				if (of_node_name_eq(vic_dn, "mic-tm")) {
+					last_spu->has_mem_affinity = 1;
+					spu->has_mem_affinity = 1;
+				}
+				avoid_ph = vic_dn->phandle;
+			}
+
+			of_node_put(vic_dn);
+
+			list_add_tail(&spu->aff_list, &last_spu->aff_list);
+			last_spu = spu;
+			break;
+		}
+	}
+}
+
+/* Build the firmware-described affinity lists for every possible node. */
+static void __init init_affinity_fw(void)
+{
+	int node = 0;
+
+	while (node < MAX_NUMNODES) {
+		init_affinity_node(node);
+		node++;
+	}
+}
+
+/*
+ * Set up SPU affinity information. "vicinity" properties in the device
+ * tree are used when present; otherwise machines compatible with
+ * "IBM,CPBW-1.0" get the hard-coded qs20 layout, and anything else only
+ * logs a message. Always returns 0.
+ */
+static int __init init_affinity(void)
+{
+	if (of_has_vicinity()) {
+		init_affinity_fw();
+		return 0;
+	}
+
+	if (of_machine_is_compatible("IBM,CPBW-1.0"))
+		init_affinity_qs20_harcoded();
+	else
+		printk("No affinity configuration found\n");
+
+	return 0;
+}
+
+/* SPU management operations for device tree (OF) based platforms. */
+const struct spu_management_ops spu_management_of_ops = {
+ .enumerate_spus = of_enumerate_spus,
+ .create_spu = of_create_spu,
+ .destroy_spu = of_destroy_spu,
+ .enable_spu = enable_spu_by_master_run,
+ .disable_spu = disable_spu_by_master_run,
+ .init_affinity = init_affinity,
+};
diff --git a/arch/powerpc/platforms/cell/spu_priv1_mmio.c b/arch/powerpc/platforms/cell/spu_priv1_mmio.c
new file mode 100644
index 000000000..d150e3987
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spu_priv1_mmio.c
@@ -0,0 +1,167 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * spu hypervisor abstraction for direct hardware access.
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ * Copyright 2006 Sony Corp.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/list.h>
+#include <linux/ptrace.h>
+#include <linux/wait.h>
+#include <linux/mm.h>
+#include <linux/io.h>
+#include <linux/mutex.h>
+#include <linux/device.h>
+#include <linux/sched.h>
+
+#include <asm/spu.h>
+#include <asm/spu_priv1.h>
+#include <asm/firmware.h>
+
+#include "interrupt.h"
+#include "spu_priv1_mmio.h"
+
+/* AND the class-@class interrupt mask register of @spu with @mask. */
+static void int_mask_and(struct spu *spu, int class, u64 mask)
+{
+	u64 cur = in_be64(&spu->priv1->int_mask_RW[class]);
+
+	cur &= mask;
+	out_be64(&spu->priv1->int_mask_RW[class], cur);
+}
+
+/* OR @mask into the class-@class interrupt mask register of @spu. */
+static void int_mask_or(struct spu *spu, int class, u64 mask)
+{
+	u64 cur = in_be64(&spu->priv1->int_mask_RW[class]);
+
+	cur |= mask;
+	out_be64(&spu->priv1->int_mask_RW[class], cur);
+}
+
+/* Overwrite the class-@class interrupt mask register of @spu with @mask. */
+static void int_mask_set(struct spu *spu, int class, u64 mask)
+{
+	u64 __iomem *reg = &spu->priv1->int_mask_RW[class];
+
+	out_be64(reg, mask);
+}
+
+/* Return the class-@class interrupt mask register of @spu. */
+static u64 int_mask_get(struct spu *spu, int class)
+{
+	u64 cur = in_be64(&spu->priv1->int_mask_RW[class]);
+
+	return cur;
+}
+
+/* Write @stat to the class-@class interrupt status register of @spu. */
+static void int_stat_clear(struct spu *spu, int class, u64 stat)
+{
+	u64 __iomem *reg = &spu->priv1->int_stat_RW[class];
+
+	out_be64(reg, stat);
+}
+
+/* Return the class-@class interrupt status register of @spu. */
+static u64 int_stat_get(struct spu *spu, int class)
+{
+	u64 stat = in_be64(&spu->priv1->int_stat_RW[class]);
+
+	return stat;
+}
+
+/*
+ * Program the int_route register of @spu for @cpu.
+ *
+ * When the SPU's node has CPUs, nothing is written unless the cpumask of
+ * the SPU's node intersects the cpumask of @cpu's node.  Otherwise the
+ * value of iic_get_target_id(@cpu) is placed at bit offsets 48, 32 and 16
+ * of the register.
+ */
+static void cpu_affinity_set(struct spu *spu, int cpu)
+{
+	u64 id;
+
+	if (nr_cpus_node(spu->node)) {
+		const struct cpumask *spu_cpus = cpumask_of_node(spu->node);
+		const struct cpumask *dst_cpus =
+			cpumask_of_node(cpu_to_node(cpu));
+
+		if (!cpumask_intersects(spu_cpus, dst_cpus))
+			return;
+	}
+
+	id = iic_get_target_id(cpu);
+	out_be64(&spu->priv1->int_route_RW,
+		 (id << 48) | (id << 32) | (id << 16));
+}
+
+/* Return the mfc_dar register of @spu. */
+static u64 mfc_dar_get(struct spu *spu)
+{
+	u64 dar = in_be64(&spu->priv1->mfc_dar_RW);
+
+	return dar;
+}
+
+/* Return the mfc_dsisr register of @spu. */
+static u64 mfc_dsisr_get(struct spu *spu)
+{
+	u64 dsisr = in_be64(&spu->priv1->mfc_dsisr_RW);
+
+	return dsisr;
+}
+
+/* Write @dsisr to the mfc_dsisr register of @spu. */
+static void mfc_dsisr_set(struct spu *spu, u64 dsisr)
+{
+	u64 __iomem *reg = &spu->priv1->mfc_dsisr_RW;
+
+	out_be64(reg, dsisr);
+}
+
+/* Copy the current value of the SDR1 SPR into the mfc_sdr register of @spu. */
+static void mfc_sdr_setup(struct spu *spu)
+{
+	u64 __iomem *reg = &spu->priv1->mfc_sdr_RW;
+
+	out_be64(reg, mfspr(SPRN_SDR1));
+}
+
+/* Write @sr1 to the mfc_sr1 register of @spu. */
+static void mfc_sr1_set(struct spu *spu, u64 sr1)
+{
+	u64 __iomem *reg = &spu->priv1->mfc_sr1_RW;
+
+	out_be64(reg, sr1);
+}
+
+/* Return the mfc_sr1 register of @spu. */
+static u64 mfc_sr1_get(struct spu *spu)
+{
+	u64 sr1 = in_be64(&spu->priv1->mfc_sr1_RW);
+
+	return sr1;
+}
+
+/* Write @tclass_id to the mfc_tclass_id register of @spu. */
+static void mfc_tclass_id_set(struct spu *spu, u64 tclass_id)
+{
+	u64 __iomem *reg = &spu->priv1->mfc_tclass_id_RW;
+
+	out_be64(reg, tclass_id);
+}
+
+/* Return the mfc_tclass_id register of @spu. */
+static u64 mfc_tclass_id_get(struct spu *spu)
+{
+	u64 id = in_be64(&spu->priv1->mfc_tclass_id_RW);
+
+	return id;
+}
+
+/* Write zero to the tlb_invalidate_entry register of @spu. */
+static void tlb_invalidate(struct spu *spu)
+{
+	u64 __iomem *reg = &spu->priv1->tlb_invalidate_entry_W;
+
+	out_be64(reg, 0ul);
+}
+
+/* Write @id to the resource_allocation_groupID register of @spu. */
+static void resource_allocation_groupID_set(struct spu *spu, u64 id)
+{
+	u64 __iomem *reg = &spu->priv1->resource_allocation_groupID_RW;
+
+	out_be64(reg, id);
+}
+
+/* Return the resource_allocation_groupID register of @spu. */
+static u64 resource_allocation_groupID_get(struct spu *spu)
+{
+	u64 id = in_be64(&spu->priv1->resource_allocation_groupID_RW);
+
+	return id;
+}
+
+/* Write @enable to the resource_allocation_enable register of @spu. */
+static void resource_allocation_enable_set(struct spu *spu, u64 enable)
+{
+	u64 __iomem *reg = &spu->priv1->resource_allocation_enable_RW;
+
+	out_be64(reg, enable);
+}
+
+/* Return the resource_allocation_enable register of @spu. */
+static u64 resource_allocation_enable_get(struct spu *spu)
+{
+	u64 enable = in_be64(&spu->priv1->resource_allocation_enable_RW);
+
+	return enable;
+}
+
+const struct spu_priv1_ops spu_priv1_mmio_ops = /* priv1 ops implemented with in_be64()/out_be64() on spu->priv1 */
+{
+ .int_mask_and = int_mask_and,
+ .int_mask_or = int_mask_or,
+ .int_mask_set = int_mask_set,
+ .int_mask_get = int_mask_get,
+ .int_stat_clear = int_stat_clear,
+ .int_stat_get = int_stat_get,
+ .cpu_affinity_set = cpu_affinity_set,
+ .mfc_dar_get = mfc_dar_get,
+ .mfc_dsisr_get = mfc_dsisr_get,
+ .mfc_dsisr_set = mfc_dsisr_set,
+ .mfc_sdr_setup = mfc_sdr_setup,
+ .mfc_sr1_set = mfc_sr1_set,
+ .mfc_sr1_get = mfc_sr1_get,
+ .mfc_tclass_id_set = mfc_tclass_id_set,
+ .mfc_tclass_id_get = mfc_tclass_id_get,
+ .tlb_invalidate = tlb_invalidate,
+ .resource_allocation_groupID_set = resource_allocation_groupID_set,
+ .resource_allocation_groupID_get = resource_allocation_groupID_get,
+ .resource_allocation_enable_set = resource_allocation_enable_set,
+ .resource_allocation_enable_get = resource_allocation_enable_get,
+};
diff --git a/arch/powerpc/platforms/cell/spu_priv1_mmio.h b/arch/powerpc/platforms/cell/spu_priv1_mmio.h
new file mode 100644
index 000000000..04f0db339
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spu_priv1_mmio.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * spu hypervisor abstraction for direct hardware access.
+ *
+ * Copyright (C) 2006 Sony Computer Entertainment Inc.
+ * Copyright 2006 Sony Corp.
+ */
+
+#ifndef SPU_PRIV1_MMIO_H
+#define SPU_PRIV1_MMIO_H
+
+struct device_node *spu_devnode(struct spu *spu);
+
+#endif /* SPU_PRIV1_MMIO_H */
diff --git a/arch/powerpc/platforms/cell/spu_syscalls.c b/arch/powerpc/platforms/cell/spu_syscalls.c
new file mode 100644
index 000000000..87ad7d563
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spu_syscalls.c
@@ -0,0 +1,165 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * SPU file system -- system call stubs
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ * (C) Copyright 2006-2007, IBM Corporation
+ *
+ * Author: Arnd Bergmann <arndb@de.ibm.com>
+ */
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/module.h>
+#include <linux/syscalls.h>
+#include <linux/rcupdate.h>
+#include <linux/binfmts.h>
+
+#include <asm/spu.h>
+
+/* protected by rcu */
+static struct spufs_calls *spufs_calls;
+
+#ifdef CONFIG_SPU_FS_MODULE
+
+/*
+ * Fetch the registered spufs_calls table under rcu_read_lock() and take a
+ * reference on its owning module.  Returns NULL if no table is registered
+ * or try_module_get() fails.
+ */
+static inline struct spufs_calls *spufs_calls_get(void)
+{
+	struct spufs_calls *table;
+
+	rcu_read_lock();
+	table = rcu_dereference(spufs_calls);
+	if (table) {
+		if (!try_module_get(table->owner))
+			table = NULL;
+	}
+	rcu_read_unlock();
+
+	return table;
+}
+
+/* Drop the module reference; @calls must be the currently registered table. */
+static inline void spufs_calls_put(struct spufs_calls *calls)
+{
+	BUG_ON(spufs_calls != calls);
+
+	/* no RCU needed here, the module reference we hold keeps it alive */
+	module_put(spufs_calls->owner);
+}
+
+#else /* !defined CONFIG_SPU_FS_MODULE */
+
+/* Without CONFIG_SPU_FS_MODULE the registered table is returned as is. */
+static inline struct spufs_calls *spufs_calls_get(void)
+{
+	struct spufs_calls *table = spufs_calls;
+
+	return table;
+}
+
+/* Without CONFIG_SPU_FS_MODULE there is no module reference to drop. */
+static inline void spufs_calls_put(struct spufs_calls *calls)
+{
+}
+
+#endif /* CONFIG_SPU_FS_MODULE */
+
+/*
+ * spu_create system call: forwards to the registered create_thread hook.
+ *
+ * Returns -ENOSYS when no spufs_calls table is available.  With
+ * SPU_CREATE_AFFINITY_SPU set in @flags, @neighbor_fd is resolved to a
+ * file first and -EBADF is returned if that fails; otherwise the hook is
+ * called with a NULL neighbor file.
+ */
+SYSCALL_DEFINE4(spu_create, const char __user *, name, unsigned int, flags,
+		umode_t, mode, int, neighbor_fd)
+{
+	struct spufs_calls *calls = spufs_calls_get();
+	struct fd neighbor;
+	long ret;
+
+	if (!calls)
+		return -ENOSYS;
+
+	if (!(flags & SPU_CREATE_AFFINITY_SPU)) {
+		ret = calls->create_thread(name, flags, mode, NULL);
+		goto out_put;
+	}
+
+	neighbor = fdget(neighbor_fd);
+	if (neighbor.file) {
+		ret = calls->create_thread(name, flags, mode, neighbor.file);
+		fdput(neighbor);
+	} else {
+		ret = -EBADF;
+	}
+
+out_put:
+	spufs_calls_put(calls);
+	return ret;
+}
+
+/*
+ * spu_run system call: forwards to the registered spu_run hook for the
+ * file behind @fd.  Returns -ENOSYS when no spufs_calls table is available
+ * and -EBADF when @fd does not resolve to a file.
+ */
+SYSCALL_DEFINE3(spu_run,int, fd, __u32 __user *, unpc, __u32 __user *, ustatus)
+{
+	struct spufs_calls *calls = spufs_calls_get();
+	long ret = -EBADF;
+	struct fd arg;
+
+	if (!calls)
+		return -ENOSYS;
+
+	arg = fdget(fd);
+	if (arg.file) {
+		ret = calls->spu_run(arg.file, unpc, ustatus);
+		fdput(arg);
+	}
+
+	spufs_calls_put(calls);
+	return ret;
+}
+
+#ifdef CONFIG_COREDUMP
+/*
+ * Return the value of the registered coredump_extra_notes_size hook, or 0
+ * when no spufs_calls table is available.
+ */
+int elf_coredump_extra_notes_size(void)
+{
+	struct spufs_calls *calls = spufs_calls_get();
+	int size = 0;
+
+	if (calls) {
+		size = calls->coredump_extra_notes_size();
+		spufs_calls_put(calls);
+	}
+
+	return size;
+}
+
+/*
+ * Forward @cprm to the registered coredump_extra_notes_write hook and
+ * return its result, or 0 when no spufs_calls table is available.
+ */
+int elf_coredump_extra_notes_write(struct coredump_params *cprm)
+{
+	struct spufs_calls *calls = spufs_calls_get();
+	int rc = 0;
+
+	if (calls) {
+		rc = calls->coredump_extra_notes_write(cprm);
+		spufs_calls_put(calls);
+	}
+
+	return rc;
+}
+#endif
+
+/*
+ * Invoke the registered notify_spus_active hook; does nothing when no
+ * spufs_calls table is available.
+ */
+void notify_spus_active(void)
+{
+	struct spufs_calls *calls = spufs_calls_get();
+
+	if (calls) {
+		calls->notify_spus_active();
+		spufs_calls_put(calls);
+	}
+}
+
+/*
+ * Publish @calls as the spufs_calls table.  Returns -EBUSY if a table is
+ * already registered, 0 otherwise.
+ */
+int register_spu_syscalls(struct spufs_calls *calls)
+{
+	if (!spufs_calls) {
+		rcu_assign_pointer(spufs_calls, calls);
+		return 0;
+	}
+
+	return -EBUSY;
+}
+EXPORT_SYMBOL_GPL(register_spu_syscalls);
+
+/*
+ * Clear the registered spufs_calls table and wait for an RCU grace period.
+ * BUGs if @calls does not have the same owner as the registered table.
+ */
+void unregister_spu_syscalls(struct spufs_calls *calls)
+{
+	BUG_ON(calls->owner != spufs_calls->owner);
+
+	RCU_INIT_POINTER(spufs_calls, NULL);
+	/* let readers inside rcu_read_lock() sections finish */
+	synchronize_rcu();
+}
+EXPORT_SYMBOL_GPL(unregister_spu_syscalls);
diff --git a/arch/powerpc/platforms/cell/spufs/.gitignore b/arch/powerpc/platforms/cell/spufs/.gitignore
new file mode 100644
index 000000000..5f3eb224f
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/.gitignore
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
+spu_save_dump.h
+spu_restore_dump.h
diff --git a/arch/powerpc/platforms/cell/spufs/Makefile b/arch/powerpc/platforms/cell/spufs/Makefile
new file mode 100644
index 000000000..52e4c80ec
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/Makefile
@@ -0,0 +1,63 @@
+# SPDX-License-Identifier: GPL-2.0
+
+obj-$(CONFIG_SPU_FS) += spufs.o
+spufs-y += inode.o file.o context.o syscalls.o
+spufs-y += sched.o backing_ops.o hw_ops.o run.o gang.o
+spufs-y += switch.o fault.o lscsa_alloc.o
+spufs-$(CONFIG_COREDUMP) += coredump.o
+
+# magic for the trace events
+CFLAGS_sched.o := -I$(src)
+
+# Rules to build switch.o with the help of SPU tool chain
+SPU_CROSS := spu-
+SPU_CC := $(SPU_CROSS)gcc
+SPU_AS := $(SPU_CROSS)gcc
+SPU_LD := $(SPU_CROSS)ld
+SPU_OBJCOPY := $(SPU_CROSS)objcopy
+SPU_CFLAGS := -O2 -Wall -I$(srctree)/include -D__KERNEL__
+SPU_AFLAGS := -c -D__ASSEMBLY__ -I$(srctree)/include -D__KERNEL__
+SPU_LDFLAGS := -N -Ttext=0x0
+
+$(obj)/switch.o: $(obj)/spu_save_dump.h $(obj)/spu_restore_dump.h
+clean-files := spu_save_dump.h spu_restore_dump.h
+
+# Compile SPU files
+ cmd_spu_cc = $(SPU_CC) $(SPU_CFLAGS) -c -o $@ $<
+quiet_cmd_spu_cc = SPU_CC $@
+$(obj)/spu_%.o: $(src)/spu_%.c
+ $(call if_changed,spu_cc)
+
+# Assemble SPU files
+ cmd_spu_as = $(SPU_AS) $(SPU_AFLAGS) -o $@ $<
+quiet_cmd_spu_as = SPU_AS $@
+$(obj)/spu_%.o: $(src)/spu_%.S
+ $(call if_changed,spu_as)
+
+# Link SPU Executables
+ cmd_spu_ld = $(SPU_LD) $(SPU_LDFLAGS) -o $@ $^
+quiet_cmd_spu_ld = SPU_LD $@
+$(obj)/spu_%: $(obj)/spu_%_crt0.o $(obj)/spu_%.o
+ $(call if_changed,spu_ld)
+
+# Copy into binary format
+ cmd_spu_objcopy = $(SPU_OBJCOPY) -O binary $< $@
+quiet_cmd_spu_objcopy = OBJCOPY $@
+$(obj)/spu_%.bin: $(src)/spu_%
+ $(call if_changed,spu_objcopy)
+
+# create C code from ELF executable
+cmd_hexdump = ( \
+ echo "/*" ; \
+ echo " * $*_dump.h: Copyright (C) 2005 IBM." ; \
+ echo " * Hex-dump auto generated from $*.c." ; \
+ echo " * Do not edit!" ; \
+ echo " */" ; \
+ echo "static unsigned int $*_code[] " \
+ "__attribute__((__aligned__(128))) = {" ; \
+ hexdump -v -e '"0x" 4/1 "%02x" "," "\n"' $< ; \
+ echo "};" ; \
+ ) > $@
+quiet_cmd_hexdump = HEXDUMP $@
+$(obj)/%_dump.h: $(obj)/%.bin
+ $(call if_changed,hexdump)
diff --git a/arch/powerpc/platforms/cell/spufs/backing_ops.c b/arch/powerpc/platforms/cell/spufs/backing_ops.c
new file mode 100644
index 000000000..28a34a2ca
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/backing_ops.c
@@ -0,0 +1,400 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* backing_ops.c - query/set operations on saved SPU context.
+ *
+ * Copyright (C) IBM 2005
+ * Author: Mark Nutter <mnutter@us.ibm.com>
+ *
+ * These register operations allow SPUFS to operate on saved
+ * SPU contexts rather than hardware.
+ */
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/vmalloc.h>
+#include <linux/smp.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+#include <linux/poll.h>
+
+#include <asm/io.h>
+#include <asm/spu.h>
+#include <asm/spu_csa.h>
+#include <asm/spu_info.h>
+#include <asm/mmu_context.h>
+#include "spufs.h"
+
+/*
+ * Reads/writes to various problem and priv2 registers require
+ * state changes, i.e. generate SPU events, modify channel
+ * counts, etc.
+ */
+
+/*
+ * OR @event into the saved channel 0 data.  The saved channel 0 count is
+ * set to 1 only when it was 0, @event was not already set in the channel 0
+ * data and @event is set in the channel 1 data.
+ */
+static void gen_spu_event(struct spu_context *ctx, u32 event)
+{
+	struct spu_state *csa = &ctx->csa;
+	u64 old_cnt = csa->spu_chnlcnt_RW[0];
+	u64 old_data = csa->spu_chnldata_RW[0];
+	u64 ch1 = csa->spu_chnldata_RW[1];
+
+	csa->spu_chnldata_RW[0] |= event;
+
+	if (old_cnt != 0)
+		return;
+	if ((old_data & event) || !(ch1 & event))
+		return;
+
+	csa->spu_chnlcnt_RW[0] = 1;
+}
+
+/*
+ * Read one word from the saved pu_mb_R mailbox into @data.
+ *
+ * Returns 4 if a word was available (low byte of mb_stat_R non-zero),
+ * 0 otherwise.  Runs under csa.register_lock.
+ */
+static int spu_backing_mbox_read(struct spu_context *ctx, u32 * data)
+{
+	struct spu_state *csa = &ctx->csa;
+	int nbytes = 0;
+
+	spin_lock(&csa->register_lock);
+	if (csa->prob.mb_stat_R & 0x0000ff) {
+		/*
+		 * Read the first available word.
+		 * Implementation note: the depth
+		 * of pu_mb_R is currently 1.
+		 */
+		*data = csa->prob.pu_mb_R;
+		csa->prob.mb_stat_R &= ~(0x0000ff);
+		csa->spu_chnlcnt_RW[28] = 1;
+		gen_spu_event(ctx, MFC_PU_MAILBOX_AVAILABLE_EVENT);
+		nbytes = 4;
+	}
+	spin_unlock(&csa->register_lock);
+
+	return nbytes;
+}
+
+/* Return the saved mb_stat_R value of @ctx. */
+static u32 spu_backing_mbox_stat_read(struct spu_context *ctx)
+{
+	struct spu_problem_collapsed *prob = &ctx->csa.prob;
+
+	return prob->mb_stat_R;
+}
+
+/*
+ * Compute the poll mask for the saved mailbox status of @ctx.
+ *
+ * @events selects the directions the caller is interested in.  For each
+ * requested direction the corresponding EPOLL bits are returned if the
+ * mailbox status already satisfies it.  Otherwise the matching class 2
+ * interrupt is enabled in the saved mask so the caller gets notified, after
+ * first marking any pending interrupt as done so it is not woken up
+ * unnecessarily.
+ *
+ * The results for both directions are accumulated, so a caller asking for
+ * read and write readiness at once gets both sets of bits.
+ */
+static __poll_t spu_backing_mbox_stat_poll(struct spu_context *ctx,
+					   __poll_t events)
+{
+	struct spu_state *csa = &ctx->csa;
+	__poll_t ret = 0;
+	u32 stat;
+
+	spin_lock_irq(&csa->register_lock);
+	stat = csa->prob.mb_stat_R;
+
+	if (events & (EPOLLIN | EPOLLRDNORM)) {
+		if (stat & 0xff0000)
+			ret |= EPOLLIN | EPOLLRDNORM;
+		else {
+			csa->priv1.int_stat_class2_RW &=
+				~CLASS2_MAILBOX_INTR;
+			csa->priv1.int_mask_class2_RW |=
+				CLASS2_ENABLE_MAILBOX_INTR;
+		}
+	}
+	if (events & (EPOLLOUT | EPOLLWRNORM)) {
+		if (stat & 0x00ff00)
+			/* accumulate: do not drop read bits set above */
+			ret |= EPOLLOUT | EPOLLWRNORM;
+		else {
+			csa->priv1.int_stat_class2_RW &=
+				~CLASS2_MAILBOX_THRESHOLD_INTR;
+			csa->priv1.int_mask_class2_RW |=
+				CLASS2_ENABLE_MAILBOX_THRESHOLD_INTR;
+		}
+	}
+	spin_unlock_irq(&csa->register_lock);
+
+	return ret;
+}
+
+/*
+ * Read one word from the saved puint_mb_R mailbox into @data.
+ *
+ * Returns 4 if a word was available.  Otherwise the class 2 mailbox
+ * interrupt is enabled in the saved mask and 0 is returned.
+ */
+static int spu_backing_ibox_read(struct spu_context *ctx, u32 * data)
+{
+	struct spu_state *csa = &ctx->csa;
+	int nbytes = 0;
+
+	spin_lock(&csa->register_lock);
+	if (!(csa->prob.mb_stat_R & 0xff0000)) {
+		/* make sure we get woken up by the interrupt */
+		csa->priv1.int_mask_class2_RW |= CLASS2_ENABLE_MAILBOX_INTR;
+		goto unlock;
+	}
+
+	/*
+	 * Read the first available word.
+	 * Implementation note: the depth
+	 * of puint_mb_R is currently 1.
+	 */
+	*data = csa->priv2.puint_mb_R;
+	csa->prob.mb_stat_R &= ~(0xff0000);
+	csa->spu_chnlcnt_RW[30] = 1;
+	gen_spu_event(ctx, MFC_PU_INT_MAILBOX_AVAILABLE_EVENT);
+	nbytes = 4;
+unlock:
+	spin_unlock(&csa->register_lock);
+	return nbytes;
+}
+
+/*
+ * Queue @data in the saved SPU mailbox of @ctx.
+ *
+ * Returns 4 if there was room (bits 8-15 of mb_stat_R non-zero).
+ * Otherwise the class 2 mailbox threshold interrupt is enabled in the
+ * saved mask and 0 is returned.
+ */
+static int spu_backing_wbox_write(struct spu_context *ctx, u32 data)
+{
+	struct spu_state *csa = &ctx->csa;
+	int nbytes = 0;
+	int slot, avail;
+
+	spin_lock(&csa->register_lock);
+	if (!(csa->prob.mb_stat_R & 0x00ff00)) {
+		/*
+		 * make sure we get woken up by the interrupt when space
+		 * becomes available
+		 */
+		csa->priv1.int_mask_class2_RW |=
+			CLASS2_ENABLE_MAILBOX_THRESHOLD_INTR;
+		goto unlock;
+	}
+
+	slot = csa->spu_chnlcnt_RW[29];
+	avail = (csa->prob.mb_stat_R & 0x00ff00) >> 8;
+
+	/*
+	 * We have space to write wbox_data.
+	 * Implementation note: the depth
+	 * of spu_mb_W is currently 4.
+	 */
+	BUG_ON(avail != (4 - slot));
+	csa->spu_mailbox_data[slot] = data;
+	slot++;
+	csa->spu_chnlcnt_RW[29] = slot;
+	csa->prob.mb_stat_R &= ~(0x00ff00);
+	csa->prob.mb_stat_R |= (((4 - slot) & 0xff) << 8);
+	gen_spu_event(ctx, MFC_SPU_MAILBOX_WRITTEN_EVENT);
+	nbytes = 4;
+unlock:
+	spin_unlock(&csa->register_lock);
+	return nbytes;
+}
+
+/* Return the saved data of channel 3 of @ctx. */
+static u32 spu_backing_signal1_read(struct spu_context *ctx)
+{
+	struct spu_state *csa = &ctx->csa;
+
+	return csa->spu_chnldata_RW[3];
+}
+
+/*
+ * Store @data in the saved channel 3 data: ORed into the old value when
+ * bit 0 of spu_cfg_RW is set, replacing it otherwise.  The channel count
+ * is set to 1 and MFC_SIGNAL_1_EVENT is generated.
+ */
+static void spu_backing_signal1_write(struct spu_context *ctx, u32 data)
+{
+	struct spu_state *csa = &ctx->csa;
+
+	spin_lock(&csa->register_lock);
+	if (!(csa->priv2.spu_cfg_RW & 0x1))
+		csa->spu_chnldata_RW[3] = data;
+	else
+		csa->spu_chnldata_RW[3] |= data;
+	csa->spu_chnlcnt_RW[3] = 1;
+	gen_spu_event(ctx, MFC_SIGNAL_1_EVENT);
+	spin_unlock(&csa->register_lock);
+}
+
+/* Return the saved data of channel 4 of @ctx. */
+static u32 spu_backing_signal2_read(struct spu_context *ctx)
+{
+	struct spu_state *csa = &ctx->csa;
+
+	return csa->spu_chnldata_RW[4];
+}
+
+/*
+ * Store @data in the saved channel 4 data: ORed into the old value when
+ * bit 1 of spu_cfg_RW is set, replacing it otherwise.  The channel count
+ * is set to 1 and MFC_SIGNAL_2_EVENT is generated.
+ */
+static void spu_backing_signal2_write(struct spu_context *ctx, u32 data)
+{
+	struct spu_state *csa = &ctx->csa;
+
+	spin_lock(&csa->register_lock);
+	if (!(csa->priv2.spu_cfg_RW & 0x2))
+		csa->spu_chnldata_RW[4] = data;
+	else
+		csa->spu_chnldata_RW[4] |= data;
+	csa->spu_chnlcnt_RW[4] = 1;
+	gen_spu_event(ctx, MFC_SIGNAL_2_EVENT);
+	spin_unlock(&csa->register_lock);
+}
+
+/* Set bit 0 of the saved spu_cfg_RW when @val is non-zero, clear it otherwise. */
+static void spu_backing_signal1_type_set(struct spu_context *ctx, u64 val)
+{
+	struct spu_state *csa = &ctx->csa;
+	u64 cfg;
+
+	spin_lock(&csa->register_lock);
+	cfg = csa->priv2.spu_cfg_RW;
+	cfg = val ? (cfg | 1) : (cfg & ~1);
+	csa->priv2.spu_cfg_RW = cfg;
+	spin_unlock(&csa->register_lock);
+}
+
+/* Return 1 if bit 0 of the saved spu_cfg_RW is set, 0 otherwise. */
+static u64 spu_backing_signal1_type_get(struct spu_context *ctx)
+{
+	if (ctx->csa.priv2.spu_cfg_RW & 1)
+		return 1;
+
+	return 0;
+}
+
+/* Set bit 1 of the saved spu_cfg_RW when @val is non-zero, clear it otherwise. */
+static void spu_backing_signal2_type_set(struct spu_context *ctx, u64 val)
+{
+	struct spu_state *csa = &ctx->csa;
+	u64 cfg;
+
+	spin_lock(&csa->register_lock);
+	cfg = csa->priv2.spu_cfg_RW;
+	cfg = val ? (cfg | 2) : (cfg & ~2);
+	csa->priv2.spu_cfg_RW = cfg;
+	spin_unlock(&csa->register_lock);
+}
+
+/* Return 1 if bit 1 of the saved spu_cfg_RW is set, 0 otherwise. */
+static u64 spu_backing_signal2_type_get(struct spu_context *ctx)
+{
+	if (ctx->csa.priv2.spu_cfg_RW & 2)
+		return 1;
+
+	return 0;
+}
+
+/* Return the saved spu_npc_RW value of @ctx. */
+static u32 spu_backing_npc_read(struct spu_context *ctx)
+{
+	struct spu_problem_collapsed *prob = &ctx->csa.prob;
+
+	return prob->spu_npc_RW;
+}
+
+/* Store @val as the saved spu_npc_RW value of @ctx. */
+static void spu_backing_npc_write(struct spu_context *ctx, u32 val)
+{
+	struct spu_problem_collapsed *prob = &ctx->csa.prob;
+
+	prob->spu_npc_RW = val;
+}
+
+/* Return the saved spu_status_R value of @ctx. */
+static u32 spu_backing_status_read(struct spu_context *ctx)
+{
+	struct spu_problem_collapsed *prob = &ctx->csa.prob;
+
+	return prob->spu_status_R;
+}
+
+/* Return the ls member of the context's lscsa save area. */
+static char *spu_backing_get_ls(struct spu_context *ctx)
+{
+	struct spu_state *csa = &ctx->csa;
+
+	return csa->lscsa->ls;
+}
+
+/* Store @val as the saved spu_privcntl_RW value of @ctx. */
+static void spu_backing_privcntl_write(struct spu_context *ctx, u64 val)
+{
+	struct spu_state *csa = &ctx->csa;
+
+	csa->priv2.spu_privcntl_RW = val;
+}
+
+/* Return the saved spu_runcntl_RW value of @ctx. */
+static u32 spu_backing_runcntl_read(struct spu_context *ctx)
+{
+	struct spu_problem_collapsed *prob = &ctx->csa.prob;
+
+	return prob->spu_runcntl_RW;
+}
+
+/*
+ * Store @val as the saved run control value and update the saved status
+ * to match: with SPU_RUNCNTL_RUNNABLE set, the stop/halt/single-step/
+ * invalid-instruction/invalid-channel status bits are cleared and
+ * SPU_STATUS_RUNNING is set; otherwise SPU_STATUS_RUNNING is cleared.
+ */
+static void spu_backing_runcntl_write(struct spu_context *ctx, u32 val)
+{
+	struct spu_state *csa = &ctx->csa;
+
+	spin_lock(&csa->register_lock);
+	csa->prob.spu_runcntl_RW = val;
+	if (!(val & SPU_RUNCNTL_RUNNABLE)) {
+		csa->prob.spu_status_R &= ~SPU_STATUS_RUNNING;
+	} else {
+		csa->prob.spu_status_R &= ~(SPU_STATUS_STOPPED_BY_STOP |
+					    SPU_STATUS_STOPPED_BY_HALT |
+					    SPU_STATUS_SINGLE_STEP |
+					    SPU_STATUS_INVALID_INSTR |
+					    SPU_STATUS_INVALID_CH);
+		csa->prob.spu_status_R |= SPU_STATUS_RUNNING;
+	}
+	spin_unlock(&csa->register_lock);
+}
+
+/* Write SPU_RUNCNTL_STOP through spu_backing_runcntl_write(). */
+static void spu_backing_runcntl_stop(struct spu_context *ctx)
+{
+	const u32 stop = SPU_RUNCNTL_STOP;
+
+	spu_backing_runcntl_write(ctx, stop);
+}
+
+/* Set MFC_STATE1_MASTER_RUN_CONTROL_MASK in the saved mfc_sr1_RW. */
+static void spu_backing_master_start(struct spu_context *ctx)
+{
+	struct spu_state *csa = &ctx->csa;
+
+	spin_lock(&csa->register_lock);
+	csa->priv1.mfc_sr1_RW |= MFC_STATE1_MASTER_RUN_CONTROL_MASK;
+	spin_unlock(&csa->register_lock);
+}
+
+/* Clear MFC_STATE1_MASTER_RUN_CONTROL_MASK in the saved mfc_sr1_RW. */
+static void spu_backing_master_stop(struct spu_context *ctx)
+{
+	struct spu_state *csa = &ctx->csa;
+
+	spin_lock(&csa->register_lock);
+	csa->priv1.mfc_sr1_RW &= ~MFC_STATE1_MASTER_RUN_CONTROL_MASK;
+	spin_unlock(&csa->register_lock);
+}
+
+/*
+ * Record a tag-status query (@mask, @mode) in the saved problem state.
+ *
+ * Returns -EAGAIN if a query type is already set, 0 otherwise.
+ */
+static int spu_backing_set_mfc_query(struct spu_context * ctx, u32 mask,
+					u32 mode)
+{
+	struct spu_state *csa = &ctx->csa;
+	struct spu_problem_collapsed *prob = &csa->prob;
+	int ret;
+
+	spin_lock(&csa->register_lock);
+	if (prob->dma_querytype_RW) {
+		ret = -EAGAIN;
+	} else {
+		/* FIXME: what are the side-effects of this? */
+		prob->dma_querymask_RW = mask;
+		prob->dma_querytype_RW = mode;
+		/*
+		 * In the current implementation, the SPU context is always
+		 * acquired in runnable state when new bits are added to the
+		 * mask (tagwait), so it's sufficient just to mask
+		 * dma_tagstatus_R with the 'mask' parameter here.
+		 */
+		prob->dma_tagstatus_R &= mask;
+		ret = 0;
+	}
+	spin_unlock(&csa->register_lock);
+
+	return ret;
+}
+
+/* Return the saved dma_tagstatus_R value of @ctx. */
+static u32 spu_backing_read_mfc_tagstatus(struct spu_context * ctx)
+{
+	struct spu_problem_collapsed *prob = &ctx->csa.prob;
+
+	return prob->dma_tagstatus_R;
+}
+
+/* Return the saved dma_qstatus_R value of @ctx. */
+static u32 spu_backing_get_mfc_free_elements(struct spu_context *ctx)
+{
+	struct spu_problem_collapsed *prob = &ctx->csa.prob;
+
+	return prob->dma_qstatus_R;
+}
+
+/*
+ * Queueing an MFC command on a saved context is not implemented: the
+ * register lock is taken and released and -EAGAIN is always returned.
+ */
+static int spu_backing_send_mfc_command(struct spu_context *ctx,
+					struct mfc_dma_command *cmd)
+{
+	struct spu_state *csa = &ctx->csa;
+	int ret = -EAGAIN;
+
+	spin_lock(&csa->register_lock);
+	/* FIXME: set up priv2->puq */
+	spin_unlock(&csa->register_lock);
+
+	return ret;
+}
+
+/* Set MFC_CNTL_RESTART_DMA_COMMAND in the saved mfc_control_RW. */
+static void spu_backing_restart_dma(struct spu_context *ctx)
+{
+	struct spu_state *csa = &ctx->csa;
+
+	csa->priv2.mfc_control_RW |= MFC_CNTL_RESTART_DMA_COMMAND;
+}
+
+struct spu_context_ops spu_backing_ops = { /* context ops that act on the saved state in ctx->csa */
+ .mbox_read = spu_backing_mbox_read,
+ .mbox_stat_read = spu_backing_mbox_stat_read,
+ .mbox_stat_poll = spu_backing_mbox_stat_poll,
+ .ibox_read = spu_backing_ibox_read,
+ .wbox_write = spu_backing_wbox_write,
+ .signal1_read = spu_backing_signal1_read,
+ .signal1_write = spu_backing_signal1_write,
+ .signal2_read = spu_backing_signal2_read,
+ .signal2_write = spu_backing_signal2_write,
+ .signal1_type_set = spu_backing_signal1_type_set,
+ .signal1_type_get = spu_backing_signal1_type_get,
+ .signal2_type_set = spu_backing_signal2_type_set,
+ .signal2_type_get = spu_backing_signal2_type_get,
+ .npc_read = spu_backing_npc_read,
+ .npc_write = spu_backing_npc_write,
+ .status_read = spu_backing_status_read,
+ .get_ls = spu_backing_get_ls,
+ .privcntl_write = spu_backing_privcntl_write,
+ .runcntl_read = spu_backing_runcntl_read,
+ .runcntl_write = spu_backing_runcntl_write,
+ .runcntl_stop = spu_backing_runcntl_stop,
+ .master_start = spu_backing_master_start,
+ .master_stop = spu_backing_master_stop,
+ .set_mfc_query = spu_backing_set_mfc_query,
+ .read_mfc_tagstatus = spu_backing_read_mfc_tagstatus,
+ .get_mfc_free_elements = spu_backing_get_mfc_free_elements,
+ .send_mfc_command = spu_backing_send_mfc_command,
+ .restart_dma = spu_backing_restart_dma,
+};
diff --git a/arch/powerpc/platforms/cell/spufs/context.c b/arch/powerpc/platforms/cell/spufs/context.c
new file mode 100644
index 000000000..7a39cc414
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/context.c
@@ -0,0 +1,175 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * SPU file system -- SPU context management
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ *
+ * Author: Arnd Bergmann <arndb@de.ibm.com>
+ */
+
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/atomic.h>
+#include <linux/sched.h>
+#include <linux/sched/mm.h>
+
+#include <asm/spu.h>
+#include <asm/spu_csa.h>
+#include "spufs.h"
+#include "sputrace.h"
+
+
+atomic_t nr_spu_contexts = ATOMIC_INIT(0);
+
+/*
+ * Allocate and initialise a new SPU context.
+ *
+ * The context starts in SPU_STATE_SAVED with spu_backing_ops, holds a
+ * reference on the current task's mm and, if @gang is non-NULL, is added
+ * to that gang.  nr_spu_contexts is incremented on success.
+ *
+ * Returns the new context, or NULL if the allocation or spu_init_csa()
+ * fails.
+ */
+struct spu_context *alloc_spu_context(struct spu_gang *gang)
+{
+	struct spu_context *ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+
+	if (!ctx)
+		return NULL;
+
+	/* Binding to physical processor deferred until spu_activate(). */
+	if (spu_init_csa(&ctx->csa)) {
+		kfree(ctx);
+		return NULL;
+	}
+
+	spin_lock_init(&ctx->mmio_lock);
+	mutex_init(&ctx->mapping_lock);
+	kref_init(&ctx->kref);
+	mutex_init(&ctx->state_mutex);
+	mutex_init(&ctx->run_mutex);
+
+	init_waitqueue_head(&ctx->ibox_wq);
+	init_waitqueue_head(&ctx->wbox_wq);
+	init_waitqueue_head(&ctx->stop_wq);
+	init_waitqueue_head(&ctx->mfc_wq);
+	init_waitqueue_head(&ctx->run_wq);
+
+	ctx->state = SPU_STATE_SAVED;
+	ctx->ops = &spu_backing_ops;
+	ctx->owner = get_task_mm(current);
+	INIT_LIST_HEAD(&ctx->rq);
+	INIT_LIST_HEAD(&ctx->aff_list);
+	if (gang)
+		spu_gang_add_ctx(gang, ctx);
+
+	__spu_update_sched_info(ctx);
+	spu_set_timeslice(ctx);
+	ctx->stats.util_state = SPU_UTIL_IDLE_LOADED;
+	ctx->stats.tstamp = ktime_get_ns();
+
+	atomic_inc(&nr_spu_contexts);
+
+	return ctx;
+}
+
+/*
+ * kref release callback for a struct spu_context: deactivates the context,
+ * releases its save area, gang membership and profiling reference, and
+ * frees it.
+ */
+void destroy_spu_context(struct kref *kref)
+{
+	struct spu_context *ctx = container_of(kref, struct spu_context, kref);
+
+	spu_context_nospu_trace(destroy_spu_context__enter, ctx);
+
+	mutex_lock(&ctx->state_mutex);
+	spu_deactivate(ctx);
+	mutex_unlock(&ctx->state_mutex);
+
+	spu_fini_csa(&ctx->csa);
+	if (ctx->gang)
+		spu_gang_remove_ctx(ctx->gang, ctx);
+	if (ctx->prof_priv_kref)
+		kref_put(ctx->prof_priv_kref, ctx->prof_priv_release);
+
+	BUG_ON(!list_empty(&ctx->rq));
+	atomic_dec(&nr_spu_contexts);
+
+	kfree(ctx->switch_log);
+	kfree(ctx);
+}
+
+/* Take a reference on @ctx and return it. */
+struct spu_context * get_spu_context(struct spu_context *ctx)
+{
+	struct kref *ref = &ctx->kref;
+
+	kref_get(ref);
+	return ctx;
+}
+
+/*
+ * Drop a reference on @ctx; destroy_spu_context() runs when the last one
+ * goes away.  Returns the result of kref_put().
+ */
+int put_spu_context(struct spu_context *ctx)
+{
+	struct kref *ref = &ctx->kref;
+
+	return kref_put(ref, &destroy_spu_context);
+}
+
+/* give up the mm reference when the context is about to be destroyed */
+void spu_forget(struct spu_context *ctx)
+{
+	struct mm_struct *owner;
+
+	/*
+	 * Open-coded variant of spu_acquire_saved(): the state mutex is
+	 * taken non-interruptibly, and the context must not be put back
+	 * on the runqueue when it is released.
+	 */
+	mutex_lock(&ctx->state_mutex);
+	if (ctx->state != SPU_STATE_SAVED)
+		spu_deactivate(ctx);
+
+	owner = ctx->owner;
+	ctx->owner = NULL;
+	mmput(owner);
+
+	spu_release(ctx);
+}
+
+/* Unmap the range [0, @len) of @mapping if the mapping is set. */
+static void spu_unmap_one(struct address_space *mapping, loff_t len)
+{
+	if (mapping)
+		unmap_mapping_range(mapping, 0, len, 1);
+}
+
+/*
+ * Tear down every mapping recorded in @ctx (local store, mfc, cntl,
+ * signal1, signal2, mss and psmap) while holding ctx->mapping_lock.
+ */
+void spu_unmap_mappings(struct spu_context *ctx)
+{
+	mutex_lock(&ctx->mapping_lock);
+	spu_unmap_one(ctx->local_store, LS_SIZE);
+	spu_unmap_one(ctx->mfc, SPUFS_MFC_MAP_SIZE);
+	spu_unmap_one(ctx->cntl, SPUFS_CNTL_MAP_SIZE);
+	spu_unmap_one(ctx->signal1, SPUFS_SIGNAL_MAP_SIZE);
+	spu_unmap_one(ctx->signal2, SPUFS_SIGNAL_MAP_SIZE);
+	spu_unmap_one(ctx->mss, SPUFS_MSS_MAP_SIZE);
+	spu_unmap_one(ctx->psmap, SPUFS_PS_MAP_SIZE);
+	mutex_unlock(&ctx->mapping_lock);
+}
+
+/**
+ * spu_acquire_saved - lock spu context and make sure it is in saved state
+ * @ctx:	spu context to lock
+ *
+ * If the context is not in SPU_STATE_SAVED it is deactivated and
+ * SPU_SCHED_WAS_ACTIVE is set in its sched_flags.
+ *
+ * Return: 0 on success, or the error returned by spu_acquire().
+ */
+int spu_acquire_saved(struct spu_context *ctx)
+{
+	int err;
+
+	spu_context_nospu_trace(spu_acquire_saved__enter, ctx);
+
+	err = spu_acquire(ctx);
+	if (err)
+		return err;
+
+	if (ctx->state == SPU_STATE_SAVED)
+		return 0;
+
+	set_bit(SPU_SCHED_WAS_ACTIVE, &ctx->sched_flags);
+	spu_deactivate(ctx);
+
+	return 0;
+}
+
+/**
+ * spu_release_saved - unlock spu context and return it to the runqueue
+ * @ctx:	context to unlock
+ *
+ * The context is re-activated only if SPU_SCHED_WAS_ACTIVE was set (the
+ * flag is cleared here) and SPU_SCHED_SPU_RUN is set.
+ */
+void spu_release_saved(struct spu_context *ctx)
+{
+	BUG_ON(ctx->state != SPU_STATE_SAVED);
+
+	if (test_and_clear_bit(SPU_SCHED_WAS_ACTIVE, &ctx->sched_flags)) {
+		if (test_bit(SPU_SCHED_SPU_RUN, &ctx->sched_flags))
+			spu_activate(ctx, 0);
+	}
+
+	spu_release(ctx);
+}
+
diff --git a/arch/powerpc/platforms/cell/spufs/coredump.c b/arch/powerpc/platforms/cell/spufs/coredump.c
new file mode 100644
index 000000000..1a5876180
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/coredump.c
@@ -0,0 +1,182 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * SPU core dump code
+ *
+ * (C) Copyright 2006 IBM Corp.
+ *
+ * Author: Dwayne Grant McConnell <decimal@us.ibm.com>
+ */
+
+#include <linux/elf.h>
+#include <linux/file.h>
+#include <linux/fdtable.h>
+#include <linux/fs.h>
+#include <linux/gfp.h>
+#include <linux/list.h>
+#include <linux/syscalls.h>
+#include <linux/coredump.h>
+#include <linux/binfmts.h>
+
+#include <linux/uaccess.h>
+
+#include "spufs.h"
+
+/*
+ * Compute the total size of the ELF notes for one SPU context.
+ *
+ * For every entry of spufs_coredump_read[] this adds the note header, the
+ * 4-byte-aligned note name "SPU/<dfd>/<entry name>" (including its NUL)
+ * and the 4-byte-aligned entry size.
+ *
+ * @ctx is not inspected here; @dfd is the descriptor number used in the
+ * note names.
+ */
+static int spufs_ctx_note_size(struct spu_context *ctx, int dfd)
+{
+	char fullname[80];
+	int total = 0;
+	int i;
+
+	for (i = 0; spufs_coredump_read[i].name != NULL; i++) {
+		/* bounded: never write past the end of fullname */
+		snprintf(fullname, sizeof(fullname), "SPU/%d/%s", dfd,
+			 spufs_coredump_read[i].name);
+
+		total += sizeof(struct elf_note);
+		total += roundup(strlen(fullname) + 1, 4);
+		total += roundup(spufs_coredump_read[i].size, 4);
+	}
+
+	return total;
+}
+
+/*
+ * iterate_fd() callback: returns @fd + 1 for a file that uses
+ * spufs_context_fops and whose context was not created with
+ * SPU_CREATE_NOSCHED, and 0 for anything else.
+ */
+static int match_context(const void *v, struct file *file, unsigned fd)
+{
+	struct spu_context *ctx;
+
+	if (file->f_op == &spufs_context_fops) {
+		ctx = SPUFS_I(file_inode(file))->i_ctx;
+		if (!(ctx->flags & SPU_CREATE_NOSCHED))
+			return fd + 1;
+	}
+
+	return 0;
+}
+
+/*
+ * The additional architecture-specific notes for Cell are various
+ * context files in the spu context.
+ *
+ * This function iterates over all open file descriptors, starting at *fd,
+ * and sees if they are a directory in spufs. In that case we use spufs
+ * internal functionality to dump them without needing to actually
+ * open the files.
+ *
+ * The descriptor table is not shared, so files can't change or go away.
+ *
+ * On a match *fd is updated to the matching descriptor and the context is
+ * returned with an extra reference taken via get_spu_context(); NULL is
+ * returned when no further descriptor matches.
+ */
+static struct spu_context *coredump_next_context(int *fd)
+{
+	struct spu_context *ctx;
+	struct file *file;
+	int found;
+
+	found = iterate_fd(current->files, *fd, match_context, NULL);
+	if (found == 0)
+		return NULL;
+
+	/* match_context() reports fd + 1 so that 0 can mean "no match" */
+	*fd = found - 1;
+
+	rcu_read_lock();
+	file = lookup_fd_rcu(*fd);
+	ctx = SPUFS_I(file_inode(file))->i_ctx;
+	get_spu_context(ctx);
+	rcu_read_unlock();
+
+	return ctx;
+}
+
+/*
+ * Sum the note sizes of every SPU context found among the current task's
+ * open files.  The walk stops early, returning the size accumulated so
+ * far, if a context cannot be acquired or its size computation fails.
+ */
+int spufs_coredump_extra_notes_size(void)
+{
+	struct spu_context *ctx;
+	int total = 0;
+	int fd, rc;
+	bool stop;
+
+	/* the fd++ step restarts the search after the descriptor just handled */
+	for (fd = 0; (ctx = coredump_next_context(&fd)) != NULL; fd++) {
+		rc = spu_acquire_saved(ctx);
+		stop = rc != 0;
+		if (!stop) {
+			rc = spufs_ctx_note_size(ctx, fd);
+			spu_release_saved(ctx);
+			stop = rc < 0;
+		}
+
+		/* drop the reference taken by coredump_next_context() */
+		put_spu_context(ctx);
+		if (stop)
+			break;
+
+		total += rc;
+	}
+
+	return total;
+}
+
+/*
+ * Emit the ELF note for entry @i of spufs_coredump_read[] of @ctx.
+ *
+ * The note is named "SPU/<dfd>/<entry name>" and has type NT_SPU.  Its
+ * payload comes from the entry's dump() hook if there is one; otherwise
+ * the entry's get() value is formatted as a "0x%.16llx" string.  The
+ * output position is then advanced to the 4-byte-aligned end of the note.
+ *
+ * Returns 0 on success, -EIO if emitting to the core file fails, a
+ * negative error from the dump() hook, or sizeof(buf) if the formatted
+ * value would not fit the local buffer.
+ */
+static int spufs_arch_write_note(struct spu_context *ctx, int i,
+				  struct coredump_params *cprm, int dfd)
+{
+	size_t sz = spufs_coredump_read[i].size;
+	char fullname[80];
+	struct elf_note en;
+	int ret;
+
+	/* bounded: never write past the end of fullname */
+	snprintf(fullname, sizeof(fullname), "SPU/%d/%s", dfd,
+		 spufs_coredump_read[i].name);
+	en.n_namesz = strlen(fullname) + 1;
+	en.n_descsz = sz;
+	en.n_type = NT_SPU;
+
+	if (!dump_emit(cprm, &en, sizeof(en)))
+		return -EIO;
+	if (!dump_emit(cprm, fullname, en.n_namesz))
+		return -EIO;
+	if (!dump_align(cprm, 4))
+		return -EIO;
+
+	if (spufs_coredump_read[i].dump) {
+		ret = spufs_coredump_read[i].dump(ctx, cprm);
+		if (ret < 0)
+			return ret;
+	} else {
+		char buf[32];
+
+		ret = snprintf(buf, sizeof(buf), "0x%.16llx",
+			       spufs_coredump_read[i].get(ctx));
+		if (ret >= sizeof(buf))
+			return sizeof(buf);
+
+		/* count the trailing NUL: */
+		if (!dump_emit(cprm, buf, ret + 1))
+			return -EIO;
+	}
+
+	dump_skip_to(cprm, roundup(cprm->pos - ret + sz, 4));
+	return 0;
+}
+
+/*
+ * Write the notes of every SPU context found among the current task's open
+ * files to the core dump described by @cprm.
+ *
+ * Each context returned by coredump_next_context() carries an extra
+ * reference; it is dropped here on every path so the context is not
+ * leaked.
+ *
+ * Returns 0 on success, or the first non-zero result from
+ * spu_acquire_saved() or spufs_arch_write_note().
+ */
+int spufs_coredump_extra_notes_write(struct coredump_params *cprm)
+{
+	struct spu_context *ctx;
+	int fd, j, rc;
+
+	fd = 0;
+	while ((ctx = coredump_next_context(&fd)) != NULL) {
+		rc = spu_acquire_saved(ctx);
+		if (rc) {
+			put_spu_context(ctx);
+			return rc;
+		}
+
+		for (j = 0; spufs_coredump_read[j].name != NULL; j++) {
+			rc = spufs_arch_write_note(ctx, j, cprm, fd);
+			if (rc)
+				break;
+		}
+
+		spu_release_saved(ctx);
+		/* drop the reference taken by coredump_next_context() */
+		put_spu_context(ctx);
+		if (rc)
+			return rc;
+
+		/* start searching the next fd next time */
+		fd++;
+	}
+
+	return 0;
+}
diff --git a/arch/powerpc/platforms/cell/spufs/fault.c b/arch/powerpc/platforms/cell/spufs/fault.c
new file mode 100644
index 000000000..24adbe3c6
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/fault.c
@@ -0,0 +1,167 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Low-level SPU handling
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ *
+ * Author: Arnd Bergmann <arndb@de.ibm.com>
+ */
+#include <linux/sched/signal.h>
+#include <linux/mm.h>
+
+#include <asm/spu.h>
+#include <asm/spu_csa.h>
+
+#include "spufs.h"
+
+/*
+ * Report an SPE event of kind @type for @ctx.
+ *
+ * Contexts flagged SPU_CREATE_EVENTS_ENABLED only get the event recorded
+ * in ctx->event_return and the waiters on stop_wq woken.  For every other
+ * context the event is converted into a forced signal; @ea is the fault
+ * address reported for data storage events.
+ */
+static void spufs_handle_event(struct spu_context *ctx,
+				unsigned long ea, int type)
+{
+	unsigned long npc;
+
+	if (ctx->flags & SPU_CREATE_EVENTS_ENABLED) {
+		ctx->event_return |= type;
+		wake_up_all(&ctx->stop_wq);
+		return;
+	}
+
+	if (type == SPE_EVENT_INVALID_DMA) {
+		force_sig_fault(SIGBUS, BUS_OBJERR, NULL);
+	} else if (type == SPE_EVENT_SPE_DATA_STORAGE) {
+		ctx->ops->restart_dma(ctx);
+		force_sig_fault(SIGSEGV, SEGV_ACCERR, (void __user *)ea);
+	} else if (type == SPE_EVENT_DMA_ALIGNMENT) {
+		/* DAR isn't set for an alignment fault :( */
+		force_sig_fault(SIGBUS, BUS_ADRALN, NULL);
+	} else if (type == SPE_EVENT_SPE_ERROR) {
+		/* report the address 4 bytes before the value npc_read() gives */
+		npc = ctx->ops->npc_read(ctx);
+		force_sig_fault(SIGILL, ILL_ILLOPC, (void __user *)(npc - 4));
+	}
+}
+
+/*
+ * Turn the pending class 0 interrupt bits saved in ctx->csa into SPE
+ * events.  Returns 0 when nothing was pending, otherwise -EIO after every
+ * pending condition has been reported and the pending word cleared.
+ */
+int spufs_handle_class0(struct spu_context *ctx)
+{
+	unsigned long pending = ctx->csa.class_0_pending & CLASS0_INTR_MASK;
+
+	if (likely(pending == 0))
+		return 0;
+
+	if (pending & CLASS0_DMA_ALIGNMENT_INTR) {
+		spufs_handle_event(ctx, ctx->csa.class_0_dar,
+				   SPE_EVENT_DMA_ALIGNMENT);
+	}
+
+	if (pending & CLASS0_INVALID_DMA_COMMAND_INTR) {
+		spufs_handle_event(ctx, ctx->csa.class_0_dar,
+				   SPE_EVENT_INVALID_DMA);
+	}
+
+	if (pending & CLASS0_SPU_ERROR_INTR) {
+		spufs_handle_event(ctx, ctx->csa.class_0_dar,
+				   SPE_EVENT_SPE_ERROR);
+	}
+
+	ctx->csa.class_0_pending = 0;
+
+	return -EIO;
+}
+
+/*
+ * bottom half handler for page faults, we can't do this from
+ * interrupt context, since we might need to sleep.
+ * we also need to give up the mutex so we can get scheduled
+ * out while waiting for the backing store.
+ *
+ * TODO: try calling hash_page from the interrupt handler first
+ * in order to speed up the easy case.
+ *
+ * Returns 0 when the saved dsisr carries neither MFC_DSISR_PTE_NOT_FOUND
+ * nor MFC_DSISR_ACCESS_DENIED, or when the fault was resolved; otherwise
+ * the non-zero result of copro_handle_mm_fault(), in which case an
+ * SPE_EVENT_SPE_DATA_STORAGE event has been raised for the context.
+ * The function calls spu_release() before resolving the fault and takes
+ * ctx->state_mutex again (uninterruptibly) before touching ctx state.
+ */
+int spufs_handle_class1(struct spu_context *ctx)
+{
+ u64 ea, dsisr, access;
+ unsigned long flags;
+ vm_fault_t flt = 0;
+ int ret;
+
+ /*
+ * dar and dsisr get passed from the registers
+ * to the spu_context, to this function, but not
+ * back to the spu if it gets scheduled again.
+ *
+ * if we don't handle the fault for a saved context
+ * in time, we can still expect to get the same fault
+ * immediately after the context restore.
+ */
+ ea = ctx->csa.class_1_dar;
+ dsisr = ctx->csa.class_1_dsisr;
+
+ if (!(dsisr & (MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED)))
+ return 0;
+
+ spuctx_switch_state(ctx, SPU_UTIL_IOWAIT);
+
+ pr_debug("ctx %p: ea %016llx, dsisr %016llx state %d\n", ctx, ea,
+ dsisr, ctx->state);
+
+ ctx->stats.hash_flt++;
+ if (ctx->state == SPU_STATE_RUNNABLE)
+ ctx->spu->stats.hash_flt++;
+
+ /* we must not hold the lock when entering copro_handle_mm_fault */
+ spu_release(ctx);
+
+ access = (_PAGE_PRESENT | _PAGE_READ);
+ access |= (dsisr & MFC_DSISR_ACCESS_PUT) ? _PAGE_WRITE : 0UL;
+ local_irq_save(flags);
+ ret = hash_page(ea, access, 0x300, dsisr);
+ local_irq_restore(flags);
+
+ /* hashing failed, so try the actual fault handler */
+ if (ret)
+ ret = copro_handle_mm_fault(current->mm, ea, dsisr, &flt);
+
+ /*
+ * This is nasty: we need the state_mutex for all the bookkeeping even
+ * if the syscall was interrupted by a signal. ewww.
+ */
+ mutex_lock(&ctx->state_mutex);
+
+ /*
+ * Clear dsisr under ctxt lock after handling the fault, so that
+ * time slicing will not preempt the context while the page fault
+ * handler is running. Context switch code removes mappings.
+ */
+ ctx->csa.class_1_dar = ctx->csa.class_1_dsisr = 0;
+
+ /*
+ * If we handled the fault successfully and are in runnable
+ * state, restart the DMA.
+ * In case of unhandled error report the problem to user space.
+ */
+ if (!ret) {
+ if (flt & VM_FAULT_MAJOR)
+ ctx->stats.maj_flt++;
+ else
+ ctx->stats.min_flt++;
+ if (ctx->state == SPU_STATE_RUNNABLE) {
+ if (flt & VM_FAULT_MAJOR)
+ ctx->spu->stats.maj_flt++;
+ else
+ ctx->spu->stats.min_flt++;
+ }
+
+ if (ctx->spu)
+ ctx->ops->restart_dma(ctx);
+ } else
+ spufs_handle_event(ctx, ea, SPE_EVENT_SPE_DATA_STORAGE);
+
+ spuctx_switch_state(ctx, SPU_UTIL_SYSTEM);
+ return ret;
+}
diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c
new file mode 100644
index 000000000..02a8158c4
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/file.c
@@ -0,0 +1,2633 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * SPU file system -- file contents
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ *
+ * Author: Arnd Bergmann <arndb@de.ibm.com>
+ */
+
+#undef DEBUG
+
+#include <linux/coredump.h>
+#include <linux/fs.h>
+#include <linux/ioctl.h>
+#include <linux/export.h>
+#include <linux/pagemap.h>
+#include <linux/poll.h>
+#include <linux/ptrace.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+
+#include <asm/io.h>
+#include <asm/time.h>
+#include <asm/spu.h>
+#include <asm/spu_info.h>
+#include <linux/uaccess.h>
+
+#include "spufs.h"
+#include "sputrace.h"
+
+#define SPUFS_MMAP_4K (PAGE_SIZE == 0x1000)
+
+/*
+ * Simple attribute files
+ *
+ * Per-open state of an attribute file: the get/set callbacks and their
+ * opaque argument (taken from inode->i_private at open time), the printf
+ * style format used to render the value, and one text buffer for each
+ * direction.  A NULL get or set callback makes read or write fail with
+ * -EACCES.
+ */
+struct spufs_attr {
+ int (*get)(void *, u64 *);
+ int (*set)(void *, u64);
+ char get_buf[24]; /* enough to store a u64 and "\n\0" */
+ char set_buf[24];
+ void *data;
+ const char *fmt; /* format for read operation */
+ struct mutex mutex; /* protects access to these buffers */
+};
+
+/*
+ * Allocate the per-open attribute state, bind it to @file and mark the
+ * file non-seekable.  Returns -ENOMEM if the allocation fails, otherwise
+ * the result of nonseekable_open().
+ */
+static int spufs_attr_open(struct inode *inode, struct file *file,
+		int (*get)(void *, u64 *), int (*set)(void *, u64),
+		const char *fmt)
+{
+	struct spufs_attr *state = kmalloc(sizeof(*state), GFP_KERNEL);
+
+	if (state == NULL)
+		return -ENOMEM;
+
+	mutex_init(&state->mutex);
+	state->fmt = fmt;
+	state->data = inode->i_private;
+	state->set = set;
+	state->get = get;
+
+	file->private_data = state;
+	return nonseekable_open(inode, file);
+}
+
+/* Free the per-open attribute state allocated by spufs_attr_open(). */
+static int spufs_attr_release(struct inode *inode, struct file *file)
+{
+	struct spufs_attr *state = file->private_data;
+
+	kfree(state);
+	return 0;
+}
+
+/*
+ * Read the attribute as text.  The value is fetched and formatted only
+ * when *ppos is zero; a read at a non-zero position serves the remainder
+ * of the text already sitting in get_buf.
+ */
+static ssize_t spufs_attr_read(struct file *file, char __user *buf,
+		size_t len, loff_t *ppos)
+{
+	struct spufs_attr *attr = file->private_data;
+	size_t text_len;
+	ssize_t ret;
+
+	if (attr->get == NULL)
+		return -EACCES;
+
+	ret = mutex_lock_interruptible(&attr->mutex);
+	if (ret)
+		return ret;
+
+	if (*ppos == 0) {
+		/* first read: fetch and format a fresh value */
+		u64 val;
+
+		ret = attr->get(attr->data, &val);
+		if (ret) {
+			mutex_unlock(&attr->mutex);
+			return ret;
+		}
+		text_len = scnprintf(attr->get_buf, sizeof(attr->get_buf),
+				     attr->fmt, (unsigned long long)val);
+	} else {
+		/* continued read */
+		text_len = strlen(attr->get_buf);
+	}
+
+	ret = simple_read_from_buffer(buf, len, ppos, attr->get_buf, text_len);
+	mutex_unlock(&attr->mutex);
+	return ret;
+}
+
+/*
+ * Parse a number written by user space and hand it to the attribute's
+ * set callback.  At most sizeof(set_buf) - 1 bytes of the input are
+ * looked at, but on success the whole @len is reported as consumed.
+ *
+ * Returns @len on success, -EACCES if the attribute has no set callback,
+ * -EFAULT if the user buffer cannot be read, or the error returned by the
+ * lock acquisition or by the set callback itself.
+ */
+static ssize_t spufs_attr_write(struct file *file, const char __user *buf,
+		size_t len, loff_t *ppos)
+{
+	struct spufs_attr *attr;
+	u64 val;
+	size_t size;
+	ssize_t ret;
+
+	attr = file->private_data;
+	if (!attr->set)
+		return -EACCES;
+
+	ret = mutex_lock_interruptible(&attr->mutex);
+	if (ret)
+		return ret;
+
+	ret = -EFAULT;
+	size = min(sizeof(attr->set_buf) - 1, len);
+	if (copy_from_user(attr->set_buf, buf, size))
+		goto out;
+
+	attr->set_buf[size] = '\0';
+	val = simple_strtol(attr->set_buf, NULL, 0);
+
+	/* do not report success when the setter failed */
+	ret = attr->set(attr->data, val);
+	if (!ret)
+		ret = len; /* claim we got the whole input */
+out:
+	mutex_unlock(&attr->mutex);
+	return ret;
+}
+
+/*
+ * dump_emit() wrapper with a read-like return convention: the number of
+ * bytes written on success, -EIO when dump_emit() reports failure.
+ */
+static ssize_t spufs_dump_emit(struct coredump_params *cprm, void *buf,
+		size_t size)
+{
+	return dump_emit(cprm, buf, size) ? (ssize_t)size : -EIO;
+}
+
+/*
+ * Define a file_operations instance named __fops for an attribute file
+ * backed by the spufs_attr_* helpers.  The generated open routine checks
+ * __fmt against an unsigned long long argument via
+ * __simple_attr_check_format() and passes __get, __set and __fmt on to
+ * spufs_attr_open().
+ */
+#define DEFINE_SPUFS_SIMPLE_ATTRIBUTE(__fops, __get, __set, __fmt) \
+static int __fops ## _open(struct inode *inode, struct file *file) \
+{ \
+ __simple_attr_check_format(__fmt, 0ull); \
+ return spufs_attr_open(inode, file, __get, __set, __fmt); \
+} \
+static const struct file_operations __fops = { \
+ .open = __fops ## _open, \
+ .release = spufs_attr_release, \
+ .read = spufs_attr_read, \
+ .write = spufs_attr_write, \
+ .llseek = generic_file_llseek, \
+};
+
+
+/*
+ * Open the local store file: attach the context to @file and, for the
+ * first opener, record the inode's mapping in ctx->local_store.
+ */
+static int
+spufs_mem_open(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *info = SPUFS_I(inode);
+	struct spu_context *ctx = info->i_ctx;
+
+	mutex_lock(&ctx->mapping_lock);
+	file->private_data = ctx;
+	if (info->i_openers++ == 0)
+		ctx->local_store = inode->i_mapping;
+	mutex_unlock(&ctx->mapping_lock);
+
+	return 0;
+}
+
+/* Drop one opener; the last one clears ctx->local_store again. */
+static int
+spufs_mem_release(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *info = SPUFS_I(inode);
+	struct spu_context *ctx = info->i_ctx;
+
+	mutex_lock(&ctx->mapping_lock);
+	if (--info->i_openers == 0)
+		ctx->local_store = NULL;
+	mutex_unlock(&ctx->mapping_lock);
+
+	return 0;
+}
+
+/* Core dump helper: emit LS_SIZE bytes starting at get_ls(ctx). */
+static ssize_t
+spufs_mem_dump(struct spu_context *ctx, struct coredump_params *cprm)
+{
+	void *ls = ctx->ops->get_ls(ctx);
+
+	return spufs_dump_emit(cprm, ls, LS_SIZE);
+}
+
+/*
+ * read() on the local store file: copy from the LS_SIZE byte buffer
+ * returned by get_ls() while the context is acquired.
+ */
+static ssize_t
+spufs_mem_read(struct file *file, char __user *buffer,
+		size_t size, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	ssize_t nread = spu_acquire(ctx);
+
+	if (nread)
+		return nread;
+
+	nread = simple_read_from_buffer(buffer, size, pos,
+					ctx->ops->get_ls(ctx), LS_SIZE);
+	spu_release(ctx);
+
+	return nread;
+}
+
+/*
+ * write() on the local store file.  A position beyond LS_SIZE fails with
+ * -EFBIG before the context is touched; otherwise the data is copied into
+ * the buffer returned by get_ls() while the context is acquired.
+ */
+static ssize_t
+spufs_mem_write(struct file *file, const char __user *buffer,
+		size_t size, loff_t *ppos)
+{
+	struct spu_context *ctx = file->private_data;
+	int err;
+
+	if (*ppos > LS_SIZE)
+		return -EFBIG;
+
+	err = spu_acquire(ctx);
+	if (err)
+		return err;
+
+	size = simple_write_to_buffer(ctx->ops->get_ls(ctx), LS_SIZE, ppos,
+				      buffer, size);
+	spu_release(ctx);
+
+	return size;
+}
+
+/*
+ * Page fault handler for local store mappings.
+ *
+ * Offsets at or beyond LS_SIZE raise SIGBUS.  While the context is in
+ * SPU_STATE_SAVED the page comes from the vmalloc'ed csa.lscsa->ls copy
+ * and is mapped cached; otherwise it comes from the SPU's
+ * local_store_phys and is mapped non-cached/write-combining.
+ * VM_FAULT_NOPAGE is returned when the context cannot be acquired.
+ */
+static vm_fault_t
+spufs_mem_mmap_fault(struct vm_fault *vmf)
+{
+	struct vm_area_struct *vma = vmf->vma;
+	struct spu_context *ctx = vma->vm_file->private_data;
+	unsigned long offset = vmf->pgoff << PAGE_SHIFT;
+	unsigned long pfn;
+	vm_fault_t ret;
+
+	if (offset >= LS_SIZE)
+		return VM_FAULT_SIGBUS;
+
+	pr_debug("spufs_mem_mmap_fault address=0x%lx, offset=0x%lx\n",
+			vmf->address, offset);
+
+	if (spu_acquire(ctx))
+		return VM_FAULT_NOPAGE;
+
+	if (ctx->state != SPU_STATE_SAVED) {
+		vma->vm_page_prot = pgprot_noncached_wc(vma->vm_page_prot);
+		pfn = (ctx->spu->local_store_phys + offset) >> PAGE_SHIFT;
+	} else {
+		vma->vm_page_prot = pgprot_cached(vma->vm_page_prot);
+		pfn = vmalloc_to_pfn(ctx->csa.lscsa->ls + offset);
+	}
+	ret = vmf_insert_pfn(vma, vmf->address, pfn);
+
+	spu_release(ctx);
+
+	return ret;
+}
+
+/*
+ * ->access() handler for local store mappings: copy @len bytes between
+ * @buf and the local store at the position that corresponds to @address.
+ *
+ * The length is clamped so that the copy neither runs past the end of
+ * the mapping nor past the LS_SIZE bytes of the buffer returned by
+ * get_ls().  The previous check compared the vma-relative offset against
+ * the absolute vma->vm_end and therefore never limited anything.
+ *
+ * Returns the number of bytes copied, -EACCES for a write to a mapping
+ * without VM_WRITE, -EFAULT if @address lies beyond the local store, or
+ * -EINTR if the context could not be acquired.
+ *
+ * NOTE(review): the offset ignores vma->vm_pgoff while the fault handler
+ * uses vmf->pgoff - confirm whether mappings with a non-zero file offset
+ * need to be handled here.
+ */
+static int spufs_mem_mmap_access(struct vm_area_struct *vma,
+				unsigned long address,
+				void *buf, int len, int write)
+{
+	struct spu_context *ctx = vma->vm_file->private_data;
+	unsigned long offset = address - vma->vm_start;
+	char *local_store;
+
+	if (write && !(vma->vm_flags & VM_WRITE))
+		return -EACCES;
+	if (offset >= LS_SIZE)
+		return -EFAULT;
+
+	/* stay inside the mapping ... */
+	if (address + len > vma->vm_end)
+		len = vma->vm_end - address;
+	/* ... and inside the local store itself */
+	if (offset + len > LS_SIZE)
+		len = LS_SIZE - offset;
+
+	if (spu_acquire(ctx))
+		return -EINTR;
+	local_store = ctx->ops->get_ls(ctx);
+	if (write)
+		memcpy_toio(local_store + offset, buf, len);
+	else
+		memcpy_fromio(buf, local_store + offset, len);
+	spu_release(ctx);
+	return len;
+}
+
+/* VMA callbacks installed by spufs_mem_mmap(). */
+static const struct vm_operations_struct spufs_mem_mmap_vmops = {
+	.access	= spufs_mem_mmap_access,
+	.fault	= spufs_mem_mmap_fault,
+};
+
+/*
+ * mmap() on the local store file.  Only shared mappings are accepted;
+ * the VMA is flagged VM_IO | VM_PFNMAP and its pages are populated on
+ * demand through spufs_mem_mmap_vmops.
+ */
+static int spufs_mem_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	if ((vma->vm_flags & VM_SHARED) == 0)
+		return -EINVAL;
+
+	vma->vm_ops = &spufs_mem_mmap_vmops;
+	vm_flags_set(vma, VM_IO | VM_PFNMAP);
+	vma->vm_page_prot = pgprot_noncached_wc(vma->vm_page_prot);
+
+	return 0;
+}
+
+/* Local store file: seekable read/write plus mmap. */
+static const struct file_operations spufs_mem_fops = {
+	.open	 = spufs_mem_open,
+	.release = spufs_mem_release,
+	.llseek	 = generic_file_llseek,
+	.read	 = spufs_mem_read,
+	.write	 = spufs_mem_write,
+	.mmap	 = spufs_mem_mmap,
+};
+
+/*
+ * Common fault handler for mappings of the SPU problem state area.
+ * @ps_offs is the offset of the mapped region inside the problem state
+ * area (added to ctx->spu->problem_phys) and @ps_size its size; faults at
+ * or beyond @ps_size, or with a fatal signal pending, raise SIGBUS.
+ * When the context is in SPU_STATE_SAVED nothing is inserted: the handler
+ * waits for the context to become runnable and returns VM_FAULT_NOPAGE.
+ */
+static vm_fault_t spufs_ps_fault(struct vm_fault *vmf,
+ unsigned long ps_offs,
+ unsigned long ps_size)
+{
+ struct spu_context *ctx = vmf->vma->vm_file->private_data;
+ unsigned long area, offset = vmf->pgoff << PAGE_SHIFT;
+ int err = 0;
+ vm_fault_t ret = VM_FAULT_NOPAGE;
+
+ spu_context_nospu_trace(spufs_ps_fault__enter, ctx);
+
+ if (offset >= ps_size)
+ return VM_FAULT_SIGBUS;
+
+ if (fatal_signal_pending(current))
+ return VM_FAULT_SIGBUS;
+
+ /*
+ * Because we release the mmap_lock, the context may be destroyed while
+ * we're in spu_wait. Grab an extra reference so it isn't destroyed
+ * in the meantime.
+ */
+ get_spu_context(ctx);
+
+ /*
+ * We have to wait for context to be loaded before we have
+ * pages to hand out to the user, but we don't want to wait
+ * with the mmap_lock held.
+ * It is possible to drop the mmap_lock here, but then we need
+ * to return VM_FAULT_NOPAGE because the mappings may have
+ * changed.
+ */
+ if (spu_acquire(ctx))
+ goto refault;
+
+ if (ctx->state == SPU_STATE_SAVED) {
+ mmap_read_unlock(current->mm);
+ spu_context_nospu_trace(spufs_ps_fault__sleep, ctx);
+ err = spufs_wait(ctx->run_wq, ctx->state == SPU_STATE_RUNNABLE);
+ spu_context_trace(spufs_ps_fault__wake, ctx, ctx->spu);
+ mmap_read_lock(current->mm);
+ } else {
+ area = ctx->spu->problem_phys + ps_offs;
+ ret = vmf_insert_pfn(vmf->vma, vmf->address,
+ (area + offset) >> PAGE_SHIFT);
+ spu_context_trace(spufs_ps_fault__insert, ctx, ctx->spu);
+ }
+
+ if (!err)
+ spu_release(ctx);
+
+refault:
+ put_spu_context(ctx);
+ return ret;
+}
+
+#if SPUFS_MMAP_4K
+/* Fault handler for the mapped problem state control area. */
+static vm_fault_t spufs_cntl_mmap_fault(struct vm_fault *vmf)
+{
+	const unsigned long cntl_offs = 0x4000;
+
+	return spufs_ps_fault(vmf, cntl_offs, SPUFS_CNTL_MAP_SIZE);
+}
+
+static const struct vm_operations_struct spufs_cntl_mmap_vmops = {
+	.fault = spufs_cntl_mmap_fault,
+};
+
+/*
+ * mmap support for problem state control area [0x4000 - 0x4fff].
+ * Only shared mappings are accepted; pages are inserted on demand by
+ * spufs_cntl_mmap_fault().
+ */
+static int spufs_cntl_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	if ((vma->vm_flags & VM_SHARED) == 0)
+		return -EINVAL;
+
+	vma->vm_ops = &spufs_cntl_mmap_vmops;
+	vm_flags_set(vma, VM_IO | VM_PFNMAP);
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+	return 0;
+}
+#else /* SPUFS_MMAP_4K */
+#define spufs_cntl_mmap NULL
+#endif /* !SPUFS_MMAP_4K */
+
+/* Attribute getter: store the result of ops->status_read() in *val. */
+static int spufs_cntl_get(void *data, u64 *val)
+{
+	struct spu_context *ctx = data;
+	int err = spu_acquire(ctx);
+
+	if (err)
+		return err;
+
+	*val = ctx->ops->status_read(ctx);
+	spu_release(ctx);
+	return 0;
+}
+
+/* Attribute setter: pass @val to ops->runcntl_write(). */
+static int spufs_cntl_set(void *data, u64 val)
+{
+	struct spu_context *ctx = data;
+	int err = spu_acquire(ctx);
+
+	if (err)
+		return err;
+
+	ctx->ops->runcntl_write(ctx, val);
+	spu_release(ctx);
+	return 0;
+}
+
+/*
+ * Open the cntl file: account for the opener (the first one records the
+ * inode's mapping in ctx->cntl) and set up the simple attribute state.
+ *
+ * If simple_attr_open() fails the opener accounting is undone here,
+ * because ->release() is not invoked for a file whose ->open() failed;
+ * without that i_openers would stay elevated and ctx->cntl would never be
+ * cleared again.
+ *
+ * NOTE(review): simple_attr_open() presumably replaces
+ * file->private_data with its own state, so the ctx pointer stored below
+ * would not survive a successful open - confirm against the mmap fault
+ * path, which reads a context from vm_file->private_data.
+ */
+static int spufs_cntl_open(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *i = SPUFS_I(inode);
+	struct spu_context *ctx = i->i_ctx;
+	int ret;
+
+	mutex_lock(&ctx->mapping_lock);
+	file->private_data = ctx;
+	if (!i->i_openers++)
+		ctx->cntl = inode->i_mapping;
+	mutex_unlock(&ctx->mapping_lock);
+
+	ret = simple_attr_open(inode, file, spufs_cntl_get,
+			       spufs_cntl_set, "0x%08lx");
+	if (ret) {
+		mutex_lock(&ctx->mapping_lock);
+		if (!--i->i_openers)
+			ctx->cntl = NULL;
+		mutex_unlock(&ctx->mapping_lock);
+	}
+
+	return ret;
+}
+
+/*
+ * Release the cntl file: tear down the simple attribute state, then drop
+ * one opener; the last one clears ctx->cntl.
+ */
+static int
+spufs_cntl_release(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *info = SPUFS_I(inode);
+	struct spu_context *ctx = info->i_ctx;
+
+	simple_attr_release(inode, file);
+
+	mutex_lock(&ctx->mapping_lock);
+	if (--info->i_openers == 0)
+		ctx->cntl = NULL;
+	mutex_unlock(&ctx->mapping_lock);
+
+	return 0;
+}
+
+/* cntl file: simple attribute read/write, optionally mmap-able. */
+static const struct file_operations spufs_cntl_fops = {
+	.open	 = spufs_cntl_open,
+	.release = spufs_cntl_release,
+	.llseek	 = no_llseek,
+	.read	 = simple_attr_read,
+	.write	 = simple_attr_write,
+	.mmap	 = spufs_cntl_mmap,
+};
+
+/* Open helper that simply attaches the inode's context to the file. */
+static int
+spufs_regs_open(struct inode *inode, struct file *file)
+{
+	file->private_data = SPUFS_I(inode)->i_ctx;
+
+	return 0;
+}
+
+/* Core dump helper: emit the saved gprs array of the context. */
+static ssize_t
+spufs_regs_dump(struct spu_context *ctx, struct coredump_params *cprm)
+{
+	struct spu_lscsa *lscsa = ctx->csa.lscsa;
+
+	return spufs_dump_emit(cprm, lscsa->gprs, sizeof(lscsa->gprs));
+}
+
+/*
+ * read() on the register file: copy from the saved gprs array while the
+ * context is held via spu_acquire_saved().
+ */
+static ssize_t
+spufs_regs_read(struct file *file, char __user *buffer,
+		size_t size, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	int ret;
+
+	/*
+	 * A read at or past the end would only yield EOF; answer that
+	 * without causing a deschedule.
+	 */
+	if (*pos >= sizeof(ctx->csa.lscsa->gprs))
+		return 0;
+
+	ret = spu_acquire_saved(ctx);
+	if (ret)
+		return ret;
+
+	ret = simple_read_from_buffer(buffer, size, pos,
+				      ctx->csa.lscsa->gprs,
+				      sizeof(ctx->csa.lscsa->gprs));
+	spu_release_saved(ctx);
+
+	return ret;
+}
+
+/*
+ * write() on the register file: copy user data into the saved gprs array
+ * while the context is held via spu_acquire_saved().  A position at or
+ * past the end of the array fails with -EFBIG.
+ */
+static ssize_t
+spufs_regs_write(struct file *file, const char __user *buffer,
+		size_t size, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	struct spu_lscsa *lscsa = ctx->csa.lscsa;
+	int err;
+
+	if (*pos >= sizeof(lscsa->gprs))
+		return -EFBIG;
+
+	err = spu_acquire_saved(ctx);
+	if (err)
+		return err;
+
+	size = simple_write_to_buffer(lscsa->gprs, sizeof(lscsa->gprs), pos,
+				      buffer, size);
+	spu_release_saved(ctx);
+
+	return size;
+}
+
+/* Register file: seekable read/write of the saved gprs. */
+static const struct file_operations spufs_regs_fops = {
+	.open	= spufs_regs_open,
+	.llseek	= generic_file_llseek,
+	.read	= spufs_regs_read,
+	.write	= spufs_regs_write,
+};
+
+/* Core dump helper: emit the saved fpcr of the context. */
+static ssize_t
+spufs_fpcr_dump(struct spu_context *ctx, struct coredump_params *cprm)
+{
+	struct spu_lscsa *lscsa = ctx->csa.lscsa;
+
+	return spufs_dump_emit(cprm, &lscsa->fpcr, sizeof(lscsa->fpcr));
+}
+
+/*
+ * read() on the fpcr file: copy from the saved fpcr while the context is
+ * held via spu_acquire_saved().
+ */
+static ssize_t
+spufs_fpcr_read(struct file *file, char __user * buffer,
+		size_t size, loff_t * pos)
+{
+	struct spu_context *ctx = file->private_data;
+	int ret = spu_acquire_saved(ctx);
+
+	if (ret)
+		return ret;
+
+	ret = simple_read_from_buffer(buffer, size, pos,
+				      &ctx->csa.lscsa->fpcr,
+				      sizeof(ctx->csa.lscsa->fpcr));
+	spu_release_saved(ctx);
+
+	return ret;
+}
+
+/*
+ * write() on the fpcr file: copy user data into the saved fpcr while the
+ * context is held via spu_acquire_saved().  A position at or past the end
+ * of the field fails with -EFBIG.
+ */
+static ssize_t
+spufs_fpcr_write(struct file *file, const char __user * buffer,
+		size_t size, loff_t * pos)
+{
+	struct spu_context *ctx = file->private_data;
+	struct spu_lscsa *lscsa = ctx->csa.lscsa;
+	int err;
+
+	if (*pos >= sizeof(lscsa->fpcr))
+		return -EFBIG;
+
+	err = spu_acquire_saved(ctx);
+	if (err)
+		return err;
+
+	size = simple_write_to_buffer(&lscsa->fpcr, sizeof(lscsa->fpcr), pos,
+				      buffer, size);
+	spu_release_saved(ctx);
+
+	return size;
+}
+
+/* fpcr file: shares the open routine of the register file. */
+static const struct file_operations spufs_fpcr_fops = {
+	.open	= spufs_regs_open,
+	.llseek	= generic_file_llseek,
+	.read	= spufs_fpcr_read,
+	.write	= spufs_fpcr_write,
+};
+
+/*
+ * generic open function for all pipe-like files: attach the context and
+ * open the file as a stream.
+ */
+static int spufs_pipe_open(struct inode *inode, struct file *file)
+{
+	file->private_data = SPUFS_I(inode)->i_ctx;
+
+	return stream_open(inode, file);
+}
+
+/*
+ * Read as many bytes from the mailbox as possible, until
+ * one of the conditions becomes true:
+ *
+ * - no more data available in the mailbox
+ * - end of the user provided buffer
+ * - end of the mapped area
+ *
+ * Data is transferred in 4-byte units.  Returns the number of bytes
+ * stored, -EINVAL for buffers shorter than 4 bytes, -EFAULT if not even
+ * the first word could be stored, or -EAGAIN if the mailbox was empty.
+ */
+static ssize_t spufs_mbox_read(struct file *file, char __user *buf,
+			size_t len, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	u32 __user *udata = (void __user *)buf;
+	u32 mbox_data;
+	ssize_t count;
+
+	if (len < 4)
+		return -EINVAL;
+
+	count = spu_acquire(ctx);
+	if (count)
+		return count;
+
+	count = 0;
+	while ((count + 4) <= len) {
+		if (ctx->ops->mbox_read(ctx, &mbox_data) == 0)
+			break;
+
+		/*
+		 * at the end of the mapped area, we can fault
+		 * but still need to return the data we have
+		 * read successfully so far.
+		 */
+		if (put_user(mbox_data, udata)) {
+			if (!count)
+				count = -EFAULT;
+			break;
+		}
+
+		count += 4;
+		udata++;
+	}
+	spu_release(ctx);
+
+	return count ? count : -EAGAIN;
+}
+
+/* mbox file: read-only stream. */
+static const struct file_operations spufs_mbox_fops = {
+	.open	= spufs_pipe_open,
+	.llseek	= no_llseek,
+	.read	= spufs_mbox_read,
+};
+
+/*
+ * Return the low 8 bits of ops->mbox_stat_read() as a 4-byte value.
+ * Buffers shorter than 4 bytes are rejected with -EINVAL.
+ */
+static ssize_t spufs_mbox_stat_read(struct file *file, char __user *buf,
+			size_t len, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	u32 mbox_stat;
+	ssize_t ret;
+
+	if (len < 4)
+		return -EINVAL;
+
+	ret = spu_acquire(ctx);
+	if (ret)
+		return ret;
+	mbox_stat = ctx->ops->mbox_stat_read(ctx) & 0xff;
+	spu_release(ctx);
+
+	return copy_to_user(buf, &mbox_stat, sizeof mbox_stat) ? -EFAULT : 4;
+}
+
+/* mbox_stat file: read-only stream. */
+static const struct file_operations spufs_mbox_stat_fops = {
+	.open	= spufs_pipe_open,
+	.llseek	= no_llseek,
+	.read	= spufs_mbox_stat_read,
+};
+
+/* low-level ibox access function: forwards to ops->ibox_read() */
+size_t spu_ibox_read(struct spu_context *ctx, u32 *data)
+{
+	size_t got = ctx->ops->ibox_read(ctx, data);
+
+	return got;
+}
+
+/*
+ * interrupt-level ibox callback function: wake everyone waiting on the
+ * ibox wait queue of the context bound to @spu, if there is one.
+ */
+void spufs_ibox_callback(struct spu *spu)
+{
+	struct spu_context *ctx = spu->ctx;
+
+	if (!ctx)
+		return;
+
+	wake_up_all(&ctx->ibox_wq);
+}
+
+/*
+ * Read as many bytes from the interrupt mailbox as possible, until
+ * one of the conditions becomes true:
+ *
+ * - no more data available in the mailbox
+ * - end of the user provided buffer
+ * - end of the mapped area
+ *
+ * If the file is opened without O_NONBLOCK, we wait here until
+ * any data is available, but return when we have been able to
+ * read something.
+ *
+ * Returns the number of bytes stored (a multiple of 4), -EINVAL for
+ * buffers shorter than 4 bytes, -EAGAIN for an empty mailbox with
+ * O_NONBLOCK set, or the error from spu_acquire(), spufs_wait() or the
+ * first put_user().
+ *
+ * NOTE(review): a failing spufs_wait() jumps to "out" without calling
+ * spu_release(); presumably spufs_wait() has already dropped the context
+ * lock in that case - confirm against its definition.
+ */
+static ssize_t spufs_ibox_read(struct file *file, char __user *buf,
+ size_t len, loff_t *pos)
+{
+ struct spu_context *ctx = file->private_data;
+ u32 ibox_data, __user *udata = (void __user *)buf;
+ ssize_t count;
+
+ if (len < 4)
+ return -EINVAL;
+
+ count = spu_acquire(ctx);
+ if (count)
+ goto out;
+
+ /* wait only for the first element */
+ count = 0;
+ if (file->f_flags & O_NONBLOCK) {
+ if (!spu_ibox_read(ctx, &ibox_data)) {
+ count = -EAGAIN;
+ goto out_unlock;
+ }
+ } else {
+ count = spufs_wait(ctx->ibox_wq, spu_ibox_read(ctx, &ibox_data));
+ if (count)
+ goto out;
+ }
+
+ /* if we can't write at all, return -EFAULT */
+ count = put_user(ibox_data, udata);
+ if (count)
+ goto out_unlock;
+
+ for (count = 4, udata++; (count + 4) <= len; count += 4, udata++) {
+ int ret;
+ ret = ctx->ops->ibox_read(ctx, &ibox_data);
+ if (ret == 0)
+ break;
+ /*
+ * at the end of the mapped area, we can fault
+ * but still need to return the data we have
+ * read successfully so far.
+ */
+ ret = put_user(ibox_data, udata);
+ if (ret)
+ break;
+ }
+
+out_unlock:
+ spu_release(ctx);
+out:
+ return count;
+}
+
+/*
+ * poll() on the ibox file: register on the ibox wait queue and report
+ * readability as computed by ops->mbox_stat_poll().
+ */
+static __poll_t spufs_ibox_poll(struct file *file, poll_table *wait)
+{
+	struct spu_context *ctx = file->private_data;
+	__poll_t ready;
+
+	poll_wait(file, &ctx->ibox_wq, wait);
+
+	/*
+	 * For now keep this uninterruptible and also ignore the rule
+	 * that poll should not sleep.  Will be fixed later.
+	 */
+	mutex_lock(&ctx->state_mutex);
+	ready = ctx->ops->mbox_stat_poll(ctx, EPOLLIN | EPOLLRDNORM);
+	spu_release(ctx);
+
+	return ready;
+}
+
+/* ibox file: read-only stream with poll support. */
+static const struct file_operations spufs_ibox_fops = {
+	.open	= spufs_pipe_open,
+	.llseek	= no_llseek,
+	.read	= spufs_ibox_read,
+	.poll	= spufs_ibox_poll,
+};
+
+/*
+ * Return bits 16..23 of ops->mbox_stat_read() as a 4-byte value.
+ * Buffers shorter than 4 bytes are rejected with -EINVAL.
+ */
+static ssize_t spufs_ibox_stat_read(struct file *file, char __user *buf,
+			size_t len, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	u32 ibox_stat;
+	ssize_t ret;
+
+	if (len < 4)
+		return -EINVAL;
+
+	ret = spu_acquire(ctx);
+	if (ret)
+		return ret;
+	ibox_stat = (ctx->ops->mbox_stat_read(ctx) >> 16) & 0xff;
+	spu_release(ctx);
+
+	return copy_to_user(buf, &ibox_stat, sizeof ibox_stat) ? -EFAULT : 4;
+}
+
+/* ibox_stat file: read-only stream. */
+static const struct file_operations spufs_ibox_stat_fops = {
+	.open	= spufs_pipe_open,
+	.llseek	= no_llseek,
+	.read	= spufs_ibox_stat_read,
+};
+
+/* low-level mailbox write: forwards to ops->wbox_write() */
+size_t spu_wbox_write(struct spu_context *ctx, u32 data)
+{
+	size_t written = ctx->ops->wbox_write(ctx, data);
+
+	return written;
+}
+
+/*
+ * interrupt-level wbox callback function: wake everyone waiting on the
+ * wbox wait queue of the context bound to @spu, if there is one.
+ */
+void spufs_wbox_callback(struct spu *spu)
+{
+	struct spu_context *ctx = spu->ctx;
+
+	if (!ctx)
+		return;
+
+	wake_up_all(&ctx->wbox_wq);
+}
+
+/*
+ * Write as many bytes to the interrupt mailbox as possible, until
+ * one of the conditions becomes true:
+ *
+ * - the mailbox is full
+ * - end of the user provided buffer
+ * - end of the mapped area
+ *
+ * If the file is opened without O_NONBLOCK, we wait here until
+ * space is available, but return when we have been able to
+ * write something.
+ *
+ * The first word is fetched from user space before the context is
+ * acquired.  Returns the number of bytes written (a multiple of 4),
+ * -EINVAL for buffers shorter than 4 bytes, -EFAULT if the first word
+ * cannot be read, -EAGAIN for a full mailbox with O_NONBLOCK set, or the
+ * error from spu_acquire() or spufs_wait().
+ *
+ * NOTE(review): a failing spufs_wait() jumps to "out" without calling
+ * spu_release(); presumably spufs_wait() has already dropped the context
+ * lock in that case - confirm against its definition.
+ */
+static ssize_t spufs_wbox_write(struct file *file, const char __user *buf,
+ size_t len, loff_t *pos)
+{
+ struct spu_context *ctx = file->private_data;
+ u32 wbox_data, __user *udata = (void __user *)buf;
+ ssize_t count;
+
+ if (len < 4)
+ return -EINVAL;
+
+ if (get_user(wbox_data, udata))
+ return -EFAULT;
+
+ count = spu_acquire(ctx);
+ if (count)
+ goto out;
+
+ /*
+ * make sure we can at least write one element, by waiting
+ * in case of !O_NONBLOCK
+ */
+ count = 0;
+ if (file->f_flags & O_NONBLOCK) {
+ if (!spu_wbox_write(ctx, wbox_data)) {
+ count = -EAGAIN;
+ goto out_unlock;
+ }
+ } else {
+ count = spufs_wait(ctx->wbox_wq, spu_wbox_write(ctx, wbox_data));
+ if (count)
+ goto out;
+ }
+
+
+ /* write as much as possible */
+ for (count = 4, udata++; (count + 4) <= len; count += 4, udata++) {
+ int ret;
+ ret = get_user(wbox_data, udata);
+ if (ret)
+ break;
+
+ ret = spu_wbox_write(ctx, wbox_data);
+ if (ret == 0)
+ break;
+ }
+
+out_unlock:
+ spu_release(ctx);
+out:
+ return count;
+}
+
+/*
+ * poll() on the wbox file: register on the wbox wait queue and report
+ * writability as computed by ops->mbox_stat_poll().
+ */
+static __poll_t spufs_wbox_poll(struct file *file, poll_table *wait)
+{
+	struct spu_context *ctx = file->private_data;
+	__poll_t ready;
+
+	poll_wait(file, &ctx->wbox_wq, wait);
+
+	/*
+	 * For now keep this uninterruptible and also ignore the rule
+	 * that poll should not sleep.  Will be fixed later.
+	 */
+	mutex_lock(&ctx->state_mutex);
+	ready = ctx->ops->mbox_stat_poll(ctx, EPOLLOUT | EPOLLWRNORM);
+	spu_release(ctx);
+
+	return ready;
+}
+
+/* wbox file: write-only stream with poll support. */
+static const struct file_operations spufs_wbox_fops = {
+	.open	= spufs_pipe_open,
+	.llseek	= no_llseek,
+	.write	= spufs_wbox_write,
+	.poll	= spufs_wbox_poll,
+};
+
+/*
+ * Return bits 8..15 of ops->mbox_stat_read() as a 4-byte value.
+ * Buffers shorter than 4 bytes are rejected with -EINVAL.
+ */
+static ssize_t spufs_wbox_stat_read(struct file *file, char __user *buf,
+			size_t len, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	u32 wbox_stat;
+	ssize_t ret;
+
+	if (len < 4)
+		return -EINVAL;
+
+	ret = spu_acquire(ctx);
+	if (ret)
+		return ret;
+	wbox_stat = (ctx->ops->mbox_stat_read(ctx) >> 8) & 0xff;
+	spu_release(ctx);
+
+	return copy_to_user(buf, &wbox_stat, sizeof wbox_stat) ? -EFAULT : 4;
+}
+
+/* wbox_stat file: read-only stream. */
+static const struct file_operations spufs_wbox_stat_fops = {
+	.open	= spufs_pipe_open,
+	.llseek	= no_llseek,
+	.read	= spufs_wbox_stat_read,
+};
+
+/*
+ * Open the signal1 file: attach the context to @file and, for the first
+ * opener, record the inode's mapping in ctx->signal1.
+ */
+static int spufs_signal1_open(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *info = SPUFS_I(inode);
+	struct spu_context *ctx = info->i_ctx;
+
+	mutex_lock(&ctx->mapping_lock);
+	file->private_data = ctx;
+	if (info->i_openers++ == 0)
+		ctx->signal1 = inode->i_mapping;
+	mutex_unlock(&ctx->mapping_lock);
+
+	return nonseekable_open(inode, file);
+}
+
+/* Drop one opener; the last one clears ctx->signal1 again. */
+static int
+spufs_signal1_release(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *info = SPUFS_I(inode);
+	struct spu_context *ctx = info->i_ctx;
+
+	mutex_lock(&ctx->mapping_lock);
+	if (--info->i_openers == 0)
+		ctx->signal1 = NULL;
+	mutex_unlock(&ctx->mapping_lock);
+
+	return 0;
+}
+
+/*
+ * Core dump helper: emit the saved channel 3 data word, or nothing when
+ * the saved channel 3 count is zero.
+ */
+static ssize_t spufs_signal1_dump(struct spu_context *ctx,
+		struct coredump_params *cprm)
+{
+	if (ctx->csa.spu_chnlcnt_RW[3])
+		return spufs_dump_emit(cprm, &ctx->csa.spu_chnldata_RW[3],
+				       sizeof(ctx->csa.spu_chnldata_RW[3]));
+
+	return 0;
+}
+
+/*
+ * Copy the saved channel 3 data word to user space.  Returns the number
+ * of bytes copied, 0 when the saved channel count is zero, -EINVAL when
+ * @len is too small for the word, or -EFAULT on a failed copy.
+ */
+static ssize_t __spufs_signal1_read(struct spu_context *ctx, char __user *buf,
+			size_t len)
+{
+	const size_t word = sizeof(ctx->csa.spu_chnldata_RW[3]);
+
+	if (len < word)
+		return -EINVAL;
+	if (ctx->csa.spu_chnlcnt_RW[3] == 0)
+		return 0;
+	if (copy_to_user(buf, &ctx->csa.spu_chnldata_RW[3], word))
+		return -EFAULT;
+
+	return word;
+}
+
+/* read() on signal1: run __spufs_signal1_read() on the saved context. */
+static ssize_t spufs_signal1_read(struct file *file, char __user *buf,
+			size_t len, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	int ret = spu_acquire_saved(ctx);
+
+	if (ret)
+		return ret;
+
+	ret = __spufs_signal1_read(ctx, buf, len);
+	spu_release_saved(ctx);
+
+	return ret;
+}
+
+/*
+ * write() on signal1: take the first 4 bytes of the user buffer and pass
+ * them to ops->signal1_write().  Always consumes exactly 4 bytes.
+ */
+static ssize_t spufs_signal1_write(struct file *file, const char __user *buf,
+			size_t len, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	ssize_t ret;
+	u32 data;
+
+	if (len < 4)
+		return -EINVAL;
+
+	if (copy_from_user(&data, buf, 4))
+		return -EFAULT;
+
+	ret = spu_acquire(ctx);
+	if (ret)
+		return ret;
+
+	ctx->ops->signal1_write(ctx, data);
+	spu_release(ctx);
+
+	return 4;
+}
+
+/*
+ * Fault handler for signal1 mappings; the problem state offset depends
+ * on SPUFS_SIGNAL_MAP_SIZE.
+ */
+static vm_fault_t
+spufs_signal1_mmap_fault(struct vm_fault *vmf)
+{
+#if SPUFS_SIGNAL_MAP_SIZE == 0x1000
+	const unsigned long sig_offs = 0x14000;
+#elif SPUFS_SIGNAL_MAP_SIZE == 0x10000
+	/* For 64k pages, both signal1 and signal2 can be used to mmap the whole
+	 * signal 1 and 2 area
+	 */
+	const unsigned long sig_offs = 0x10000;
+#else
+#error unsupported page size
+#endif
+
+	return spufs_ps_fault(vmf, sig_offs, SPUFS_SIGNAL_MAP_SIZE);
+}
+
+/* VMA callbacks installed by spufs_signal1_mmap(): fault handling only. */
+static const struct vm_operations_struct spufs_signal1_mmap_vmops = {
+ .fault = spufs_signal1_mmap_fault,
+};
+
+/*
+ * mmap() on signal1.  Only shared mappings are accepted; the VMA is
+ * flagged VM_IO | VM_PFNMAP, mapped non-cached and populated on demand.
+ */
+static int spufs_signal1_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	if ((vma->vm_flags & VM_SHARED) == 0)
+		return -EINVAL;
+
+	vma->vm_ops = &spufs_signal1_mmap_vmops;
+	vm_flags_set(vma, VM_IO | VM_PFNMAP);
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+	return 0;
+}
+
+/* signal1 file: non-seekable read/write plus mmap. */
+static const struct file_operations spufs_signal1_fops = {
+	.open	 = spufs_signal1_open,
+	.release = spufs_signal1_release,
+	.llseek	 = no_llseek,
+	.read	 = spufs_signal1_read,
+	.write	 = spufs_signal1_write,
+	.mmap	 = spufs_signal1_mmap,
+};
+
+/* Same as spufs_signal1_fops, but without a read method. */
+static const struct file_operations spufs_signal1_nosched_fops = {
+	.open	 = spufs_signal1_open,
+	.release = spufs_signal1_release,
+	.llseek	 = no_llseek,
+	.write	 = spufs_signal1_write,
+	.mmap	 = spufs_signal1_mmap,
+};
+
+/*
+ * Open the signal2 file: attach the context to @file and, for the first
+ * opener, record the inode's mapping in ctx->signal2.
+ */
+static int spufs_signal2_open(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *info = SPUFS_I(inode);
+	struct spu_context *ctx = info->i_ctx;
+
+	mutex_lock(&ctx->mapping_lock);
+	file->private_data = ctx;
+	if (info->i_openers++ == 0)
+		ctx->signal2 = inode->i_mapping;
+	mutex_unlock(&ctx->mapping_lock);
+
+	return nonseekable_open(inode, file);
+}
+
+/* Drop one opener; the last one clears ctx->signal2 again. */
+static int
+spufs_signal2_release(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *info = SPUFS_I(inode);
+	struct spu_context *ctx = info->i_ctx;
+
+	mutex_lock(&ctx->mapping_lock);
+	if (--info->i_openers == 0)
+		ctx->signal2 = NULL;
+	mutex_unlock(&ctx->mapping_lock);
+
+	return 0;
+}
+
+/*
+ * Core dump helper: emit the saved channel 4 data word, or nothing when
+ * the saved channel 4 count is zero.
+ */
+static ssize_t spufs_signal2_dump(struct spu_context *ctx,
+		struct coredump_params *cprm)
+{
+	if (ctx->csa.spu_chnlcnt_RW[4])
+		return spufs_dump_emit(cprm, &ctx->csa.spu_chnldata_RW[4],
+				       sizeof(ctx->csa.spu_chnldata_RW[4]));
+
+	return 0;
+}
+
+/*
+ * Copy the saved channel 4 data word to user space.  Returns the number
+ * of bytes copied, 0 when the saved channel count is zero, -EINVAL when
+ * @len is too small for the word, or -EFAULT on a failed copy.
+ */
+static ssize_t __spufs_signal2_read(struct spu_context *ctx, char __user *buf,
+			size_t len)
+{
+	const size_t word = sizeof(ctx->csa.spu_chnldata_RW[4]);
+
+	if (len < word)
+		return -EINVAL;
+	if (ctx->csa.spu_chnlcnt_RW[4] == 0)
+		return 0;
+	if (copy_to_user(buf, &ctx->csa.spu_chnldata_RW[4], word))
+		return -EFAULT;
+
+	return word;
+}
+
+/* read() on signal2: run __spufs_signal2_read() on the saved context. */
+static ssize_t spufs_signal2_read(struct file *file, char __user *buf,
+			size_t len, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	int ret = spu_acquire_saved(ctx);
+
+	if (ret)
+		return ret;
+
+	ret = __spufs_signal2_read(ctx, buf, len);
+	spu_release_saved(ctx);
+
+	return ret;
+}
+
+/*
+ * write() on signal2: take the first 4 bytes of the user buffer and pass
+ * them to ops->signal2_write().  Always consumes exactly 4 bytes.
+ */
+static ssize_t spufs_signal2_write(struct file *file, const char __user *buf,
+			size_t len, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	ssize_t ret;
+	u32 data;
+
+	if (len < 4)
+		return -EINVAL;
+
+	if (copy_from_user(&data, buf, 4))
+		return -EFAULT;
+
+	ret = spu_acquire(ctx);
+	if (ret)
+		return ret;
+
+	ctx->ops->signal2_write(ctx, data);
+	spu_release(ctx);
+
+	return 4;
+}
+
+#if SPUFS_MMAP_4K
+/*
+ * Fault handler for signal2 mappings.  The problem-state offset handed to
+ * spufs_ps_fault() depends on the configured SPUFS_SIGNAL_MAP_SIZE.
+ */
+static vm_fault_t
+spufs_signal2_mmap_fault(struct vm_fault *vmf)
+{
+#if SPUFS_SIGNAL_MAP_SIZE == 0x1000
+#define SPUFS_SIGNAL2_PS_OFFSET 0x1c000
+#elif SPUFS_SIGNAL_MAP_SIZE == 0x10000
+	/*
+	 * With 64k pages either of signal1 and signal2 may be used to map
+	 * the whole signal 1 and 2 area.
+	 */
+#define SPUFS_SIGNAL2_PS_OFFSET 0x10000
+#else
+#error unsupported page size
+#endif
+	return spufs_ps_fault(vmf, SPUFS_SIGNAL2_PS_OFFSET,
+			      SPUFS_SIGNAL_MAP_SIZE);
+#undef SPUFS_SIGNAL2_PS_OFFSET
+}
+
+/* VMA operations for signal2 mappings: everything is populated on fault. */
+static const struct vm_operations_struct spufs_signal2_mmap_vmops = {
+	.fault = spufs_signal2_mmap_fault,
+};
+
+/*
+ * mmap() handler for signal2.  Only shared mappings are accepted; the VMA
+ * is flagged VM_IO | VM_PFNMAP, made non-cached and wired to the signal2
+ * fault handler.
+ */
+static int spufs_signal2_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	if ((vma->vm_flags & VM_SHARED) == 0)
+		return -EINVAL;
+
+	vm_flags_set(vma, VM_IO | VM_PFNMAP);
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+	vma->vm_ops = &spufs_signal2_mmap_vmops;
+
+	return 0;
+}
+#else /* SPUFS_MMAP_4K */
+#define spufs_signal2_mmap NULL
+#endif /* !SPUFS_MMAP_4K */
+
+/* File operations for signal2, including read access. */
+static const struct file_operations spufs_signal2_fops = {
+	.llseek = no_llseek,
+	.read = spufs_signal2_read,
+	.write = spufs_signal2_write,
+	.mmap = spufs_signal2_mmap,
+	.open = spufs_signal2_open,
+	.release = spufs_signal2_release,
+};
+
+/* Same as spufs_signal2_fops but without a read handler. */
+static const struct file_operations spufs_signal2_nosched_fops = {
+	.llseek = no_llseek,
+	.write = spufs_signal2_write,
+	.mmap = spufs_signal2_mmap,
+	.open = spufs_signal2_open,
+	.release = spufs_signal2_release,
+};
+
+/*
+ * Wrapper around DEFINE_SPUFS_SIMPLE_ATTRIBUTE.  It generates a getter
+ * named __<get> that, depending on __acquire, calls <get> with the context
+ * acquired, acquired in saved state, or not acquired at all.  The setter
+ * is handed through unchanged, so it has to do its own acquiring.
+ *
+ * The expansion already ends in a semicolon.
+ */
+#define SPU_ATTR_NOACQUIRE 0
+#define SPU_ATTR_ACQUIRE 1
+#define SPU_ATTR_ACQUIRE_SAVED 2
+
+#define DEFINE_SPUFS_ATTRIBUTE(__name, __get, __set, __fmt, __acquire) \
+static int __##__get(void *data, u64 *val) \
+{ \
+	struct spu_context *ctx = data; \
+	int ret = 0; \
+ \
+	switch (__acquire) { \
+	case SPU_ATTR_ACQUIRE: \
+		ret = spu_acquire(ctx); \
+		if (ret) \
+			return ret; \
+		*val = __get(ctx); \
+		spu_release(ctx); \
+		break; \
+	case SPU_ATTR_ACQUIRE_SAVED: \
+		ret = spu_acquire_saved(ctx); \
+		if (ret) \
+			return ret; \
+		*val = __get(ctx); \
+		spu_release_saved(ctx); \
+		break; \
+	default: \
+		*val = __get(ctx); \
+		break; \
+	} \
+ \
+	return 0; \
+} \
+DEFINE_SPUFS_SIMPLE_ATTRIBUTE(__name, __##__get, __set, __fmt);
+
+/*
+ * Setter for signal1_type: forwards @val to the context's
+ * signal1_type_set operation with the context acquired.
+ */
+static int spufs_signal1_type_set(void *data, u64 val)
+{
+	struct spu_context *ctx = data;
+	int rc = spu_acquire(ctx);
+
+	if (rc == 0) {
+		ctx->ops->signal1_type_set(ctx, val);
+		spu_release(ctx);
+	}
+
+	return rc;
+}
+
+/* Getter for signal1_type; the generated wrapper acquires the context. */
+static u64 spufs_signal1_type_get(struct spu_context *ctx)
+{
+	return ctx->ops->signal1_type_get(ctx);
+}
+DEFINE_SPUFS_ATTRIBUTE(spufs_signal1_type, spufs_signal1_type_get,
+		       spufs_signal1_type_set, "%llu\n", SPU_ATTR_ACQUIRE);
+
+
+/*
+ * Setter for signal2_type: forwards @val to the context's
+ * signal2_type_set operation with the context acquired.
+ */
+static int spufs_signal2_type_set(void *data, u64 val)
+{
+	struct spu_context *ctx = data;
+	int rc = spu_acquire(ctx);
+
+	if (rc == 0) {
+		ctx->ops->signal2_type_set(ctx, val);
+		spu_release(ctx);
+	}
+
+	return rc;
+}
+
+/* Getter for signal2_type; the generated wrapper acquires the context. */
+static u64 spufs_signal2_type_get(struct spu_context *ctx)
+{
+	return ctx->ops->signal2_type_get(ctx);
+}
+DEFINE_SPUFS_ATTRIBUTE(spufs_signal2_type, spufs_signal2_type_get,
+		       spufs_signal2_type_set, "%llu\n", SPU_ATTR_ACQUIRE);
+
+#if SPUFS_MMAP_4K
+/*
+ * Fault handler for mss mappings: resolves the fault against the
+ * problem-state area at offset 0, SPUFS_MSS_MAP_SIZE bytes long.
+ */
+static vm_fault_t spufs_mss_mmap_fault(struct vm_fault *vmf)
+{
+	return spufs_ps_fault(vmf, 0x0000, SPUFS_MSS_MAP_SIZE);
+}
+
+/* VMA operations for mss mappings: everything is populated on fault. */
+static const struct vm_operations_struct spufs_mss_mmap_vmops = {
+	.fault = spufs_mss_mmap_fault,
+};
+
+/*
+ * mmap() handler for the mss file, which maps the problem-state area that
+ * starts at offset 0x0000 (see spufs_mss_mmap_fault).  Only shared
+ * mappings are accepted; the VMA is flagged VM_IO | VM_PFNMAP and made
+ * non-cached.
+ */
+static int spufs_mss_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	if ((vma->vm_flags & VM_SHARED) == 0)
+		return -EINVAL;
+
+	vm_flags_set(vma, VM_IO | VM_PFNMAP);
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+	vma->vm_ops = &spufs_mss_mmap_vmops;
+
+	return 0;
+}
+#else /* SPUFS_MMAP_4K */
+#define spufs_mss_mmap NULL
+#endif /* !SPUFS_MMAP_4K */
+
+/*
+ * open() handler for mss.  The first opener publishes the inode's address
+ * space in ctx->mss; the opener count is protected by ctx->mapping_lock.
+ */
+static int spufs_mss_open(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *info = SPUFS_I(inode);
+	struct spu_context *ctx = info->i_ctx;
+
+	file->private_data = ctx;
+
+	mutex_lock(&ctx->mapping_lock);
+	if (info->i_openers++ == 0)
+		ctx->mss = inode->i_mapping;
+	mutex_unlock(&ctx->mapping_lock);
+
+	return nonseekable_open(inode, file);
+}
+
+/*
+ * release() handler for mss.  When the last opener goes away ctx->mss is
+ * cleared again, under ctx->mapping_lock.
+ */
+static int spufs_mss_release(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *info = SPUFS_I(inode);
+	struct spu_context *ctx = info->i_ctx;
+
+	mutex_lock(&ctx->mapping_lock);
+	if (--info->i_openers == 0)
+		ctx->mss = NULL;
+	mutex_unlock(&ctx->mapping_lock);
+
+	return 0;
+}
+
+/* File operations for mss: mapping only, no read or write handlers. */
+static const struct file_operations spufs_mss_fops = {
+	.llseek = no_llseek,
+	.mmap = spufs_mss_mmap,
+	.open = spufs_mss_open,
+	.release = spufs_mss_release,
+};
+
+/*
+ * Fault handler for psmap mappings: resolves the fault against the
+ * problem-state area at offset 0, SPUFS_PS_MAP_SIZE bytes long.
+ */
+static vm_fault_t spufs_psmap_mmap_fault(struct vm_fault *vmf)
+{
+	return spufs_ps_fault(vmf, 0x0000, SPUFS_PS_MAP_SIZE);
+}
+
+/* VMA operations for psmap mappings: everything is populated on fault. */
+static const struct vm_operations_struct spufs_psmap_mmap_vmops = {
+	.fault = spufs_psmap_mmap_fault,
+};
+
+/*
+ * mmap() handler for the full problem state area [0x00000 - 0x1ffff].
+ * Only shared mappings are accepted; the VMA is flagged
+ * VM_IO | VM_PFNMAP and made non-cached.
+ */
+static int spufs_psmap_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	if ((vma->vm_flags & VM_SHARED) == 0)
+		return -EINVAL;
+
+	vm_flags_set(vma, VM_IO | VM_PFNMAP);
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+	vma->vm_ops = &spufs_psmap_mmap_vmops;
+
+	return 0;
+}
+
+/*
+ * open() handler for psmap.  The first opener publishes the inode's
+ * address space in ctx->psmap; all of it happens under ctx->mapping_lock.
+ */
+static int spufs_psmap_open(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *info = SPUFS_I(inode);
+	struct spu_context *ctx = info->i_ctx;
+
+	mutex_lock(&ctx->mapping_lock);
+	file->private_data = ctx;
+	if (info->i_openers++ == 0)
+		ctx->psmap = inode->i_mapping;
+	mutex_unlock(&ctx->mapping_lock);
+
+	return nonseekable_open(inode, file);
+}
+
+/*
+ * release() handler for psmap.  When the last opener goes away ctx->psmap
+ * is cleared again, under ctx->mapping_lock.
+ */
+static int spufs_psmap_release(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *info = SPUFS_I(inode);
+	struct spu_context *ctx = info->i_ctx;
+
+	mutex_lock(&ctx->mapping_lock);
+	if (--info->i_openers == 0)
+		ctx->psmap = NULL;
+	mutex_unlock(&ctx->mapping_lock);
+
+	return 0;
+}
+
+/* File operations for psmap: mapping only, no read or write handlers. */
+static const struct file_operations spufs_psmap_fops = {
+	.llseek = no_llseek,
+	.mmap = spufs_psmap_mmap,
+	.open = spufs_psmap_open,
+	.release = spufs_psmap_release,
+};
+
+
+#if SPUFS_MMAP_4K
+/*
+ * Fault handler for mfc mappings: resolves the fault against the
+ * problem-state area at offset 0x3000, SPUFS_MFC_MAP_SIZE bytes long.
+ */
+static vm_fault_t spufs_mfc_mmap_fault(struct vm_fault *vmf)
+{
+	return spufs_ps_fault(vmf, 0x3000, SPUFS_MFC_MAP_SIZE);
+}
+
+/* VMA operations for mfc mappings: everything is populated on fault. */
+static const struct vm_operations_struct spufs_mfc_mmap_vmops = {
+	.fault = spufs_mfc_mmap_fault,
+};
+
+/*
+ * mmap() handler for the problem state MFC DMA area, which the fault
+ * handler places at problem-state offset 0x3000 (SPUFS_MFC_MAP_SIZE
+ * bytes).  Only shared mappings are accepted; the VMA is flagged
+ * VM_IO | VM_PFNMAP and made non-cached.
+ */
+static int spufs_mfc_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	if ((vma->vm_flags & VM_SHARED) == 0)
+		return -EINVAL;
+
+	vm_flags_set(vma, VM_IO | VM_PFNMAP);
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+	vma->vm_ops = &spufs_mfc_mmap_vmops;
+
+	return 0;
+}
+#else /* SPUFS_MMAP_4K */
+#define spufs_mfc_mmap NULL
+#endif /* !SPUFS_MMAP_4K */
+
+/*
+ * open() handler for mfc.
+ *
+ * Fails with -EINVAL unless the caller's mm is the context owner, and with
+ * -EBUSY unless the inode reference count is exactly one.  The first
+ * opener publishes the inode's address space in ctx->mfc.
+ */
+static int spufs_mfc_open(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *info = SPUFS_I(inode);
+	struct spu_context *ctx = info->i_ctx;
+
+	/* we don't want to deal with DMA into other processes */
+	if (current->mm != ctx->owner)
+		return -EINVAL;
+
+	if (atomic_read(&inode->i_count) != 1)
+		return -EBUSY;
+
+	mutex_lock(&ctx->mapping_lock);
+	file->private_data = ctx;
+	if (info->i_openers++ == 0)
+		ctx->mfc = inode->i_mapping;
+	mutex_unlock(&ctx->mapping_lock);
+
+	return nonseekable_open(inode, file);
+}
+
+/*
+ * release() handler for mfc.  When the last opener goes away ctx->mfc is
+ * cleared again, under ctx->mapping_lock.
+ */
+static int spufs_mfc_release(struct inode *inode, struct file *file)
+{
+	struct spufs_inode_info *info = SPUFS_I(inode);
+	struct spu_context *ctx = info->i_ctx;
+
+	mutex_lock(&ctx->mapping_lock);
+	if (--info->i_openers == 0)
+		ctx->mfc = NULL;
+	mutex_unlock(&ctx->mapping_lock);
+
+	return 0;
+}
+
+/*
+ * Interrupt-level mfc callback: wakes every waiter on the mfc wait queue
+ * of the context currently attached to @spu, if there is one.
+ */
+void spufs_mfc_callback(struct spu *spu)
+{
+	struct spu_context *ctx = spu->ctx;
+
+	if (!ctx)
+		return;
+
+	wake_up_all(&ctx->mfc_wq);
+}
+
+/*
+ * Wait condition used by spufs_mfc_read().
+ *
+ * Stores in @status the tag groups that are both complete and being
+ * waited for, and removes them from ctx->tagwait.  Returns 1 when at
+ * least one such group exists.  Otherwise a tag status query for the
+ * remaining groups is armed and 0 is returned.
+ */
+static int spufs_read_mfc_tagstatus(struct spu_context *ctx, u32 *status)
+{
+	/* See if there is one tag group is complete */
+	/* FIXME we need locking around tagwait */
+	*status = ctx->tagwait & ctx->ops->read_mfc_tagstatus(ctx);
+	ctx->tagwait &= ~*status;
+
+	if (*status == 0) {
+		/*
+		 * enable interrupt waiting for any tag group,
+		 * may silently fail if interrupts are already enabled
+		 */
+		ctx->ops->set_mfc_query(ctx, ctx->tagwait, 1);
+		return 0;
+	}
+
+	return 1;
+}
+
+/*
+ * read() handler for mfc: reports the tag groups that have completed.
+ *
+ * Exactly four bytes must be requested.  In blocking mode the call sleeps
+ * until one of the tag groups in ctx->tagwait has completed.  In
+ * non-blocking mode -EAGAIN is returned when none of them has completed
+ * yet (previously this error was overwritten and 4 bytes were returned
+ * regardless).
+ *
+ * Returns 4 on success, -EINVAL for a wrong size, -EAGAIN as described
+ * above, -EFAULT when the copy to user space fails, or the error from
+ * spu_acquire()/spufs_wait().
+ */
+static ssize_t spufs_mfc_read(struct file *file, char __user *buffer,
+			size_t size, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	u32 status;
+	int ret;
+
+	if (size != 4)
+		return -EINVAL;
+
+	ret = spu_acquire(ctx);
+	if (ret)
+		return ret;
+
+	if (file->f_flags & O_NONBLOCK) {
+		status = ctx->ops->read_mfc_tagstatus(ctx);
+		if (!(status & ctx->tagwait)) {
+			/* nothing we wait for has completed yet */
+			spu_release(ctx);
+			return -EAGAIN;
+		}
+		ctx->tagwait &= ~status;
+	} else {
+		ret = spufs_wait(ctx->mfc_wq,
+				 spufs_read_mfc_tagstatus(ctx, &status));
+		/* on error spufs_wait returns without the state mutex held */
+		if (ret)
+			return ret;
+	}
+	spu_release(ctx);
+
+	if (copy_to_user(buffer, &status, 4))
+		return -EFAULT;
+
+	return 4;
+}
+
+/*
+ * Validate a user supplied MFC DMA command.
+ *
+ * Accepted are the plain, fenced and barrier variants of PUT and GET whose
+ * local store and effective address agree in their low four bits, whose
+ * local store address is aligned for the transfer size, whose size does
+ * not exceed 16 kB, whose tag is below 16 and whose class is zero.
+ *
+ * Returns 0 for an acceptable command and -EIO otherwise.
+ */
+static int spufs_check_valid_dma(struct mfc_dma_command *cmd)
+{
+	bool lsa_aligned;
+
+	pr_debug("queueing DMA %x %llx %x %x %x\n", cmd->lsa,
+		 cmd->ea, cmd->size, cmd->tag, cmd->cmd);
+
+	switch (cmd->cmd) {
+	case MFC_PUT_CMD:
+	case MFC_PUTF_CMD:
+	case MFC_PUTB_CMD:
+	case MFC_GET_CMD:
+	case MFC_GETF_CMD:
+	case MFC_GETB_CMD:
+		break;
+	default:
+		pr_debug("invalid DMA opcode %x\n", cmd->cmd);
+		return -EIO;
+	}
+
+	if ((cmd->lsa & 0xf) != (cmd->ea & 0xf)) {
+		pr_debug("invalid DMA alignment, ea %llx lsa %x\n",
+			 cmd->ea, cmd->lsa);
+		return -EIO;
+	}
+
+	/* the low nibble of the size selects the required lsa alignment */
+	switch (cmd->size & 0xf) {
+	case 1:
+		lsa_aligned = true;
+		break;
+	case 2:
+		lsa_aligned = !(cmd->lsa & 1);
+		break;
+	case 4:
+		lsa_aligned = !(cmd->lsa & 3);
+		break;
+	case 8:
+		lsa_aligned = !(cmd->lsa & 7);
+		break;
+	case 0:
+		lsa_aligned = !(cmd->lsa & 15);
+		break;
+	default:
+		lsa_aligned = false;
+		break;
+	}
+
+	if (!lsa_aligned) {
+		pr_debug("invalid DMA alignment %x for size %x\n",
+			 cmd->lsa & 0xf, cmd->size);
+		return -EIO;
+	}
+
+	if (cmd->size > 16 * 1024) {
+		pr_debug("invalid DMA size %x\n", cmd->size);
+		return -EIO;
+	}
+
+	if (cmd->tag & 0xfff0) {
+		/* we reserve the higher tag numbers for kernel use */
+		pr_debug("invalid DMA tag\n");
+		return -EIO;
+	}
+
+	if (cmd->class) {
+		/* not supported in this version */
+		pr_debug("invalid DMA class\n");
+		return -EIO;
+	}
+
+	return 0;
+}
+
+/*
+ * Wait condition used by spufs_mfc_write().
+ *
+ * Tries to queue @cmd and stores the result in @error.  When the first
+ * attempt yields -EAGAIN a tag status query is armed and the command is
+ * tried once more.  Returns 0 if the result is still -EAGAIN (the caller
+ * should keep waiting) and 1 otherwise.
+ */
+static int spu_send_mfc_command(struct spu_context *ctx,
+			struct mfc_dma_command cmd,
+			int *error)
+{
+	*error = ctx->ops->send_mfc_command(ctx, &cmd);
+	if (*error != -EAGAIN)
+		return 1;
+
+	/*
+	 * wait for any tag group to complete
+	 * so we have space for the new command
+	 */
+	ctx->ops->set_mfc_query(ctx, ctx->tagwait, 1);
+
+	/* try again, because the queue might be empty again */
+	*error = ctx->ops->send_mfc_command(ctx, &cmd);
+
+	return *error != -EAGAIN;
+}
+
+/*
+ * write() handler for mfc: queues one MFC DMA command.
+ *
+ * The write must be exactly one struct mfc_dma_command.  After the
+ * command passed validation the call waits for the context to become
+ * runnable and submits the command, either once (O_NONBLOCK) or sleeping
+ * until it could be queued.  On success the command's tag is added to
+ * ctx->tagwait and @size is returned.
+ */
+static ssize_t spufs_mfc_write(struct file *file, const char __user *buffer,
+			size_t size, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	struct mfc_dma_command cmd;
+	int ret;
+
+	if (size != sizeof(cmd))
+		return -EINVAL;
+
+	if (copy_from_user(&cmd, buffer, sizeof(cmd)))
+		return -EFAULT;
+
+	ret = spufs_check_valid_dma(&cmd);
+	if (ret)
+		return ret;
+
+	ret = spu_acquire(ctx);
+	if (ret)
+		return ret;
+
+	/* on error spufs_wait returns without the state mutex held */
+	ret = spufs_wait(ctx->run_wq, ctx->state == SPU_STATE_RUNNABLE);
+	if (ret)
+		return ret;
+
+	if (file->f_flags & O_NONBLOCK) {
+		ret = ctx->ops->send_mfc_command(ctx, &cmd);
+	} else {
+		int status;
+
+		ret = spufs_wait(ctx->mfc_wq,
+				 spu_send_mfc_command(ctx, cmd, &status));
+		if (ret)
+			return ret;
+		ret = status;
+	}
+
+	if (ret == 0) {
+		ctx->tagwait |= 1 << cmd.tag;
+		ret = size;
+	}
+
+	spu_release(ctx);
+	return ret;
+}
+
+/*
+ * poll() handler for mfc.
+ *
+ * Reports the file writable when the low 16 bits of the free element
+ * count are non-zero, and readable when a tag group in ctx->tagwait has
+ * completed.
+ */
+static __poll_t spufs_mfc_poll(struct file *file, poll_table *wait)
+{
+	struct spu_context *ctx = file->private_data;
+	u32 free_elements, tagstatus;
+	__poll_t mask = 0;
+
+	poll_wait(file, &ctx->mfc_wq, wait);
+
+	/*
+	 * For now keep this uninterruptible and also ignore the rule
+	 * that poll should not sleep.  Will be fixed later.
+	 */
+	mutex_lock(&ctx->state_mutex);
+	ctx->ops->set_mfc_query(ctx, ctx->tagwait, 2);
+	free_elements = ctx->ops->get_mfc_free_elements(ctx);
+	tagstatus = ctx->ops->read_mfc_tagstatus(ctx);
+	spu_release(ctx);
+
+	if ((free_elements & 0xffff) != 0)
+		mask |= EPOLLOUT | EPOLLWRNORM;
+	if ((tagstatus & ctx->tagwait) != 0)
+		mask |= EPOLLIN | EPOLLRDNORM;
+
+	pr_debug("%s: free %d tagstatus %d tagwait %d\n", __func__,
+		 free_elements, tagstatus, ctx->tagwait);
+
+	return mask;
+}
+
+/*
+ * flush() handler for mfc.
+ *
+ * As built, this only acquires and releases the context and returns 0
+ * (or the error from spu_acquire()).  The code that would wait for all
+ * outstanding tag groups is compiled out because it currently hangs.
+ */
+static int spufs_mfc_flush(struct file *file, fl_owner_t id)
+{
+	struct spu_context *ctx = file->private_data;
+	int ret;
+
+	ret = spu_acquire(ctx);
+	if (ret)
+		goto out;
+#if 0
+/* this currently hangs */
+	ret = spufs_wait(ctx->mfc_wq,
+			 ctx->ops->set_mfc_query(ctx, ctx->tagwait, 2));
+	if (ret)
+		goto out;
+	ret = spufs_wait(ctx->mfc_wq,
+			 ctx->ops->read_mfc_tagstatus(ctx) == ctx->tagwait);
+	if (ret)
+		goto out;
+#else
+	ret = 0;
+#endif
+	spu_release(ctx);
+out:
+	return ret;
+}
+
+/*
+ * fsync() handler for mfc: writes back and waits for the given file range
+ * and, if that succeeded, runs spufs_mfc_flush() under the inode lock.
+ */
+static int spufs_mfc_fsync(struct file *file, loff_t start, loff_t end, int datasync)
+{
+	struct inode *inode = file_inode(file);
+	int err;
+
+	err = file_write_and_wait_range(file, start, end);
+	if (err)
+		return err;
+
+	inode_lock(inode);
+	err = spufs_mfc_flush(file, NULL);
+	inode_unlock(inode);
+
+	return err;
+}
+
+/* File operations for the mfc file. */
+static const struct file_operations spufs_mfc_fops = {
+	.llseek = no_llseek,
+	.read = spufs_mfc_read,
+	.write = spufs_mfc_write,
+	.poll = spufs_mfc_poll,
+	.mmap = spufs_mfc_mmap,
+	.open = spufs_mfc_open,
+	.flush = spufs_mfc_flush,
+	.release = spufs_mfc_release,
+	.fsync = spufs_mfc_fsync,
+};
+
+/*
+ * Setter for npc: hands @val to the context's npc_write operation with
+ * the context acquired.
+ */
+static int spufs_npc_set(void *data, u64 val)
+{
+	struct spu_context *ctx = data;
+	int rc = spu_acquire(ctx);
+
+	if (rc == 0) {
+		ctx->ops->npc_write(ctx, val);
+		spu_release(ctx);
+	}
+
+	return rc;
+}
+
+/* Getter for npc; the generated wrapper acquires the context. */
+static u64 spufs_npc_get(struct spu_context *ctx)
+{
+	return ctx->ops->npc_read(ctx);
+}
+DEFINE_SPUFS_ATTRIBUTE(spufs_npc_ops, spufs_npc_get, spufs_npc_set,
+		       "0x%llx\n", SPU_ATTR_ACQUIRE);
+
+/*
+ * Setter for decr: stores the low 32 bits of @val in slot 0 of the saved
+ * decrementer, with the context acquired in saved state.
+ */
+static int spufs_decr_set(void *data, u64 val)
+{
+	struct spu_context *ctx = data;
+	struct spu_lscsa *lscsa = ctx->csa.lscsa;
+	int rc = spu_acquire_saved(ctx);
+
+	if (rc == 0) {
+		lscsa->decr.slot[0] = (u32) val;
+		spu_release_saved(ctx);
+	}
+
+	return rc;
+}
+
+/* Getter for decr: slot 0 of the saved decrementer. */
+static u64 spufs_decr_get(struct spu_context *ctx)
+{
+	return ctx->csa.lscsa->decr.slot[0];
+}
+DEFINE_SPUFS_ATTRIBUTE(spufs_decr_ops, spufs_decr_get, spufs_decr_set,
+		       "0x%llx\n", SPU_ATTR_ACQUIRE_SAVED);
+
+/*
+ * Setter for decr_status: a non-zero @val sets and a zero @val clears
+ * MFC_CNTL_DECREMENTER_RUNNING in the saved MFC control register.
+ */
+static int spufs_decr_status_set(void *data, u64 val)
+{
+	struct spu_context *ctx = data;
+	int rc = spu_acquire_saved(ctx);
+
+	if (rc != 0)
+		return rc;
+
+	if (val != 0)
+		ctx->csa.priv2.mfc_control_RW |= MFC_CNTL_DECREMENTER_RUNNING;
+	else
+		ctx->csa.priv2.mfc_control_RW &= ~MFC_CNTL_DECREMENTER_RUNNING;
+	spu_release_saved(ctx);
+
+	return 0;
+}
+
+/*
+ * Getter for decr_status: SPU_DECR_STATUS_RUNNING when the saved MFC
+ * control register has MFC_CNTL_DECREMENTER_RUNNING set, 0 otherwise.
+ */
+static u64 spufs_decr_status_get(struct spu_context *ctx)
+{
+	if (!(ctx->csa.priv2.mfc_control_RW & MFC_CNTL_DECREMENTER_RUNNING))
+		return 0;
+
+	return SPU_DECR_STATUS_RUNNING;
+}
+DEFINE_SPUFS_ATTRIBUTE(spufs_decr_status_ops, spufs_decr_status_get,
+		       spufs_decr_status_set, "0x%llx\n",
+		       SPU_ATTR_ACQUIRE_SAVED);
+
+/*
+ * Setter for event_mask: stores the low 32 bits of @val in slot 0 of the
+ * saved event mask, with the context acquired in saved state.
+ */
+static int spufs_event_mask_set(void *data, u64 val)
+{
+	struct spu_context *ctx = data;
+	struct spu_lscsa *lscsa = ctx->csa.lscsa;
+	int rc = spu_acquire_saved(ctx);
+
+	if (rc == 0) {
+		lscsa->event_mask.slot[0] = (u32) val;
+		spu_release_saved(ctx);
+	}
+
+	return rc;
+}
+
+/* Getter for event_mask: slot 0 of the saved event mask. */
+static u64 spufs_event_mask_get(struct spu_context *ctx)
+{
+	return ctx->csa.lscsa->event_mask.slot[0];
+}
+
+DEFINE_SPUFS_ATTRIBUTE(spufs_event_mask_ops, spufs_event_mask_get,
+		       spufs_event_mask_set, "0x%llx\n",
+		       SPU_ATTR_ACQUIRE_SAVED);
+
+/*
+ * Getter for event_status: the saved data word of channel 0 when the
+ * saved count of that channel is non-zero, 0 otherwise.
+ */
+static u64 spufs_event_status_get(struct spu_context *ctx)
+{
+	struct spu_state *csa = &ctx->csa;
+	u64 pending = csa->spu_chnlcnt_RW[0];
+
+	if (!pending)
+		return 0;
+
+	return csa->spu_chnldata_RW[0];
+}
+DEFINE_SPUFS_ATTRIBUTE(spufs_event_status_ops, spufs_event_status_get,
+		       NULL, "0x%llx\n", SPU_ATTR_ACQUIRE_SAVED)
+
+/*
+ * Setter for srr0: stores the low 32 bits of @val in slot 0 of the saved
+ * srr0, with the context acquired in saved state.
+ */
+static int spufs_srr0_set(void *data, u64 val)
+{
+	struct spu_context *ctx = data;
+	struct spu_lscsa *lscsa = ctx->csa.lscsa;
+	int rc = spu_acquire_saved(ctx);
+
+	if (rc == 0) {
+		lscsa->srr0.slot[0] = (u32) val;
+		spu_release_saved(ctx);
+	}
+
+	return rc;
+}
+
+/* Getter for srr0: slot 0 of the saved srr0. */
+static u64 spufs_srr0_get(struct spu_context *ctx)
+{
+	return ctx->csa.lscsa->srr0.slot[0];
+}
+DEFINE_SPUFS_ATTRIBUTE(spufs_srr0_ops, spufs_srr0_get, spufs_srr0_set,
+		       "0x%llx\n", SPU_ATTR_ACQUIRE_SAVED)
+
+/*
+ * Getter for phys-id: the number of the SPU the context is on while it
+ * is runnable, (unsigned int)-1 otherwise.
+ */
+static u64 spufs_id_get(struct spu_context *ctx)
+{
+	u64 num = (unsigned int)-1;
+
+	if (ctx->state == SPU_STATE_RUNNABLE)
+		num = ctx->spu->number;
+
+	return num;
+}
+DEFINE_SPUFS_ATTRIBUTE(spufs_id_ops, spufs_id_get, NULL, "0x%llx\n",
+		       SPU_ATTR_ACQUIRE)
+
+/* Getter for object-id; reads ctx->object_id without taking any lock. */
+static u64 spufs_object_id_get(struct spu_context *ctx)
+{
+	/* FIXME: Should there really be no locking here? */
+	return ctx->object_id;
+}
+
+/* Setter for object-id; stores @id without taking any lock. */
+static int spufs_object_id_set(void *data, u64 id)
+{
+	struct spu_context *ctx = data;
+
+	ctx->object_id = id;
+	return 0;
+}
+
+DEFINE_SPUFS_ATTRIBUTE(spufs_object_id_ops, spufs_object_id_get,
+		       spufs_object_id_set, "0x%llx\n", SPU_ATTR_NOACQUIRE);
+
+/* Getter for lslr: the saved spu_lslr_RW register of the context. */
+static u64 spufs_lslr_get(struct spu_context *ctx)
+{
+	struct spu_state *csa = &ctx->csa;
+
+	return csa->priv2.spu_lslr_RW;
+}
+DEFINE_SPUFS_ATTRIBUTE(spufs_lslr_ops, spufs_lslr_get, NULL, "0x%llx\n",
+		       SPU_ATTR_ACQUIRE_SAVED);
+
+/*
+ * Shared open() handler of the *_info files: stores the inode's SPU
+ * context in file->private_data.
+ */
+static int spufs_info_open(struct inode *inode, struct file *file)
+{
+	file->private_data = SPUFS_I(inode)->i_ctx;
+
+	return 0;
+}
+
+/*
+ * seq_file show routine for "capabilities": prints "sched" unless the
+ * context has SPU_CREATE_NOSCHED set and "step" unless it has
+ * SPU_CREATE_ISOLATE set.
+ */
+static int spufs_caps_show(struct seq_file *s, void *private)
+{
+	struct spu_context *ctx = s->private;
+
+	if ((ctx->flags & SPU_CREATE_NOSCHED) == 0)
+		seq_puts(s, "sched\n");
+
+	if ((ctx->flags & SPU_CREATE_ISOLATE) == 0)
+		seq_puts(s, "step\n");
+
+	return 0;
+}
+
+/* open() handler for "capabilities": single-record seq_file on the context. */
+static int spufs_caps_open(struct inode *inode, struct file *file)
+{
+	struct spu_context *ctx = SPUFS_I(inode)->i_ctx;
+
+	return single_open(file, spufs_caps_show, ctx);
+}
+
+/* File operations for "capabilities". */
+static const struct file_operations spufs_caps_fops = {
+	.llseek = seq_lseek,
+	.read = seq_read,
+	.open = spufs_caps_open,
+	.release = single_release,
+};
+
+/*
+ * Coredump helper for mbox_info: emits the saved pu_mb_R register unless
+ * the low byte of the saved mailbox status is zero.
+ */
+static ssize_t spufs_mbox_info_dump(struct spu_context *ctx,
+		struct coredump_params *cprm)
+{
+	struct spu_state *csa = &ctx->csa;
+
+	if ((csa->prob.mb_stat_R & 0x0000ff) == 0)
+		return 0;
+
+	return spufs_dump_emit(cprm, &csa->prob.pu_mb_R,
+			       sizeof(csa->prob.pu_mb_R));
+}
+
+/*
+ * read() handler for mbox_info.
+ *
+ * Snapshots the saved mailbox status and pu_mb_R under the register lock
+ * and serves the data word from that snapshot.  Reads return 0 (EOF) when
+ * the low byte of the status is zero.
+ */
+static ssize_t spufs_mbox_info_read(struct file *file, char __user *buf,
+				   size_t len, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	u32 mb_stat, mb_data;
+	int rc;
+
+	rc = spu_acquire_saved(ctx);
+	if (rc != 0)
+		return rc;
+
+	spin_lock(&ctx->csa.register_lock);
+	mb_stat = ctx->csa.prob.mb_stat_R;
+	mb_data = ctx->csa.prob.pu_mb_R;
+	spin_unlock(&ctx->csa.register_lock);
+	spu_release_saved(ctx);
+
+	/* EOF if there's no entry in the mbox */
+	if ((mb_stat & 0x0000ff) == 0)
+		return 0;
+
+	return simple_read_from_buffer(buf, len, pos, &mb_data,
+				       sizeof(mb_data));
+}
+
+/* File operations for mbox_info. */
+static const struct file_operations spufs_mbox_info_fops = {
+	.llseek = generic_file_llseek,
+	.read = spufs_mbox_info_read,
+	.open = spufs_info_open,
+};
+
+/*
+ * Coredump helper for ibox_info: emits the saved puint_mb_R register
+ * unless bits 16-23 of the saved mailbox status are all zero.
+ */
+static ssize_t spufs_ibox_info_dump(struct spu_context *ctx,
+		struct coredump_params *cprm)
+{
+	struct spu_state *csa = &ctx->csa;
+
+	if ((csa->prob.mb_stat_R & 0xff0000) == 0)
+		return 0;
+
+	return spufs_dump_emit(cprm, &csa->priv2.puint_mb_R,
+			       sizeof(csa->priv2.puint_mb_R));
+}
+
+/*
+ * read() handler for ibox_info.
+ *
+ * Snapshots the saved mailbox status and puint_mb_R under the register
+ * lock and serves the data word from that snapshot.  Reads return 0 (EOF)
+ * when bits 16-23 of the status are all zero.
+ */
+static ssize_t spufs_ibox_info_read(struct file *file, char __user *buf,
+				   size_t len, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	u32 mb_stat, ib_data;
+	int rc;
+
+	rc = spu_acquire_saved(ctx);
+	if (rc != 0)
+		return rc;
+
+	spin_lock(&ctx->csa.register_lock);
+	mb_stat = ctx->csa.prob.mb_stat_R;
+	ib_data = ctx->csa.priv2.puint_mb_R;
+	spin_unlock(&ctx->csa.register_lock);
+	spu_release_saved(ctx);
+
+	/* EOF if there's no entry in the ibox */
+	if ((mb_stat & 0xff0000) == 0)
+		return 0;
+
+	return simple_read_from_buffer(buf, len, pos, &ib_data,
+				       sizeof(ib_data));
+}
+
+/* File operations for ibox_info. */
+static const struct file_operations spufs_ibox_info_fops = {
+	.llseek = generic_file_llseek,
+	.read = spufs_ibox_info_read,
+	.open = spufs_info_open,
+};
+
+/*
+ * Size in BYTES of the wbox data: four minus the value in bits 8-15 of
+ * the saved mailbox status, times sizeof(u32).
+ */
+static size_t spufs_wbox_info_cnt(struct spu_context *ctx)
+{
+	return sizeof(u32) * (4 - ((ctx->csa.prob.mb_stat_R >> 8) & 0xff));
+}
+
+/*
+ * Coredump helper for wbox_info: emits spufs_wbox_info_cnt() bytes of the
+ * saved mailbox data.
+ */
+static ssize_t spufs_wbox_info_dump(struct spu_context *ctx,
+		struct coredump_params *cprm)
+{
+	size_t nbytes = spufs_wbox_info_cnt(ctx);
+
+	return spufs_dump_emit(cprm, &ctx->csa.spu_mailbox_data, nbytes);
+}
+
+/*
+ * read() handler for wbox_info.
+ *
+ * Snapshots the saved mailbox data under the register lock and serves the
+ * valid part of it from the on-stack copy.
+ *
+ * spufs_wbox_info_cnt() already returns a size in bytes, so it must not be
+ * scaled by sizeof(u32) a second time: doing so made the readable size up
+ * to four times larger than @data and exposed adjacent kernel stack to
+ * user space.  The size is additionally clamped to the snapshot buffer so
+ * an unexpected status value can never cause an over-read.
+ *
+ * Returns the number of bytes copied, 0 at EOF, or a negative errno from
+ * spu_acquire_saved() / simple_read_from_buffer().
+ */
+static ssize_t spufs_wbox_info_read(struct file *file, char __user *buf,
+				   size_t len, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	u32 data[ARRAY_SIZE(ctx->csa.spu_mailbox_data)];
+	size_t nbytes;
+	int ret;
+
+	ret = spu_acquire_saved(ctx);
+	if (ret)
+		return ret;
+	spin_lock(&ctx->csa.register_lock);
+	nbytes = spufs_wbox_info_cnt(ctx);
+	memcpy(&data, &ctx->csa.spu_mailbox_data, sizeof(data));
+	spin_unlock(&ctx->csa.register_lock);
+	spu_release_saved(ctx);
+
+	/* never hand out more than the snapshot actually holds */
+	if (nbytes > sizeof(data))
+		nbytes = sizeof(data);
+
+	return simple_read_from_buffer(buf, len, pos, &data, nbytes);
+}
+
+/* File operations for wbox_info. */
+static const struct file_operations spufs_wbox_info_fops = {
+	.open = spufs_info_open,
+	.read = spufs_wbox_info_read,
+	.llseek = generic_file_llseek,
+};
+
+/*
+ * Fill @info from the saved context state: the tag status query type, the
+ * tag mask, the saved data of channels 24, 25 and 27, and the four data
+ * words of each of the 16 SPU command queue entries.
+ */
+static void spufs_get_dma_info(struct spu_context *ctx,
+		struct spu_dma_info *info)
+{
+	struct spu_state *csa = &ctx->csa;
+	int slot;
+
+	info->dma_info_type = csa->priv2.spu_tag_status_query_RW;
+	info->dma_info_mask = csa->lscsa->tag_mask.slot[0];
+	info->dma_info_status = csa->spu_chnldata_RW[24];
+	info->dma_info_stall_and_notify = csa->spu_chnldata_RW[25];
+	info->dma_info_atomic_command_status = csa->spu_chnldata_RW[27];
+
+	for (slot = 0; slot < 16; slot++) {
+		struct mfc_cq_sr *dst = &info->dma_info_command_data[slot];
+		struct mfc_cq_sr *src = &csa->priv2.spuq[slot];
+
+		dst->mfc_cq_data0_RW = src->mfc_cq_data0_RW;
+		dst->mfc_cq_data1_RW = src->mfc_cq_data1_RW;
+		dst->mfc_cq_data2_RW = src->mfc_cq_data2_RW;
+		dst->mfc_cq_data3_RW = src->mfc_cq_data3_RW;
+	}
+}
+
+/*
+ * Coredump helper for dma_info: collects the DMA info into a local
+ * structure and emits it as a whole.
+ */
+static ssize_t spufs_dma_info_dump(struct spu_context *ctx,
+		struct coredump_params *cprm)
+{
+	struct spu_dma_info snapshot;
+
+	spufs_get_dma_info(ctx, &snapshot);
+
+	return spufs_dump_emit(cprm, &snapshot, sizeof(snapshot));
+}
+
+/*
+ * read() handler for dma_info: snapshots the DMA info under the register
+ * lock, with the context acquired in saved state, and serves the read
+ * from that snapshot.
+ */
+static ssize_t spufs_dma_info_read(struct file *file, char __user *buf,
+				  size_t len, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	struct spu_dma_info snapshot;
+	int rc;
+
+	rc = spu_acquire_saved(ctx);
+	if (rc != 0)
+		return rc;
+
+	spin_lock(&ctx->csa.register_lock);
+	spufs_get_dma_info(ctx, &snapshot);
+	spin_unlock(&ctx->csa.register_lock);
+	spu_release_saved(ctx);
+
+	return simple_read_from_buffer(buf, len, pos, &snapshot,
+				       sizeof(snapshot));
+}
+
+/* File operations for dma_info. */
+static const struct file_operations spufs_dma_info_fops = {
+	.llseek = no_llseek,
+	.read = spufs_dma_info_read,
+	.open = spufs_info_open,
+};
+
+/*
+ * Fill @info from the saved context state: the proxy DMA query type,
+ * query mask and tag status, and the four data words of each of the 8
+ * proxy command queue entries.
+ */
+static void spufs_get_proxydma_info(struct spu_context *ctx,
+		struct spu_proxydma_info *info)
+{
+	struct spu_state *csa = &ctx->csa;
+	int slot;
+
+	info->proxydma_info_type = csa->prob.dma_querytype_RW;
+	info->proxydma_info_mask = csa->prob.dma_querymask_RW;
+	info->proxydma_info_status = csa->prob.dma_tagstatus_R;
+
+	for (slot = 0; slot < 8; slot++) {
+		struct mfc_cq_sr *dst = &info->proxydma_info_command_data[slot];
+		struct mfc_cq_sr *src = &csa->priv2.puq[slot];
+
+		dst->mfc_cq_data0_RW = src->mfc_cq_data0_RW;
+		dst->mfc_cq_data1_RW = src->mfc_cq_data1_RW;
+		dst->mfc_cq_data2_RW = src->mfc_cq_data2_RW;
+		dst->mfc_cq_data3_RW = src->mfc_cq_data3_RW;
+	}
+}
+
+/*
+ * Coredump helper for proxydma_info: collects the proxy DMA info into a
+ * local structure and emits it as a whole.
+ */
+static ssize_t spufs_proxydma_info_dump(struct spu_context *ctx,
+		struct coredump_params *cprm)
+{
+	struct spu_proxydma_info snapshot;
+
+	spufs_get_proxydma_info(ctx, &snapshot);
+
+	return spufs_dump_emit(cprm, &snapshot, sizeof(snapshot));
+}
+
+/*
+ * read() handler for proxydma_info.
+ *
+ * Rejects buffers smaller than the whole structure with -EINVAL, then
+ * snapshots the proxy DMA info under the register lock, with the context
+ * acquired in saved state, and serves the read from that snapshot.
+ */
+static ssize_t spufs_proxydma_info_read(struct file *file, char __user *buf,
+				   size_t len, loff_t *pos)
+{
+	struct spu_context *ctx = file->private_data;
+	struct spu_proxydma_info snapshot;
+	int rc;
+
+	if (len < sizeof(snapshot))
+		return -EINVAL;
+
+	rc = spu_acquire_saved(ctx);
+	if (rc != 0)
+		return rc;
+
+	spin_lock(&ctx->csa.register_lock);
+	spufs_get_proxydma_info(ctx, &snapshot);
+	spin_unlock(&ctx->csa.register_lock);
+	spu_release_saved(ctx);
+
+	return simple_read_from_buffer(buf, len, pos, &snapshot,
+				       sizeof(snapshot));
+}
+
+/* File operations for proxydma_info. */
+static const struct file_operations spufs_proxydma_info_fops = {
+	.llseek = no_llseek,
+	.read = spufs_proxydma_info_read,
+	.open = spufs_info_open,
+};
+
+/* seq_file show routine for the tid file: prints ctx->tid in decimal. */
+static int spufs_show_tid(struct seq_file *s, void *private)
+{
+	const struct spu_context *ctx = s->private;
+
+	seq_printf(s, "%d\n", ctx->tid);
+
+	return 0;
+}
+
+/* open() handler for the tid file: single-record seq_file on the context. */
+static int spufs_tid_open(struct inode *inode, struct file *file)
+{
+	struct spu_context *ctx = SPUFS_I(inode)->i_ctx;
+
+	return single_open(file, spufs_show_tid, ctx);
+}
+
+/* File operations for the tid file. */
+static const struct file_operations spufs_tid_fops = {
+	.llseek = seq_lseek,
+	.read = seq_read,
+	.open = spufs_tid_open,
+	.release = single_release,
+};
+
+/*
+ * Names printed by spufs_show_stat() for the current utilization state;
+ * the table is indexed by ctx->stats.util_state.
+ */
+static const char *ctx_state_names[] = {
+	"user", "system", "iowait", "loaded"
+};
+
+/*
+ * Time, in milliseconds, the context has spent in @state.
+ *
+ * In general, utilization statistics are updated by the controlling
+ * thread as the spu context moves through various well defined state
+ * transitions, but if the context is lazily loaded its utilization
+ * statistics are not updated as the controlling thread is not tightly
+ * coupled with the execution of the spu context.  For a context that is
+ * on an SPU and currently in @state, the time since the last recorded
+ * timestamp is therefore added on top of the accumulated value.
+ */
+static unsigned long long spufs_acct_time(struct spu_context *ctx,
+		enum spu_utilization_state state)
+{
+	unsigned long long nsecs = ctx->stats.times[state];
+
+	if (ctx->spu && ctx->stats.util_state == state)
+		nsecs += ktime_get_ns() - ctx->stats.tstamp;
+
+	return nsecs / NSEC_PER_MSEC;
+}
+
+/*
+ * SLB fault count of the context.  While the context is runnable the
+ * faults its SPU has counted beyond the recorded base are included.
+ */
+static unsigned long long spufs_slb_flts(struct spu_context *ctx)
+{
+	unsigned long long total = ctx->stats.slb_flt;
+
+	if (ctx->state != SPU_STATE_RUNNABLE)
+		return total;
+
+	total += (ctx->spu->stats.slb_flt -
+		  ctx->stats.slb_flt_base);
+
+	return total;
+}
+
+/*
+ * Class 2 interrupt count of the context.  While the context is runnable
+ * the interrupts its SPU has counted beyond the recorded base are
+ * included.
+ */
+static unsigned long long spufs_class2_intrs(struct spu_context *ctx)
+{
+	unsigned long long total = ctx->stats.class2_intr;
+
+	if (ctx->state != SPU_STATE_RUNNABLE)
+		return total;
+
+	total += (ctx->spu->stats.class2_intr -
+		  ctx->stats.class2_intr_base);
+
+	return total;
+}
+
+
+/*
+ * seq_file show routine for the stat file.  With the context acquired it
+ * prints one line: the name of the current utilization state, the four
+ * per-state times in milliseconds, and the context switch, fault,
+ * class 2 interrupt and libassist counters.
+ */
+static int spufs_show_stat(struct seq_file *s, void *private)
+{
+	struct spu_context *ctx = s->private;
+	int rc = spu_acquire(ctx);
+
+	if (rc != 0)
+		return rc;
+
+	seq_printf(s, "%s %llu %llu %llu %llu "
+		      "%llu %llu %llu %llu %llu %llu %llu %llu\n",
+		   ctx_state_names[ctx->stats.util_state],
+		   spufs_acct_time(ctx, SPU_UTIL_USER),
+		   spufs_acct_time(ctx, SPU_UTIL_SYSTEM),
+		   spufs_acct_time(ctx, SPU_UTIL_IOWAIT),
+		   spufs_acct_time(ctx, SPU_UTIL_IDLE_LOADED),
+		   ctx->stats.vol_ctx_switch,
+		   ctx->stats.invol_ctx_switch,
+		   spufs_slb_flts(ctx),
+		   ctx->stats.hash_flt,
+		   ctx->stats.min_flt,
+		   ctx->stats.maj_flt,
+		   spufs_class2_intrs(ctx),
+		   ctx->stats.libassist);
+
+	spu_release(ctx);
+
+	return 0;
+}
+
+/* open() handler for the stat file: single-record seq_file on the context. */
+static int spufs_stat_open(struct inode *inode, struct file *file)
+{
+	struct spu_context *ctx = SPUFS_I(inode)->i_ctx;
+
+	return single_open(file, spufs_show_stat, ctx);
+}
+
+/* File operations for the stat file. */
+static const struct file_operations spufs_stat_fops = {
+	.llseek = seq_lseek,
+	.read = seq_read,
+	.open = spufs_stat_open,
+	.release = single_release,
+};
+
+/*
+ * Number of entries currently held in the switch log: the distance from
+ * tail to head, modulo SWITCH_LOG_BUFSIZE.  ctx->switch_log must be
+ * allocated.
+ */
+static inline int spufs_switch_log_used(struct spu_context *ctx)
+{
+	return (ctx->switch_log->head - ctx->switch_log->tail) %
+		SWITCH_LOG_BUFSIZE;
+}
+
+/* Number of entries still free: SWITCH_LOG_BUFSIZE minus the used count. */
+static inline int spufs_switch_log_avail(struct spu_context *ctx)
+{
+	return SWITCH_LOG_BUFSIZE - spufs_switch_log_used(ctx);
+}
+
+/*
+ * open() handler for the switch log.
+ *
+ * Only one log may exist per context: if ctx->switch_log is already set
+ * the open fails with -EBUSY.  Otherwise an empty log with
+ * SWITCH_LOG_BUFSIZE entries is allocated.  Returns -ENOMEM when that
+ * allocation fails, or the error from spu_acquire().
+ */
+static int spufs_switch_log_open(struct inode *inode, struct file *file)
+{
+	struct spu_context *ctx = SPUFS_I(inode)->i_ctx;
+	int rc;
+
+	rc = spu_acquire(ctx);
+	if (rc)
+		return rc;
+
+	if (ctx->switch_log) {
+		rc = -EBUSY;
+	} else {
+		ctx->switch_log = kmalloc(struct_size(ctx->switch_log, log,
+						      SWITCH_LOG_BUFSIZE),
+					  GFP_KERNEL);
+		if (!ctx->switch_log) {
+			rc = -ENOMEM;
+		} else {
+			ctx->switch_log->head = ctx->switch_log->tail = 0;
+			init_waitqueue_head(&ctx->switch_log->wait);
+			rc = 0;
+		}
+	}
+
+	spu_release(ctx);
+	return rc;
+}
+
+/*
+ * release() handler for the switch log: frees the log and clears
+ * ctx->switch_log so that the file can be opened again.
+ *
+ * The state mutex is taken uninterruptibly here.  With the interruptible
+ * spu_acquire() a pending signal made this function return early, which
+ * leaked the log buffer and left ctx->switch_log set, so that every later
+ * open of the file failed with -EBUSY.  spu_release() is the matching
+ * unlock, as in spufs_mfc_poll().
+ *
+ * Always returns 0.
+ */
+static int spufs_switch_log_release(struct inode *inode, struct file *file)
+{
+	struct spu_context *ctx = SPUFS_I(inode)->i_ctx;
+
+	mutex_lock(&ctx->state_mutex);
+	kfree(ctx->switch_log);
+	ctx->switch_log = NULL;
+	spu_release(ctx);
+
+	return 0;
+}
+
+/*
+ * Format the log entry at the tail of the switch log into @tbuf, which is
+ * @n bytes long.  Returns what snprintf() returns.
+ */
+static int switch_log_sprint(struct spu_context *ctx, char *tbuf, int n)
+{
+	struct switch_log_entry *p;
+
+	p = &ctx->switch_log->log[ctx->switch_log->tail % SWITCH_LOG_BUFSIZE];
+
+	return snprintf(tbuf, n, "%llu.%09u %d %u %u %llu\n",
+			(unsigned long long) p->tstamp.tv_sec,
+			(unsigned int) p->tstamp.tv_nsec,
+			p->spu_id,
+			(unsigned int) p->type,
+			(unsigned int) p->val,
+			(unsigned long long) p->timebase);
+}
+
+/*
+ * read() handler for the switch log: copies as many complete, formatted
+ * log records to user space as fit into the buffer.
+ *
+ * When the log is empty and nothing has been copied yet the call either
+ * fails with -EAGAIN (O_NONBLOCK) or sleeps until a record arrives.
+ *
+ * Fixes over the previous version:
+ *  - a record is only copied if it fits into the space REMAINING in the
+ *    user buffer; comparing against the full length allowed writes past
+ *    the end of the buffer once earlier records had been copied;
+ *  - a failed copy_to_user() is reported as -EFAULT instead of the
+ *    positive count of uncopied bytes;
+ *  - the tail is advanced only after the record was copied, so a fault
+ *    no longer drops the record.
+ *
+ * Returns the number of bytes copied, or, if nothing was copied, 0 or a
+ * negative errno.
+ */
+static ssize_t spufs_switch_log_read(struct file *file, char __user *buf,
+			     size_t len, loff_t *ppos)
+{
+	struct inode *inode = file_inode(file);
+	struct spu_context *ctx = SPUFS_I(inode)->i_ctx;
+	int error = 0, cnt = 0;
+
+	if (!buf)
+		return -EINVAL;
+
+	error = spu_acquire(ctx);
+	if (error)
+		return error;
+
+	while (cnt < len) {
+		char tbuf[128];
+		int width;
+
+		if (spufs_switch_log_used(ctx) == 0) {
+			if (cnt > 0) {
+				/* If there's data ready to go, we can
+				 * just return straight away */
+				break;
+
+			} else if (file->f_flags & O_NONBLOCK) {
+				error = -EAGAIN;
+				break;
+
+			} else {
+				/* spufs_wait will drop the mutex and
+				 * re-acquire, but since we're in read(), the
+				 * file cannot be _released (and so
+				 * ctx->switch_log is stable).
+				 */
+				error = spufs_wait(ctx->switch_log->wait,
+						spufs_switch_log_used(ctx) > 0);
+
+				/* On error, spufs_wait returns without the
+				 * state mutex held */
+				if (error)
+					return error;
+
+				/* We may have had entries read from underneath
+				 * us while we dropped the mutex in spufs_wait,
+				 * so re-check */
+				if (spufs_switch_log_used(ctx) == 0)
+					continue;
+			}
+		}
+
+		width = switch_log_sprint(ctx, tbuf, sizeof(tbuf));
+
+		/* If the record does not fit into the space that is left,
+		 * return the partial buffer (so far) */
+		if (width >= len - cnt)
+			break;
+
+		if (copy_to_user(buf + cnt, tbuf, width)) {
+			error = -EFAULT;
+			break;
+		}
+
+		/* the record has been delivered, consume it */
+		ctx->switch_log->tail =
+			(ctx->switch_log->tail + 1) %
+			 SWITCH_LOG_BUFSIZE;
+		cnt += width;
+	}
+
+	spu_release(ctx);
+
+	return cnt == 0 ? error : cnt;
+}
+
+/*
+ * poll() handler for the switch log: reports EPOLLIN while the log holds
+ * at least one entry.
+ *
+ * The state mutex is taken uninterruptibly, as spufs_mfc_poll() does.
+ * The previous version returned the negative errno of an interrupted
+ * spu_acquire() as the poll mask, which callers would interpret as a
+ * bogus set of event bits.
+ */
+static __poll_t spufs_switch_log_poll(struct file *file, poll_table *wait)
+{
+	struct inode *inode = file_inode(file);
+	struct spu_context *ctx = SPUFS_I(inode)->i_ctx;
+	__poll_t mask = 0;
+
+	poll_wait(file, &ctx->switch_log->wait, wait);
+
+	mutex_lock(&ctx->state_mutex);
+	if (spufs_switch_log_used(ctx) > 0)
+		mask |= EPOLLIN;
+	spu_release(ctx);
+
+	return mask;
+}
+
+/* File operations for the switch log file. */
+static const struct file_operations spufs_switch_log_fops = {
+	.llseek = no_llseek,
+	.read = spufs_switch_log_read,
+	.poll = spufs_switch_log_poll,
+	.open = spufs_switch_log_open,
+	.release = spufs_switch_log_release,
+};
+
+/*
+ * Log a context switch event to a switch log reader.
+ *
+ * Does nothing when the context has no switch log.  Otherwise an entry
+ * (timestamp, timebase, SPU number or -1 when @spu is NULL, @type and
+ * @val) is appended if more than one slot is still available, and any
+ * reader sleeping on the log is woken up.
+ *
+ * Must be called with ctx->state_mutex held.
+ */
+void spu_switch_log_notify(struct spu *spu, struct spu_context *ctx,
+		u32 type, u32 val)
+{
+	if (!ctx->switch_log)
+		return;
+
+	if (spufs_switch_log_avail(ctx) > 1) {
+		struct switch_log_entry *p =
+			&ctx->switch_log->log[ctx->switch_log->head];
+
+		ktime_get_ts64(&p->tstamp);
+		p->timebase = get_tb();
+		p->spu_id = spu ? spu->number : -1;
+		p->type = type;
+		p->val = val;
+
+		ctx->switch_log->head =
+			(ctx->switch_log->head + 1) % SWITCH_LOG_BUFSIZE;
+	}
+
+	wake_up(&ctx->switch_log->wait);
+}
+
+/*
+ * seq_file show routine for the .ctx file: prints one line describing the
+ * context while holding its state mutex.
+ *
+ * The MFC control value comes from the hardware register, read under the
+ * SPU's register lock, when the context is on an SPU, and from the saved
+ * state otherwise.
+ */
+static int spufs_show_ctx(struct seq_file *s, void *private)
+{
+	struct spu_context *ctx = s->private;
+	u64 mfc_control_RW;
+
+	mutex_lock(&ctx->state_mutex);
+
+	if (!ctx->spu) {
+		mfc_control_RW = ctx->csa.priv2.mfc_control_RW;
+	} else {
+		struct spu *spu = ctx->spu;
+		struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+		spin_lock_irq(&spu->register_lock);
+		mfc_control_RW = in_be64(&priv2->mfc_control_RW);
+		spin_unlock_irq(&spu->register_lock);
+	}
+
+	seq_printf(s, "%c flgs(%lx) sflgs(%lx) pri(%d) ts(%d) spu(%02d)"
+		      " %c %llx %llx %llx %llx %x %x\n",
+		   ctx->state == SPU_STATE_SAVED ? 'S' : 'R',
+		   ctx->flags,
+		   ctx->sched_flags,
+		   ctx->prio,
+		   ctx->time_slice,
+		   ctx->spu ? ctx->spu->number : -1,
+		   list_empty(&ctx->rq) ? ' ' : 'q',
+		   ctx->csa.class_0_pending,
+		   ctx->csa.class_0_dar,
+		   ctx->csa.class_1_dsisr,
+		   mfc_control_RW,
+		   ctx->ops->runcntl_read(ctx),
+		   ctx->ops->status_read(ctx));
+
+	mutex_unlock(&ctx->state_mutex);
+
+	return 0;
+}
+
+/* Open the ".ctx" debug file as a single-record seq_file for the context. */
+static int spufs_ctx_open(struct inode *inode, struct file *file)
+{
+	struct spu_context *ctx = SPUFS_I(inode)->i_ctx;
+
+	return single_open(file, spufs_show_ctx, ctx);
+}
+
+/* File operations for the seq_file based ".ctx" debug file. */
+static const struct file_operations spufs_ctx_fops = {
+	.llseek		= seq_lseek,
+	.read		= seq_read,
+	.open		= spufs_ctx_open,
+	.release	= single_release,
+};
+
+/*
+ * Files created in the directory of a regular (schedulable) SPU context.
+ * Each entry names the file, its file operations, its permission bits and,
+ * where given, the size reported for the file.  An empty entry ends the
+ * list.
+ */
+const struct spufs_tree_descr spufs_dir_contents[] = {
+	{ .name = "capabilities", .ops = &spufs_caps_fops, .mode = 0444 },
+	{ .name = "mem", .ops = &spufs_mem_fops, .mode = 0666,
+	  .size = LS_SIZE },
+	{ .name = "regs", .ops = &spufs_regs_fops, .mode = 0666,
+	  .size = sizeof(struct spu_reg128[128]) },
+	{ .name = "mbox", .ops = &spufs_mbox_fops, .mode = 0444 },
+	{ .name = "ibox", .ops = &spufs_ibox_fops, .mode = 0444 },
+	{ .name = "wbox", .ops = &spufs_wbox_fops, .mode = 0222 },
+	{ .name = "mbox_stat", .ops = &spufs_mbox_stat_fops, .mode = 0444,
+	  .size = sizeof(u32) },
+	{ .name = "ibox_stat", .ops = &spufs_ibox_stat_fops, .mode = 0444,
+	  .size = sizeof(u32) },
+	{ .name = "wbox_stat", .ops = &spufs_wbox_stat_fops, .mode = 0444,
+	  .size = sizeof(u32) },
+	{ .name = "signal1", .ops = &spufs_signal1_fops, .mode = 0666 },
+	{ .name = "signal2", .ops = &spufs_signal2_fops, .mode = 0666 },
+	{ .name = "signal1_type", .ops = &spufs_signal1_type, .mode = 0666 },
+	{ .name = "signal2_type", .ops = &spufs_signal2_type, .mode = 0666 },
+	{ .name = "cntl", .ops = &spufs_cntl_fops, .mode = 0666 },
+	{ .name = "fpcr", .ops = &spufs_fpcr_fops, .mode = 0666,
+	  .size = sizeof(struct spu_reg128) },
+	{ .name = "lslr", .ops = &spufs_lslr_ops, .mode = 0444 },
+	{ .name = "mfc", .ops = &spufs_mfc_fops, .mode = 0666 },
+	{ .name = "mss", .ops = &spufs_mss_fops, .mode = 0666 },
+	{ .name = "npc", .ops = &spufs_npc_ops, .mode = 0666 },
+	{ .name = "srr0", .ops = &spufs_srr0_ops, .mode = 0666 },
+	{ .name = "decr", .ops = &spufs_decr_ops, .mode = 0666 },
+	{ .name = "decr_status", .ops = &spufs_decr_status_ops,
+	  .mode = 0666 },
+	{ .name = "event_mask", .ops = &spufs_event_mask_ops, .mode = 0666 },
+	{ .name = "event_status", .ops = &spufs_event_status_ops,
+	  .mode = 0444 },
+	{ .name = "psmap", .ops = &spufs_psmap_fops, .mode = 0666,
+	  .size = SPUFS_PS_MAP_SIZE },
+	{ .name = "phys-id", .ops = &spufs_id_ops, .mode = 0666 },
+	{ .name = "object-id", .ops = &spufs_object_id_ops, .mode = 0666 },
+	{ .name = "mbox_info", .ops = &spufs_mbox_info_fops, .mode = 0444,
+	  .size = sizeof(u32) },
+	{ .name = "ibox_info", .ops = &spufs_ibox_info_fops, .mode = 0444,
+	  .size = sizeof(u32) },
+	{ .name = "wbox_info", .ops = &spufs_wbox_info_fops, .mode = 0444,
+	  .size = sizeof(u32) },
+	{ .name = "dma_info", .ops = &spufs_dma_info_fops, .mode = 0444,
+	  .size = sizeof(struct spu_dma_info) },
+	{ .name = "proxydma_info", .ops = &spufs_proxydma_info_fops,
+	  .mode = 0444, .size = sizeof(struct spu_proxydma_info) },
+	{ .name = "tid", .ops = &spufs_tid_fops, .mode = 0444 },
+	{ .name = "stat", .ops = &spufs_stat_fops, .mode = 0444 },
+	{ .name = "switch_log", .ops = &spufs_switch_log_fops,
+	  .mode = 0444 },
+	{},
+};
+
+/*
+ * Files created in the directory of a context made with
+ * SPU_CREATE_NOSCHED.  Same entry layout as spufs_dir_contents; an empty
+ * entry ends the list.
+ */
+const struct spufs_tree_descr spufs_dir_nosched_contents[] = {
+	{ .name = "capabilities", .ops = &spufs_caps_fops, .mode = 0444 },
+	{ .name = "mem", .ops = &spufs_mem_fops, .mode = 0666,
+	  .size = LS_SIZE },
+	{ .name = "mbox", .ops = &spufs_mbox_fops, .mode = 0444 },
+	{ .name = "ibox", .ops = &spufs_ibox_fops, .mode = 0444 },
+	{ .name = "wbox", .ops = &spufs_wbox_fops, .mode = 0222 },
+	{ .name = "mbox_stat", .ops = &spufs_mbox_stat_fops, .mode = 0444,
+	  .size = sizeof(u32) },
+	{ .name = "ibox_stat", .ops = &spufs_ibox_stat_fops, .mode = 0444,
+	  .size = sizeof(u32) },
+	{ .name = "wbox_stat", .ops = &spufs_wbox_stat_fops, .mode = 0444,
+	  .size = sizeof(u32) },
+	{ .name = "signal1", .ops = &spufs_signal1_nosched_fops,
+	  .mode = 0222 },
+	{ .name = "signal2", .ops = &spufs_signal2_nosched_fops,
+	  .mode = 0222 },
+	{ .name = "signal1_type", .ops = &spufs_signal1_type, .mode = 0666 },
+	{ .name = "signal2_type", .ops = &spufs_signal2_type, .mode = 0666 },
+	{ .name = "mss", .ops = &spufs_mss_fops, .mode = 0666 },
+	{ .name = "mfc", .ops = &spufs_mfc_fops, .mode = 0666 },
+	{ .name = "cntl", .ops = &spufs_cntl_fops, .mode = 0666 },
+	{ .name = "npc", .ops = &spufs_npc_ops, .mode = 0666 },
+	{ .name = "psmap", .ops = &spufs_psmap_fops, .mode = 0666,
+	  .size = SPUFS_PS_MAP_SIZE },
+	{ .name = "phys-id", .ops = &spufs_id_ops, .mode = 0666 },
+	{ .name = "object-id", .ops = &spufs_object_id_ops, .mode = 0666 },
+	{ .name = "tid", .ops = &spufs_tid_fops, .mode = 0444 },
+	{ .name = "stat", .ops = &spufs_stat_fops, .mode = 0444 },
+	{},
+};
+
+/* Extra files added to a context directory when debug is enabled. */
+const struct spufs_tree_descr spufs_dir_debug_contents[] = {
+	{ .name = ".ctx", .ops = &spufs_ctx_fops, .mode = 0444 },
+	{},
+};
+
+/*
+ * Table of per-context items exported as struct spufs_coredump_reader
+ * entries.  Each entry gives a name, then either a *_dump callback or a
+ * *_get callback (the unused slot is NULL), then a size.  The list is
+ * terminated by an entry whose name is NULL.
+ * NOTE(review): every getter-based entry uses size 19 - presumably the
+ * space needed for the formatted value; confirm against the consumer.
+ */
+const struct spufs_coredump_reader spufs_coredump_read[] = {
+ { "regs", spufs_regs_dump, NULL, sizeof(struct spu_reg128[128])},
+ { "fpcr", spufs_fpcr_dump, NULL, sizeof(struct spu_reg128) },
+ { "lslr", NULL, spufs_lslr_get, 19 },
+ { "decr", NULL, spufs_decr_get, 19 },
+ { "decr_status", NULL, spufs_decr_status_get, 19 },
+ { "mem", spufs_mem_dump, NULL, LS_SIZE, },
+ { "signal1", spufs_signal1_dump, NULL, sizeof(u32) },
+ { "signal1_type", NULL, spufs_signal1_type_get, 19 },
+ { "signal2", spufs_signal2_dump, NULL, sizeof(u32) },
+ { "signal2_type", NULL, spufs_signal2_type_get, 19 },
+ { "event_mask", NULL, spufs_event_mask_get, 19 },
+ { "event_status", NULL, spufs_event_status_get, 19 },
+ { "mbox_info", spufs_mbox_info_dump, NULL, sizeof(u32) },
+ { "ibox_info", spufs_ibox_info_dump, NULL, sizeof(u32) },
+ { "wbox_info", spufs_wbox_info_dump, NULL, 4 * sizeof(u32)},
+ { "dma_info", spufs_dma_info_dump, NULL, sizeof(struct spu_dma_info)},
+ { "proxydma_info", spufs_proxydma_info_dump,
+ NULL, sizeof(struct spu_proxydma_info)},
+ { "object-id", NULL, spufs_object_id_get, 19 },
+ { "npc", NULL, spufs_npc_get, 19 },
+ { NULL },
+};
diff --git a/arch/powerpc/platforms/cell/spufs/gang.c b/arch/powerpc/platforms/cell/spufs/gang.c
new file mode 100644
index 000000000..827d338de
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/gang.c
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * SPU file system
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ *
+ * Author: Arnd Bergmann <arndb@de.ibm.com>
+ */
+
+#include <linux/list.h>
+#include <linux/slab.h>
+
+#include "spufs.h"
+
+/*
+ * Allocate a zeroed gang with its refcount, mutexes and lists initialised.
+ * Returns NULL if the allocation fails.
+ */
+struct spu_gang *alloc_spu_gang(void)
+{
+	struct spu_gang *gang = kzalloc(sizeof(*gang), GFP_KERNEL);
+
+	if (gang) {
+		kref_init(&gang->kref);
+		mutex_init(&gang->mutex);
+		mutex_init(&gang->aff_mutex);
+		INIT_LIST_HEAD(&gang->list);
+		INIT_LIST_HEAD(&gang->aff_list_head);
+	}
+
+	return gang;
+}
+
+/*
+ * kref release callback: frees the gang.  Warns if the gang still counts
+ * contexts or its context list is not empty.
+ */
+static void destroy_spu_gang(struct kref *kref)
+{
+	struct spu_gang *gang = container_of(kref, struct spu_gang, kref);
+
+	WARN_ON(gang->contexts || !list_empty(&gang->list));
+	kfree(gang);
+}
+
+/* Take a reference on @gang and hand the same pointer back. */
+struct spu_gang *get_spu_gang(struct spu_gang *gang)
+{
+	struct kref *ref = &gang->kref;
+
+	kref_get(ref);
+
+	return gang;
+}
+
+/*
+ * Drop a reference on @gang; destroy_spu_gang() runs when the last one
+ * goes away.  Returns the result of kref_put().
+ */
+int put_spu_gang(struct spu_gang *gang)
+{
+	int released = kref_put(&gang->kref, destroy_spu_gang);
+
+	return released;
+}
+
+/*
+ * Attach @ctx to @gang under the gang mutex: the context takes a gang
+ * reference, is linked onto the gang's list and is counted.
+ */
+void spu_gang_add_ctx(struct spu_gang *gang, struct spu_context *ctx)
+{
+	mutex_lock(&gang->mutex);
+
+	gang->contexts++;
+	list_add(&ctx->gang_list, &gang->list);
+	ctx->gang = get_spu_gang(gang);
+
+	mutex_unlock(&gang->mutex);
+}
+
+/*
+ * Detach @ctx from @gang under the gang mutex, then drop the reference
+ * the context held on the gang.  If the context was on an affinity list
+ * it is removed from it and AFF_OFFSETS_SET is cleared.
+ */
+void spu_gang_remove_ctx(struct spu_gang *gang, struct spu_context *ctx)
+{
+	mutex_lock(&gang->mutex);
+
+	WARN_ON(ctx->gang != gang);
+
+	list_del_init(&ctx->gang_list);
+	gang->contexts--;
+
+	if (!list_empty(&ctx->aff_list)) {
+		gang->aff_flags &= ~AFF_OFFSETS_SET;
+		list_del_init(&ctx->aff_list);
+	}
+
+	mutex_unlock(&gang->mutex);
+
+	put_spu_gang(gang);
+}
diff --git a/arch/powerpc/platforms/cell/spufs/hw_ops.c b/arch/powerpc/platforms/cell/spufs/hw_ops.c
new file mode 100644
index 000000000..8deaf786e
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/hw_ops.c
@@ -0,0 +1,335 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* hw_ops.c - query/set operations on active SPU context.
+ *
+ * Copyright (C) IBM 2005
+ * Author: Mark Nutter <mnutter@us.ibm.com>
+ */
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/poll.h>
+#include <linux/smp.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+
+#include <asm/io.h>
+#include <asm/spu.h>
+#include <asm/spu_priv1.h>
+#include <asm/spu_csa.h>
+#include <asm/mmu_context.h>
+#include "spufs.h"
+
+/*
+ * Read one word from the mailbox register pu_mb_R if the low byte of the
+ * mailbox status is non-zero.  Returns 4 when a word was stored in *data,
+ * 0 otherwise.
+ */
+static int spu_hw_mbox_read(struct spu_context *ctx, u32 * data)
+{
+	struct spu *spu = ctx->spu;
+	struct spu_problem __iomem *prob = spu->problem;
+	int nbytes = 0;
+
+	spin_lock_irq(&spu->register_lock);
+	if (in_be32(&prob->mb_stat_R) & 0x0000ff) {
+		*data = in_be32(&prob->pu_mb_R);
+		nbytes = 4;
+	}
+	spin_unlock_irq(&spu->register_lock);
+
+	return nbytes;
+}
+
+/* Return the raw mailbox status register of the SPU running @ctx. */
+static u32 spu_hw_mbox_stat_read(struct spu_context *ctx)
+{
+	struct spu_problem __iomem *prob = ctx->spu->problem;
+
+	return in_be32(&prob->mb_stat_R);
+}
+
+/*
+ * Compute the poll mask for the mailboxes of the SPU running @ctx.
+ *
+ * @events selects which directions the caller is interested in.  For each
+ * requested direction that is ready, the matching bits are added to the
+ * result; otherwise the corresponding class 2 interrupt is acknowledged
+ * and enabled so that the caller gets notified later.
+ *
+ * The write-side bits are OR-ed into the result so that a caller asking
+ * for both directions does not lose read readiness when both are ready.
+ */
+static __poll_t spu_hw_mbox_stat_poll(struct spu_context *ctx, __poll_t events)
+{
+	struct spu *spu = ctx->spu;
+	__poll_t ret = 0;
+	u32 stat;
+
+	spin_lock_irq(&spu->register_lock);
+	stat = in_be32(&spu->problem->mb_stat_R);
+
+	/* if the requested event is there, return the poll
+	   mask, otherwise enable the interrupt to get notified,
+	   but first mark any pending interrupts as done so
+	   we don't get woken up unnecessarily */
+
+	if (events & (EPOLLIN | EPOLLRDNORM)) {
+		if (stat & 0xff0000)
+			ret |= EPOLLIN | EPOLLRDNORM;
+		else {
+			spu_int_stat_clear(spu, 2, CLASS2_MAILBOX_INTR);
+			spu_int_mask_or(spu, 2, CLASS2_ENABLE_MAILBOX_INTR);
+		}
+	}
+	if (events & (EPOLLOUT | EPOLLWRNORM)) {
+		if (stat & 0x00ff00)
+			/* accumulate: do not clobber the EPOLLIN bits above */
+			ret |= EPOLLOUT | EPOLLWRNORM;
+		else {
+			spu_int_stat_clear(spu, 2,
+				CLASS2_MAILBOX_THRESHOLD_INTR);
+			spu_int_mask_or(spu, 2,
+				CLASS2_ENABLE_MAILBOX_THRESHOLD_INTR);
+		}
+	}
+	spin_unlock_irq(&spu->register_lock);
+	return ret;
+}
+
+/*
+ * Read one word from the interrupting mailbox if the status register
+ * shows data (mask 0xff0000).  Returns 4 when a word was stored in *data;
+ * otherwise enables the mailbox interrupt and returns 0.
+ */
+static int spu_hw_ibox_read(struct spu_context *ctx, u32 * data)
+{
+	struct spu *spu = ctx->spu;
+	struct spu_problem __iomem *prob = spu->problem;
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+	int nbytes = 0;
+
+	spin_lock_irq(&spu->register_lock);
+	if (!(in_be32(&prob->mb_stat_R) & 0xff0000)) {
+		/* make sure we get woken up by the interrupt */
+		spu_int_mask_or(spu, 2, CLASS2_ENABLE_MAILBOX_INTR);
+	} else {
+		/* read the first available word */
+		*data = in_be64(&priv2->puint_mb_R);
+		nbytes = 4;
+	}
+	spin_unlock_irq(&spu->register_lock);
+
+	return nbytes;
+}
+
+/*
+ * Write one word to the SPU inbound mailbox if the status register shows
+ * free space (mask 0x00ff00).  Returns 4 when the word was written;
+ * otherwise enables the mailbox threshold interrupt and returns 0.
+ */
+static int spu_hw_wbox_write(struct spu_context *ctx, u32 data)
+{
+	struct spu *spu = ctx->spu;
+	struct spu_problem __iomem *prob = spu->problem;
+	int nbytes = 0;
+
+	spin_lock_irq(&spu->register_lock);
+	if (!(in_be32(&prob->mb_stat_R) & 0x00ff00)) {
+		/* make sure we get woken up by the interrupt when space
+		   becomes available */
+		spu_int_mask_or(spu, 2, CLASS2_ENABLE_MAILBOX_THRESHOLD_INTR);
+	} else {
+		/* we have space to write wbox_data to */
+		out_be32(&prob->spu_mb_W, data);
+		nbytes = 4;
+	}
+	spin_unlock_irq(&spu->register_lock);
+
+	return nbytes;
+}
+
+/* Write @data to the signal notification 1 register. */
+static void spu_hw_signal1_write(struct spu_context *ctx, u32 data)
+{
+	struct spu_problem __iomem *prob = ctx->spu->problem;
+
+	out_be32(&prob->signal_notify1, data);
+}
+
+/* Write @data to the signal notification 2 register. */
+static void spu_hw_signal2_write(struct spu_context *ctx, u32 data)
+{
+	struct spu_problem __iomem *prob = ctx->spu->problem;
+
+	out_be32(&prob->signal_notify2, data);
+}
+
+/*
+ * Set (val != 0) or clear (val == 0) bit 0 of the SPU configuration
+ * register with a read-modify-write under the register lock.
+ */
+static void spu_hw_signal1_type_set(struct spu_context *ctx, u64 val)
+{
+	struct spu *spu = ctx->spu;
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+	u64 cfg;
+
+	spin_lock_irq(&spu->register_lock);
+	cfg = in_be64(&priv2->spu_cfg_RW);
+	cfg = val ? (cfg | 1) : (cfg & ~1);
+	out_be64(&priv2->spu_cfg_RW, cfg);
+	spin_unlock_irq(&spu->register_lock);
+}
+
+/* Return 1 if bit 0 of the SPU configuration register is set, else 0. */
+static u64 spu_hw_signal1_type_get(struct spu_context *ctx)
+{
+	u64 cfg = in_be64(&ctx->spu->priv2->spu_cfg_RW);
+
+	return (cfg & 1) ? 1 : 0;
+}
+
+/*
+ * Set (val != 0) or clear (val == 0) bit 1 of the SPU configuration
+ * register with a read-modify-write under the register lock.
+ */
+static void spu_hw_signal2_type_set(struct spu_context *ctx, u64 val)
+{
+	struct spu *spu = ctx->spu;
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+	u64 cfg;
+
+	spin_lock_irq(&spu->register_lock);
+	cfg = in_be64(&priv2->spu_cfg_RW);
+	cfg = val ? (cfg | 2) : (cfg & ~2);
+	out_be64(&priv2->spu_cfg_RW, cfg);
+	spin_unlock_irq(&spu->register_lock);
+}
+
+/* Return 1 if bit 1 of the SPU configuration register is set, else 0. */
+static u64 spu_hw_signal2_type_get(struct spu_context *ctx)
+{
+	u64 cfg = in_be64(&ctx->spu->priv2->spu_cfg_RW);
+
+	return (cfg & 2) ? 1 : 0;
+}
+
+/* Read the spu_npc_RW register of the SPU running @ctx. */
+static u32 spu_hw_npc_read(struct spu_context *ctx)
+{
+	struct spu_problem __iomem *prob = ctx->spu->problem;
+
+	return in_be32(&prob->spu_npc_RW);
+}
+
+/* Write @val to the spu_npc_RW register of the SPU running @ctx. */
+static void spu_hw_npc_write(struct spu_context *ctx, u32 val)
+{
+	struct spu_problem __iomem *prob = ctx->spu->problem;
+
+	out_be32(&prob->spu_npc_RW, val);
+}
+
+/* Read the spu_status_R register of the SPU running @ctx. */
+static u32 spu_hw_status_read(struct spu_context *ctx)
+{
+	struct spu_problem __iomem *prob = ctx->spu->problem;
+
+	return in_be32(&prob->spu_status_R);
+}
+
+/* Return the local store pointer of the SPU running @ctx. */
+static char *spu_hw_get_ls(struct spu_context *ctx)
+{
+	struct spu *spu = ctx->spu;
+
+	return spu->local_store;
+}
+
+/* Write @val to the spu_privcntl_RW register. */
+static void spu_hw_privcntl_write(struct spu_context *ctx, u64 val)
+{
+	struct spu_priv2 __iomem *priv2 = ctx->spu->priv2;
+
+	out_be64(&priv2->spu_privcntl_RW, val);
+}
+
+/* Read the spu_runcntl_RW register of the SPU running @ctx. */
+static u32 spu_hw_runcntl_read(struct spu_context *ctx)
+{
+	struct spu_problem __iomem *prob = ctx->spu->problem;
+
+	return in_be32(&prob->spu_runcntl_RW);
+}
+
+/*
+ * Write @val to the run control register under the register lock.  When
+ * SPU_RUNCNTL_ISOLATE is requested, the load request enable mask is
+ * written to the privileged control register first.
+ */
+static void spu_hw_runcntl_write(struct spu_context *ctx, u32 val)
+{
+	struct spu *spu = ctx->spu;
+
+	spin_lock_irq(&spu->register_lock);
+	if (val & SPU_RUNCNTL_ISOLATE) {
+		spu_hw_privcntl_write(ctx,
+			SPU_PRIVCNT_LOAD_REQUEST_ENABLE_MASK);
+	}
+	out_be32(&spu->problem->spu_runcntl_RW, val);
+	spin_unlock_irq(&spu->register_lock);
+}
+
+/*
+ * Write SPU_RUNCNTL_STOP to the run control register and busy-wait, with
+ * the register lock held, until the status register no longer reports
+ * SPU_STATUS_RUNNING.
+ */
+static void spu_hw_runcntl_stop(struct spu_context *ctx)
+{
+	struct spu *spu = ctx->spu;
+	struct spu_problem __iomem *prob = spu->problem;
+
+	spin_lock_irq(&spu->register_lock);
+	out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_STOP);
+	for (;;) {
+		if (!(in_be32(&prob->spu_status_R) & SPU_STATUS_RUNNING))
+			break;
+		cpu_relax();
+	}
+	spin_unlock_irq(&spu->register_lock);
+}
+
+/* Set the master run control bit in MFC state register 1. */
+static void spu_hw_master_start(struct spu_context *ctx)
+{
+	struct spu *spu = ctx->spu;
+	u64 sr1;
+
+	spin_lock_irq(&spu->register_lock);
+	sr1 = spu_mfc_sr1_get(spu);
+	sr1 |= MFC_STATE1_MASTER_RUN_CONTROL_MASK;
+	spu_mfc_sr1_set(spu, sr1);
+	spin_unlock_irq(&spu->register_lock);
+}
+
+/* Clear the master run control bit in MFC state register 1. */
+static void spu_hw_master_stop(struct spu_context *ctx)
+{
+	struct spu *spu = ctx->spu;
+	u64 sr1;
+
+	spin_lock_irq(&spu->register_lock);
+	sr1 = spu_mfc_sr1_get(spu);
+	sr1 &= ~MFC_STATE1_MASTER_RUN_CONTROL_MASK;
+	spu_mfc_sr1_set(spu, sr1);
+	spin_unlock_irq(&spu->register_lock);
+}
+
+/*
+ * Program a DMA tag query.  Returns -EAGAIN when the query type register
+ * is already non-zero; otherwise writes @mask and @mode and returns 0.
+ */
+static int spu_hw_set_mfc_query(struct spu_context * ctx, u32 mask, u32 mode)
+{
+	struct spu *spu = ctx->spu;
+	struct spu_problem __iomem *prob = spu->problem;
+	int ret = -EAGAIN;
+
+	spin_lock_irq(&spu->register_lock);
+	if (!in_be32(&prob->dma_querytype_RW)) {
+		out_be32(&prob->dma_querymask_RW, mask);
+		out_be32(&prob->dma_querytype_RW, mode);
+		ret = 0;
+	}
+	spin_unlock_irq(&spu->register_lock);
+
+	return ret;
+}
+
+/* Read the dma_tagstatus_R register of the SPU running @ctx. */
+static u32 spu_hw_read_mfc_tagstatus(struct spu_context * ctx)
+{
+	struct spu_problem __iomem *prob = ctx->spu->problem;
+
+	return in_be32(&prob->dma_tagstatus_R);
+}
+
+/* Read the dma_qstatus_R register of the SPU running @ctx. */
+static u32 spu_hw_get_mfc_free_elements(struct spu_context *ctx)
+{
+	struct spu_problem __iomem *prob = ctx->spu->problem;
+
+	return in_be32(&prob->dma_qstatus_R);
+}
+
+/*
+ * Queue the DMA command @cmd by writing its fields to the MFC command
+ * registers, then read back the command status.  The low 16 status bits
+ * map to: 0 -> success, 2 -> -EAGAIN, anything else -> -EINVAL.
+ */
+static int spu_hw_send_mfc_command(struct spu_context *ctx,
+		struct mfc_dma_command *cmd)
+{
+	struct spu *spu = ctx->spu;
+	struct spu_problem __iomem *prob = spu->problem;
+	u32 status;
+
+	spin_lock_irq(&spu->register_lock);
+	out_be32(&prob->mfc_lsa_W, cmd->lsa);
+	out_be64(&prob->mfc_ea_W, cmd->ea);
+	out_be32(&prob->mfc_union_W.by32.mfc_size_tag32,
+		(cmd->size << 16) | cmd->tag);
+	out_be32(&prob->mfc_union_W.by32.mfc_class_cmd32,
+		(cmd->class << 16) | cmd->cmd);
+	status = in_be32(&prob->mfc_union_W.by32.mfc_class_cmd32);
+	spin_unlock_irq(&spu->register_lock);
+
+	status &= 0xffff;
+	if (status == 0)
+		return 0;
+	if (status == 2)
+		return -EAGAIN;
+	return -EINVAL;
+}
+
+/*
+ * Issue the restart-DMA command to the MFC control register, unless a
+ * context switch is flagged as pending on the SPU.
+ */
+static void spu_hw_restart_dma(struct spu_context *ctx)
+{
+	struct spu *spu = ctx->spu;
+
+	if (test_bit(SPU_CONTEXT_SWITCH_PENDING, &spu->flags))
+		return;
+
+	out_be64(&spu->priv2->mfc_control_RW, MFC_CNTL_RESTART_DMA_COMMAND);
+}
+
+/* Context operations implemented by direct access to the SPU hardware. */
+struct spu_context_ops spu_hw_ops = {
+	/* mailboxes */
+	.mbox_read		= spu_hw_mbox_read,
+	.mbox_stat_read		= spu_hw_mbox_stat_read,
+	.mbox_stat_poll		= spu_hw_mbox_stat_poll,
+	.ibox_read		= spu_hw_ibox_read,
+	.wbox_write		= spu_hw_wbox_write,
+	/* signal notification */
+	.signal1_write		= spu_hw_signal1_write,
+	.signal1_type_set	= spu_hw_signal1_type_set,
+	.signal1_type_get	= spu_hw_signal1_type_get,
+	.signal2_write		= spu_hw_signal2_write,
+	.signal2_type_set	= spu_hw_signal2_type_set,
+	.signal2_type_get	= spu_hw_signal2_type_get,
+	/* program counter, status, local store */
+	.npc_read		= spu_hw_npc_read,
+	.npc_write		= spu_hw_npc_write,
+	.status_read		= spu_hw_status_read,
+	.get_ls			= spu_hw_get_ls,
+	/* run control */
+	.privcntl_write		= spu_hw_privcntl_write,
+	.runcntl_read		= spu_hw_runcntl_read,
+	.runcntl_write		= spu_hw_runcntl_write,
+	.runcntl_stop		= spu_hw_runcntl_stop,
+	.master_start		= spu_hw_master_start,
+	.master_stop		= spu_hw_master_stop,
+	/* MFC / DMA */
+	.set_mfc_query		= spu_hw_set_mfc_query,
+	.read_mfc_tagstatus	= spu_hw_read_mfc_tagstatus,
+	.get_mfc_free_elements	= spu_hw_get_mfc_free_elements,
+	.send_mfc_command	= spu_hw_send_mfc_command,
+	.restart_dma		= spu_hw_restart_dma,
+};
diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c
new file mode 100644
index 000000000..38c5be34c
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/inode.c
@@ -0,0 +1,826 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+/*
+ * SPU file system
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ *
+ * Author: Arnd Bergmann <arndb@de.ibm.com>
+ */
+
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/fs_context.h>
+#include <linux/fs_parser.h>
+#include <linux/fsnotify.h>
+#include <linux/backing-dev.h>
+#include <linux/init.h>
+#include <linux/ioctl.h>
+#include <linux/module.h>
+#include <linux/mount.h>
+#include <linux/namei.h>
+#include <linux/pagemap.h>
+#include <linux/poll.h>
+#include <linux/of.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+
+#include <asm/spu.h>
+#include <asm/spu_priv1.h>
+#include <linux/uaccess.h>
+
+#include "spufs.h"
+
+/* Per-superblock state; 'debug' is set by the "debug" mount option. */
+struct spufs_sb_info {
+ bool debug;
+};
+
+/* Slab cache backing struct spufs_inode_info allocations. */
+static struct kmem_cache *spufs_inode_cache;
+/* Copy of the isolation loader image; stays NULL if none was set up. */
+char *isolated_loader;
+/* Size in bytes of the image stored in isolated_loader. */
+static int isolated_loader_size;
+
+/* Return the spufs private data attached to superblock @sb. */
+static struct spufs_sb_info *spufs_get_sb_info(struct super_block *sb)
+{
+	struct spufs_sb_info *sbi = sb->s_fs_info;
+
+	return sbi;
+}
+
+/*
+ * ->alloc_inode: allocate a spufs inode from the inode cache with no
+ * gang, no context and zero openers.  Returns the embedded VFS inode, or
+ * NULL on allocation failure.
+ */
+static struct inode *
+spufs_alloc_inode(struct super_block *sb)
+{
+	struct spufs_inode_info *info;
+
+	info = kmem_cache_alloc(spufs_inode_cache, GFP_KERNEL);
+	if (info == NULL)
+		return NULL;
+
+	info->i_openers = 0;
+	info->i_ctx = NULL;
+	info->i_gang = NULL;
+
+	return &info->vfs_inode;
+}
+
+/* ->free_inode: return the spufs inode container to the inode cache. */
+static void spufs_free_inode(struct inode *inode)
+{
+	struct spufs_inode_info *info = SPUFS_I(inode);
+
+	kmem_cache_free(spufs_inode_cache, info);
+}
+
+/* Slab constructor: one-time initialisation of the embedded VFS inode. */
+static void
+spufs_init_once(void *p)
+{
+	struct spufs_inode_info *info = p;
+	struct inode *vfs_inode = &info->vfs_inode;
+
+	inode_init_once(vfs_inode);
+}
+
+/*
+ * Allocate a new inode on @sb with the given mode, a fresh inode number,
+ * the caller's fsuid/fsgid as owner and all timestamps set to the current
+ * time.  Returns NULL if no inode could be allocated.
+ */
+static struct inode *
+spufs_new_inode(struct super_block *sb, umode_t mode)
+{
+	struct inode *inode = new_inode(sb);
+
+	if (!inode)
+		return NULL;
+
+	inode->i_ino = get_next_ino();
+	inode->i_mode = mode;
+	inode->i_uid = current_fsuid();
+	inode->i_gid = current_fsgid();
+	inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
+
+	return inode;
+}
+
+/*
+ * ->setattr for spufs files: any attempt to change the file size is
+ * rejected with -EINVAL; other attributes are copied into the inode.
+ */
+static int
+spufs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
+		struct iattr *attr)
+{
+	struct inode *inode = d_inode(dentry);
+	bool wants_size = attr->ia_valid & ATTR_SIZE;
+
+	if (wants_size && attr->ia_size != inode->i_size)
+		return -EINVAL;
+
+	setattr_copy(&nop_mnt_idmap, inode, attr);
+	mark_inode_dirty(inode);
+
+	return 0;
+}
+
+
+/*
+ * Create a regular file inode for @dentry that uses @fops, reports @size
+ * and holds its own reference on @ctx.  Returns 0 on success or -ENOSPC
+ * when no inode could be allocated (the dentry is left untouched then).
+ */
+static int
+spufs_new_file(struct super_block *sb, struct dentry *dentry,
+		const struct file_operations *fops, umode_t mode,
+		size_t size, struct spu_context *ctx)
+{
+	static const struct inode_operations spufs_file_iops = {
+		.setattr = spufs_setattr,
+	};
+	struct inode *inode = spufs_new_inode(sb, S_IFREG | mode);
+
+	if (!inode)
+		return -ENOSPC;
+
+	inode->i_op = &spufs_file_iops;
+	inode->i_fop = fops;
+	inode->i_size = size;
+
+	/* The inode keeps a context reference in both private slots. */
+	SPUFS_I(inode)->i_ctx = get_spu_context(ctx);
+	inode->i_private = SPUFS_I(inode)->i_ctx;
+
+	d_add(dentry, inode);
+
+	return 0;
+}
+
+/*
+ * ->evict_inode: clear the VFS inode and drop the context and gang
+ * references held by it, if any.
+ */
+static void
+spufs_evict_inode(struct inode *inode)
+{
+	struct spufs_inode_info *info = SPUFS_I(inode);
+	struct spu_context *ctx;
+	struct spu_gang *gang;
+
+	clear_inode(inode);
+
+	ctx = info->i_ctx;
+	if (ctx)
+		put_spu_context(ctx);
+
+	gang = info->i_gang;
+	if (gang)
+		put_spu_gang(gang);
+}
+
+/*
+ * Unlink every positive child of @dir and shrink the dcache below it,
+ * with the directory inode locked for the duration.
+ */
+static void spufs_prune_dir(struct dentry *dir)
+{
+	struct dentry *child, *next;
+
+	inode_lock(d_inode(dir));
+	list_for_each_entry_safe(child, next, &dir->d_subdirs, d_child) {
+		spin_lock(&child->d_lock);
+		if (!simple_positive(child)) {
+			spin_unlock(&child->d_lock);
+			continue;
+		}
+
+		/* Pin and unhash the child before dropping its lock. */
+		dget_dlock(child);
+		__d_drop(child);
+		spin_unlock(&child->d_lock);
+
+		simple_unlink(d_inode(dir), child);
+		/* XXX: what was dcache_lock protecting here? Other
+		 * filesystems (IB, configfs) release dcache_lock
+		 * before unlink */
+		dput(child);
+	}
+	shrink_dcache_parent(dir);
+	inode_unlock(d_inode(dir));
+}
+
+/*
+ * Remove the context directory @dir from @parent: empty it, unhash it,
+ * remove it and let the context give up its mm_struct.  Returns the
+ * result of simple_rmdir().
+ *
+ * Caller must hold parent->i_mutex.
+ */
+static int spufs_rmdir(struct inode *parent, struct dentry *dir)
+{
+	int ret;
+
+	/* remove all entries */
+	spufs_prune_dir(dir);
+	d_drop(dir);
+	ret = simple_rmdir(parent, dir);
+
+	/* We have to give up the mm_struct */
+	spu_forget(SPUFS_I(d_inode(dir))->i_ctx);
+
+	return ret;
+}
+
+/*
+ * Populate directory @dir with one file per entry of @files, stopping at
+ * the first entry with a NULL or empty name.  Each file's permission bits
+ * are the entry's mode masked by @mode, and each file is bound to @ctx.
+ *
+ * Returns 0 on success, -ENOMEM if a dentry cannot be allocated, or the
+ * error from spufs_new_file().  Files created before a failure are left
+ * in place for the caller to clean up.
+ */
+static int spufs_fill_dir(struct dentry *dir,
+		const struct spufs_tree_descr *files, umode_t mode,
+		struct spu_context *ctx)
+{
+	for (; files->name && files->name[0]; files++) {
+		struct dentry *dentry = d_alloc_name(dir, files->name);
+		int ret;
+
+		if (!dentry)
+			return -ENOMEM;
+
+		ret = spufs_new_file(dir->d_sb, dentry, files->ops,
+				files->mode & mode, files->size, ctx);
+		if (ret) {
+			/*
+			 * spufs_new_file() fails before it attaches an
+			 * inode, so the reference from d_alloc_name() is
+			 * still ours: drop it instead of leaking the dentry.
+			 */
+			dput(dentry);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * ->release for a context directory: closing the directory file removes
+ * the directory itself from its parent (warning if that fails), then the
+ * generic dcache directory close is performed.
+ */
+static int spufs_dir_close(struct inode *inode, struct file *file)
+{
+	struct dentry *dir = file->f_path.dentry;
+	struct inode *parent = d_inode(dir->d_parent);
+	int err;
+
+	inode_lock_nested(parent, I_MUTEX_PARENT);
+	err = spufs_rmdir(parent, dir);
+	inode_unlock(parent);
+	WARN_ON(err);
+
+	return dcache_dir_close(inode, file);
+}
+
+/*
+ * File operations installed on the directory file returned when a
+ * context is created; release tears the context directory down.
+ */
+const struct file_operations spufs_context_fops = {
+	.llseek		= dcache_dir_lseek,
+	.read		= generic_read_dir,
+	.iterate_shared	= dcache_readdir,
+	.open		= dcache_dir_open,
+	.release	= spufs_dir_close,
+	.fsync		= noop_fsync,
+};
+EXPORT_SYMBOL_GPL(spufs_context_fops);
+
+/*
+ * Create the directory for a new SPU context under @dir: allocate the
+ * inode and the context, instantiate @dentry and fill the directory with
+ * the file set that matches @flags (plus the debug files when enabled on
+ * the superblock).  On a fill failure the directory is removed again.
+ *
+ * Returns 0 on success, -ENOSPC if the inode or context cannot be
+ * allocated, or the error from spufs_fill_dir().
+ */
+static int
+spufs_mkdir(struct inode *dir, struct dentry *dentry, unsigned int flags,
+		umode_t mode)
+{
+	const struct spufs_tree_descr *contents;
+	struct spu_context *ctx;
+	struct inode *inode;
+	int ret;
+
+	inode = spufs_new_inode(dir->i_sb, mode | S_IFDIR);
+	if (!inode)
+		return -ENOSPC;
+
+	inode_init_owner(&nop_mnt_idmap, inode, dir, mode | S_IFDIR);
+	ctx = alloc_spu_context(SPUFS_I(dir)->i_gang); /* XXX gang */
+	SPUFS_I(inode)->i_ctx = ctx;
+	if (!ctx) {
+		iput(inode);
+		return -ENOSPC;
+	}
+
+	ctx->flags = flags;
+	inode->i_fop = &simple_dir_operations;
+	inode->i_op = &simple_dir_inode_operations;
+
+	inode_lock(inode);
+
+	dget(dentry);
+	inc_nlink(dir);
+	inc_nlink(inode);
+
+	d_instantiate(dentry, inode);
+
+	/* Pick the file set according to the scheduling mode. */
+	contents = (flags & SPU_CREATE_NOSCHED) ?
+			spufs_dir_nosched_contents : spufs_dir_contents;
+	ret = spufs_fill_dir(dentry, contents, mode, ctx);
+
+	if (!ret && spufs_get_sb_info(dir->i_sb)->debug)
+		ret = spufs_fill_dir(dentry, spufs_dir_debug_contents,
+				mode, ctx);
+
+	if (ret)
+		spufs_rmdir(dir, dentry);
+
+	inode_unlock(inode);
+
+	return ret;
+}
+
+/*
+ * Open the context directory at @path read-only and install it in a new
+ * file descriptor using spufs_context_fops.  Returns the descriptor or a
+ * negative error code.
+ */
+static int spufs_context_open(const struct path *path)
+{
+	struct file *filp;
+	int fd = get_unused_fd_flags(0);
+
+	if (fd < 0)
+		return fd;
+
+	filp = dentry_open(path, O_RDONLY, current_cred());
+	if (IS_ERR(filp)) {
+		put_unused_fd(fd);
+		return PTR_ERR(filp);
+	}
+
+	filp->f_op = &spufs_context_fops;
+	fd_install(fd, filp);
+
+	return fd;
+}
+
+/*
+ * Validate an affinity request for a context about to be created in
+ * @gang.
+ *
+ * Returns an ERR_PTR() on failure.  On success returns NULL when no SPU
+ * affinity was requested, or the neighbor context taken from @filp with
+ * an extra reference held (the caller must drop it).
+ */
+static struct spu_context *
+spufs_assert_affinity(unsigned int flags, struct spu_gang *gang,
+		struct file *filp)
+{
+	struct spu_context *tmp, *neighbor, *err;
+	struct spu *first_spu;
+	int count, node;
+
+	/* Affinity support is detected from the first SPU of node 0. */
+	first_spu = list_entry(cbe_spu_info[0].spus.next,
+			struct spu, cbe_list);
+	if (list_empty(&first_spu->aff_list))
+		return ERR_PTR(-EINVAL);
+
+	if (flags & SPU_CREATE_GANG)
+		return ERR_PTR(-EINVAL);
+
+	/* Only one context per gang may carry memory affinity. */
+	if ((flags & SPU_CREATE_AFFINITY_MEM) &&
+	    gang->aff_ref_ctx &&
+	    (gang->aff_ref_ctx->flags & SPU_CREATE_AFFINITY_MEM))
+		return ERR_PTR(-EEXIST);
+
+	if (gang->aff_flags & AFF_MERGED)
+		return ERR_PTR(-EBUSY);
+
+	if (!(flags & SPU_CREATE_AFFINITY_SPU))
+		return NULL;
+
+	if (!filp || filp->f_op != &spufs_context_fops)
+		return ERR_PTR(-EINVAL);
+
+	neighbor = get_spu_context(SPUFS_I(file_inode(filp))->i_ctx);
+
+	/* The neighbor must still have a free side in the affinity list. */
+	if (!list_empty(&neighbor->aff_list) && !(neighbor->aff_head) &&
+	    !list_is_last(&neighbor->aff_list, &gang->aff_list_head) &&
+	    !list_entry(neighbor->aff_list.next, struct spu_context,
+			aff_list)->aff_head) {
+		err = ERR_PTR(-EEXIST);
+		goto out_put_neighbor;
+	}
+
+	if (gang != neighbor->gang) {
+		err = ERR_PTR(-EINVAL);
+		goto out_put_neighbor;
+	}
+
+	/* Count the contexts the affinity list would hold afterwards. */
+	count = 1;
+	list_for_each_entry(tmp, &gang->aff_list_head, aff_list)
+		count++;
+	if (list_empty(&neighbor->aff_list))
+		count++;
+
+	/* Some node must have that many unreserved SPUs. */
+	for (node = 0; node < MAX_NUMNODES; node++) {
+		if ((cbe_spu_info[node].n_spus - atomic_read(
+			&cbe_spu_info[node].reserved_spus)) >= count)
+			break;
+	}
+
+	if (node == MAX_NUMNODES) {
+		err = ERR_PTR(-EEXIST);
+		goto out_put_neighbor;
+	}
+
+	return neighbor;
+
+out_put_neighbor:
+	put_spu_context(neighbor);
+	return err;
+}
+
+/*
+ * Record the affinity of the new context @ctx in its gang: set the gang's
+ * reference context for memory affinity, and for SPU affinity link @ctx
+ * next to @neighbor on the gang's affinity list, updating the aff_head
+ * markers.
+ */
+static void
+spufs_set_affinity(unsigned int flags, struct spu_context *ctx,
+		struct spu_context *neighbor)
+{
+	struct spu_gang *gang = ctx->gang;
+
+	if (flags & SPU_CREATE_AFFINITY_MEM)
+		gang->aff_ref_ctx = ctx;
+
+	if (!(flags & SPU_CREATE_AFFINITY_SPU))
+		return;
+
+	/* A neighbor not yet on the list becomes a head entry. */
+	if (list_empty(&neighbor->aff_list)) {
+		list_add_tail(&neighbor->aff_list, &gang->aff_list_head);
+		neighbor->aff_head = 1;
+	}
+
+	if (list_is_last(&neighbor->aff_list, &gang->aff_list_head) ||
+	    list_entry(neighbor->aff_list.next, struct spu_context,
+			aff_list)->aff_head) {
+		/* Room after the neighbor: insert behind it. */
+		list_add(&ctx->aff_list, &neighbor->aff_list);
+	} else {
+		/* Otherwise insert in front, taking over the head mark. */
+		list_add_tail(&ctx->aff_list, &neighbor->aff_list);
+		if (neighbor->aff_head) {
+			neighbor->aff_head = 0;
+			ctx->aff_head = 1;
+		}
+	}
+
+	if (!gang->aff_ref_ctx)
+		gang->aff_ref_ctx = ctx;
+}
+
+/*
+ * Create a new SPU context directory for @dentry below @inode and open
+ * it.
+ *
+ * Checks the flag combination (NOSCHED needs CAP_SYS_NICE, ISOLATE needs
+ * NOSCHED and a loaded isolation loader), validates any affinity request
+ * against the parent gang, creates the directory, applies the affinity
+ * and finally opens the directory.
+ *
+ * Returns the new file descriptor on success or a negative error code.
+ *
+ * The neighbor reference returned by spufs_assert_affinity() is dropped
+ * on every path after the directory creation attempt, including failure
+ * of spufs_mkdir(), so it can no longer leak.
+ */
+static int
+spufs_create_context(struct inode *inode, struct dentry *dentry,
+		struct vfsmount *mnt, int flags, umode_t mode,
+		struct file *aff_filp)
+{
+	int ret;
+	int affinity;
+	struct spu_gang *gang;
+	struct spu_context *neighbor;
+	struct path path = {.mnt = mnt, .dentry = dentry};
+
+	if ((flags & SPU_CREATE_NOSCHED) &&
+	    !capable(CAP_SYS_NICE))
+		return -EPERM;
+
+	if ((flags & (SPU_CREATE_NOSCHED | SPU_CREATE_ISOLATE))
+	    == SPU_CREATE_ISOLATE)
+		return -EINVAL;
+
+	if ((flags & SPU_CREATE_ISOLATE) && !isolated_loader)
+		return -ENODEV;
+
+	gang = NULL;
+	neighbor = NULL;
+	affinity = flags & (SPU_CREATE_AFFINITY_MEM | SPU_CREATE_AFFINITY_SPU);
+	if (affinity) {
+		gang = SPUFS_I(inode)->i_gang;
+		if (!gang)
+			return -EINVAL;
+		mutex_lock(&gang->aff_mutex);
+		neighbor = spufs_assert_affinity(flags, gang, aff_filp);
+		if (IS_ERR(neighbor)) {
+			ret = PTR_ERR(neighbor);
+			goto out_aff_unlock;
+		}
+	}
+
+	ret = spufs_mkdir(inode, dentry, flags, mode & 0777);
+
+	if (!ret && affinity)
+		spufs_set_affinity(flags, SPUFS_I(d_inode(dentry))->i_ctx,
+				neighbor);
+
+	/* Done with the neighbor, whether or not the mkdir succeeded. */
+	if (neighbor)
+		put_spu_context(neighbor);
+
+	if (ret)
+		goto out_aff_unlock;
+
+	ret = spufs_context_open(&path);
+	if (ret < 0)
+		WARN_ON(spufs_rmdir(inode, dentry));
+
+out_aff_unlock:
+	if (affinity)
+		mutex_unlock(&gang->aff_mutex);
+	return ret;
+}
+
+/*
+ * Create a gang directory for @dentry under @dir: allocate the inode and
+ * the gang, attach the gang to the inode and instantiate the dentry.
+ * Returns 0, -ENOSPC if no inode is available, or -ENOMEM if the gang
+ * cannot be allocated.
+ */
+static int
+spufs_mkgang(struct inode *dir, struct dentry *dentry, umode_t mode)
+{
+	struct spu_gang *gang;
+	struct inode *inode;
+
+	inode = spufs_new_inode(dir->i_sb, mode | S_IFDIR);
+	if (!inode)
+		return -ENOSPC;
+
+	inode_init_owner(&nop_mnt_idmap, inode, dir, mode | S_IFDIR);
+
+	gang = alloc_spu_gang();
+	SPUFS_I(inode)->i_ctx = NULL;
+	SPUFS_I(inode)->i_gang = gang;
+	if (!gang) {
+		iput(inode);
+		return -ENOMEM;
+	}
+
+	inode->i_fop = &simple_dir_operations;
+	inode->i_op = &simple_dir_inode_operations;
+
+	d_instantiate(dentry, inode);
+	inc_nlink(dir);
+	inc_nlink(d_inode(dentry));
+
+	return 0;
+}
+
+/*
+ * Open the gang directory at @path read-only and install it in a new
+ * file descriptor using simple_dir_operations.  Returns the descriptor
+ * or a negative error code.
+ */
+static int spufs_gang_open(const struct path *path)
+{
+	struct file *filp;
+	int fd = get_unused_fd_flags(0);
+
+	if (fd < 0)
+		return fd;
+
+	/*
+	 * get references for dget and mntget, will be released
+	 * in error path of *_open().
+	 */
+	filp = dentry_open(path, O_RDONLY, current_cred());
+	if (IS_ERR(filp)) {
+		put_unused_fd(fd);
+		return PTR_ERR(filp);
+	}
+
+	filp->f_op = &simple_dir_operations;
+	fd_install(fd, filp);
+
+	return fd;
+}
+
+/*
+ * Create a gang directory for @dentry and open it.  If the open fails,
+ * the directory is removed again (warning if that removal fails).
+ * Returns the new file descriptor or a negative error code.
+ */
+static int spufs_create_gang(struct inode *inode,
+		struct dentry *dentry,
+		struct vfsmount *mnt, umode_t mode)
+{
+	struct path path = {.mnt = mnt, .dentry = dentry};
+	int ret = spufs_mkgang(inode, dentry, mode & 0777);
+
+	if (ret)
+		return ret;
+
+	ret = spufs_gang_open(&path);
+	if (ret < 0)
+		WARN_ON(simple_rmdir(inode, dentry));
+
+	return ret;
+}
+
+
+static struct file_system_type spufs_type;
+
+/*
+ * Entry point for creating a gang or context named by @dentry in the
+ * spufs directory @path.
+ *
+ * Rejects directories that are not on spufs, unknown flags, and anything
+ * other than a context inside a gang directory when @path is not the
+ * filesystem root.  Applies the caller's umask, dispatches to gang or
+ * context creation and sends a mkdir notification on success.
+ *
+ * Returns the new file descriptor or a negative error code.
+ */
+long spufs_create(const struct path *path, struct dentry *dentry,
+		unsigned int flags, umode_t mode, struct file *filp)
+{
+	struct dentry *parent = path->dentry;
+	struct inode *dir = d_inode(parent);
+	int ret;
+
+	/* check if we are on spufs */
+	if (parent->d_sb->s_type != &spufs_type)
+		return -EINVAL;
+
+	/* don't accept undefined flags */
+	if (flags & (~SPU_CREATE_FLAG_ALL))
+		return -EINVAL;
+
+	/* only threads can be underneath a gang */
+	if (parent != parent->d_sb->s_root &&
+	    ((flags & SPU_CREATE_GANG) || !SPUFS_I(dir)->i_gang))
+		return -EINVAL;
+
+	mode &= ~current_umask();
+
+	if (flags & SPU_CREATE_GANG)
+		ret = spufs_create_gang(dir, dentry, path->mnt, mode);
+	else
+		ret = spufs_create_context(dir, dentry, path->mnt, flags,
+				mode, filp);
+
+	if (ret >= 0)
+		fsnotify_mkdir(dir, dentry);
+
+	return ret;
+}
+
+/* File system initialization */
+/* Mount-time settings for the root directory: owner, group and mode. */
+struct spufs_fs_context {
+ kuid_t uid;
+ kgid_t gid;
+ umode_t mode;
+};
+
+/* Identifiers of the mount options handled by spufs_parse_param(). */
+enum {
+ Opt_uid, Opt_gid, Opt_mode, Opt_debug,
+};
+
+/* Mount option table: "uid"/"gid" (u32), "mode" (octal), "debug" (flag). */
+static const struct fs_parameter_spec spufs_fs_parameters[] = {
+ fsparam_u32 ("gid", Opt_gid),
+ fsparam_u32oct ("mode", Opt_mode),
+ fsparam_u32 ("uid", Opt_uid),
+ fsparam_flag ("debug", Opt_debug),
+ {}
+};
+
+/*
+ * ->show_options: print the mount options that differ from the values
+ * tested here (root uid, root gid, mode 0775) and the debug flag.
+ */
+static int spufs_show_options(struct seq_file *m, struct dentry *root)
+{
+	struct inode *inode = root->d_inode;
+	kuid_t uid = inode->i_uid;
+	kgid_t gid = inode->i_gid;
+
+	if (!uid_eq(uid, GLOBAL_ROOT_UID))
+		seq_printf(m, ",uid=%u",
+			from_kuid_munged(&init_user_ns, uid));
+
+	if (!gid_eq(gid, GLOBAL_ROOT_GID))
+		seq_printf(m, ",gid=%u",
+			from_kgid_munged(&init_user_ns, gid));
+
+	if ((inode->i_mode & S_IALLUGO) != 0775)
+		seq_printf(m, ",mode=%o", inode->i_mode);
+
+	if (spufs_get_sb_info(root->d_sb)->debug)
+		seq_puts(m, ",debug");
+
+	return 0;
+}
+
+/*
+ * ->parse_param: parse one mount parameter.  "uid" and "gid" are mapped
+ * through the caller's user namespace and rejected when invalid, "mode"
+ * is masked with S_IALLUGO, and "debug" sets the superblock debug flag.
+ * Returns 0 on success or a negative error code.
+ */
+static int spufs_parse_param(struct fs_context *fc, struct fs_parameter *param)
+{
+	struct spufs_fs_context *ctx = fc->fs_private;
+	struct spufs_sb_info *sbi = fc->s_fs_info;
+	struct fs_parse_result result;
+	int opt = fs_parse(fc, spufs_fs_parameters, param, &result);
+
+	if (opt < 0)
+		return opt;
+
+	switch (opt) {
+	case Opt_uid: {
+		kuid_t uid = make_kuid(current_user_ns(), result.uint_32);
+
+		if (!uid_valid(uid))
+			return invalf(fc, "Unknown uid");
+		ctx->uid = uid;
+		break;
+	}
+	case Opt_gid: {
+		kgid_t gid = make_kgid(current_user_ns(), result.uint_32);
+
+		if (!gid_valid(gid))
+			return invalf(fc, "Unknown gid");
+		ctx->gid = gid;
+		break;
+	}
+	case Opt_mode:
+		ctx->mode = result.uint_32 & S_IALLUGO;
+		break;
+	case Opt_debug:
+		sbi->debug = true;
+		break;
+	}
+
+	return 0;
+}
+
+/* Free the pages that hold the isolation loader image. */
+static void spufs_exit_isolated_loader(void)
+{
+	unsigned long addr = (unsigned long) isolated_loader;
+
+	free_pages(addr, get_order(isolated_loader_size));
+}
+
+/*
+ * Copy the "loader" property of the "/spu-isolation" device tree node
+ * into freshly allocated pages.  Silently does nothing if the node or
+ * the property is missing or the pages cannot be allocated.
+ */
+static void __init
+spufs_init_isolated_loader(void)
+{
+	struct device_node *dn = of_find_node_by_path("/spu-isolation");
+	const char *loader;
+	int size;
+
+	if (!dn)
+		return;
+
+	loader = of_get_property(dn, "loader", &size);
+	of_node_put(dn);
+	if (!loader)
+		return;
+
+	/* the loader must be aligned on a 16 byte boundary */
+	isolated_loader = (char *)__get_free_pages(GFP_KERNEL, get_order(size));
+	if (!isolated_loader)
+		return;
+
+	memcpy(isolated_loader, loader, size);
+	isolated_loader_size = size;
+	printk(KERN_INFO "spufs: SPU isolation mode enabled\n");
+}
+
+/*
+ * Create the root directory of the superblock using the owner, group and
+ * mode collected in the fs_context.  Returns 0, -ENODEV when no SPU
+ * management ops are registered, or -ENOMEM.
+ */
+static int spufs_create_root(struct super_block *sb, struct fs_context *fc)
+{
+	struct spufs_fs_context *ctx = fc->fs_private;
+	struct inode *inode;
+
+	if (!spu_management_ops)
+		return -ENODEV;
+
+	inode = spufs_new_inode(sb, S_IFDIR | ctx->mode);
+	if (!inode)
+		return -ENOMEM;
+
+	inode->i_op = &simple_dir_inode_operations;
+	inode->i_fop = &simple_dir_operations;
+	inode->i_uid = ctx->uid;
+	inode->i_gid = ctx->gid;
+	SPUFS_I(inode)->i_ctx = NULL;
+	inc_nlink(inode);
+
+	sb->s_root = d_make_root(inode);
+
+	return sb->s_root ? 0 : -ENOMEM;
+}
+
+/* Superblock operations for spufs. */
+static const struct super_operations spufs_ops = {
+	.alloc_inode	= spufs_alloc_inode,
+	.free_inode	= spufs_free_inode,
+	.evict_inode	= spufs_evict_inode,
+	.statfs		= simple_statfs,
+	.show_options	= spufs_show_options,
+};
+
+/* Set up the superblock parameters and create the root directory. */
+static int spufs_fill_super(struct super_block *sb, struct fs_context *fc)
+{
+	sb->s_magic = SPUFS_MAGIC;
+	sb->s_op = &spufs_ops;
+	sb->s_maxbytes = MAX_LFS_FILESIZE;
+	sb->s_blocksize_bits = PAGE_SHIFT;
+	sb->s_blocksize = PAGE_SIZE;
+
+	return spufs_create_root(sb, fc);
+}
+
+/* ->get_tree: obtain the superblock through get_tree_single(). */
+static int spufs_get_tree(struct fs_context *fc)
+{
+	int ret = get_tree_single(fc, spufs_fill_super);
+
+	return ret;
+}
+
+/*
+ * ->free: release what spufs_init_fs_context() attached to @fc.
+ *
+ * Both the parsed mount settings (fc->fs_private) and the superblock info
+ * still owned by the context (fc->s_fs_info) are freed.  Either pointer
+ * may be NULL here; kfree() accepts NULL.
+ */
+static void spufs_free_fc(struct fs_context *fc)
+{
+	kfree(fc->fs_private);
+	kfree(fc->s_fs_info);
+}
+
+/* Filesystem context operations used while mounting spufs. */
+static const struct fs_context_operations spufs_context_ops = {
+	.parse_param	= spufs_parse_param,
+	.get_tree	= spufs_get_tree,
+	.free		= spufs_free_fc,
+};
+
+/*
+ * ->init_fs_context: allocate the mount settings and the superblock info,
+ * default the root owner to the current uid/gid with mode 0755, and hook
+ * up the context operations.  Returns 0 or -ENOMEM.
+ */
+static int spufs_init_fs_context(struct fs_context *fc)
+{
+	struct spufs_fs_context *ctx;
+	struct spufs_sb_info *sbi;
+
+	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+	if (!ctx)
+		return -ENOMEM;
+
+	sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
+	if (!sbi) {
+		kfree(ctx);
+		return -ENOMEM;
+	}
+
+	ctx->mode = 0755;
+	ctx->uid = current_uid();
+	ctx->gid = current_gid();
+
+	fc->ops = &spufs_context_ops;
+	fc->fs_private = ctx;
+	fc->s_fs_info = sbi;
+
+	return 0;
+}
+
+/* Filesystem type registered under the name "spufs". */
+static struct file_system_type spufs_type = {
+	.name			= "spufs",
+	.owner			= THIS_MODULE,
+	.parameters		= spufs_fs_parameters,
+	.init_fs_context	= spufs_init_fs_context,
+	.kill_sb		= kill_litter_super,
+};
+MODULE_ALIAS_FS("spufs");
+
+/*
+ * Module init: create the inode cache, start the SPU scheduler, register
+ * the SPU system calls and the filesystem, then try to set up the
+ * isolation loader.  Each failure undoes the steps already completed.
+ * Returns 0, -ENODEV when no SPU management ops exist, -ENOMEM, or the
+ * error of the failing step.
+ */
+static int __init spufs_init(void)
+{
+	int ret;
+
+	if (!spu_management_ops)
+		return -ENODEV;
+
+	spufs_inode_cache = kmem_cache_create("spufs_inode_cache",
+			sizeof(struct spufs_inode_info), 0,
+			SLAB_HWCACHE_ALIGN|SLAB_ACCOUNT, spufs_init_once);
+	if (!spufs_inode_cache)
+		return -ENOMEM;
+
+	ret = spu_sched_init();
+	if (ret)
+		goto out_cache;
+
+	ret = register_spu_syscalls(&spufs_calls);
+	if (ret)
+		goto out_sched;
+
+	ret = register_filesystem(&spufs_type);
+	if (ret)
+		goto out_syscalls;
+
+	spufs_init_isolated_loader();
+
+	return 0;
+
+out_syscalls:
+	unregister_spu_syscalls(&spufs_calls);
+out_sched:
+	spu_sched_exit();
+out_cache:
+	kmem_cache_destroy(spufs_inode_cache);
+	return ret;
+}
+module_init(spufs_init);
+
+/*
+ * Module exit: stop the scheduler, free the isolation loader image,
+ * unregister the system calls and the filesystem, and destroy the inode
+ * cache, in that order.
+ */
+static void __exit spufs_exit(void)
+{
+ spu_sched_exit();
+ spufs_exit_isolated_loader();
+ unregister_spu_syscalls(&spufs_calls);
+ unregister_filesystem(&spufs_type);
+ kmem_cache_destroy(spufs_inode_cache);
+}
+module_exit(spufs_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Arnd Bergmann <arndb@de.ibm.com>");
+
diff --git a/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c b/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c
new file mode 100644
index 000000000..43b9dde7f
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/lscsa_alloc.c
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * SPU local store allocation routines
+ *
+ * Copyright 2007 Benjamin Herrenschmidt, IBM Corp.
+ */
+
+#undef DEBUG
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+
+#include <asm/spu.h>
+#include <asm/spu_csa.h>
+#include <asm/mmu.h>
+
+#include "spufs.h"
+
+/*
+ * Allocate a zeroed local-store save area with vzalloc(), attach it to
+ * @csa and mark every page backing its ls[] array as reserved.
+ *
+ * Returns 0 on success or -ENOMEM when the allocation fails (in which
+ * case @csa is left untouched).
+ */
+int spu_alloc_lscsa(struct spu_state *csa)
+{
+	struct spu_lscsa *area = vzalloc(sizeof(*area));
+	unsigned char *page, *end;
+
+	if (!area)
+		return -ENOMEM;
+	csa->lscsa = area;
+
+	/* Set LS pages reserved to allow for user-space mapping. */
+	end = area->ls + LS_SIZE;
+	for (page = area->ls; page < end; page += PAGE_SIZE)
+		SetPageReserved(vmalloc_to_page(page));
+
+	return 0;
+}
+
+/*
+ * Release the local-store save area attached to @csa, if any.
+ *
+ * Undoes spu_alloc_lscsa(): the reserved bit is cleared on every page
+ * backing ls[] and the area is handed back with vfree(). The pointer in
+ * @csa is then reset to NULL so that the NULL guard at the top really
+ * makes a second call a no-op instead of a double free.
+ */
+void spu_free_lscsa(struct spu_state *csa)
+{
+	struct spu_lscsa *lscsa = csa->lscsa;
+	unsigned char *p;
+
+	if (lscsa == NULL)
+		return;
+
+	/* Clear reserved bit before vfree. */
+	for (p = lscsa->ls; p < lscsa->ls + LS_SIZE; p += PAGE_SIZE)
+		ClearPageReserved(vmalloc_to_page(p));
+
+	vfree(lscsa);
+	/* do not leave a dangling pointer behind */
+	csa->lscsa = NULL;
+}
diff --git a/arch/powerpc/platforms/cell/spufs/run.c b/arch/powerpc/platforms/cell/spufs/run.c
new file mode 100644
index 000000000..ce52b8749
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/run.c
@@ -0,0 +1,451 @@
+// SPDX-License-Identifier: GPL-2.0
+#define DEBUG
+
+#include <linux/wait.h>
+#include <linux/ptrace.h>
+
+#include <asm/spu.h>
+#include <asm/spu_priv1.h>
+#include <asm/io.h>
+#include <asm/unistd.h>
+
+#include "spufs.h"
+
+/*
+ * Interrupt-level stop callback.
+ *
+ * Copies the exception state for interrupt class @irq from @spu into
+ * the save area of the context currently bound to it, then wakes all
+ * waiters on that context's stop queue.
+ */
+void spufs_stop_callback(struct spu *spu, int irq)
+{
+	struct spu_context *owner = spu->ctx;
+
+	/*
+	 * It should be impossible to preempt a context while an exception
+	 * is being processed, since the context switch code is specially
+	 * coded to deal with interrupts ... But, just in case, sanity check
+	 * the context pointer. It is OK to return doing nothing since
+	 * the exception will be regenerated when the context is resumed.
+	 */
+	if (!owner)
+		return;
+
+	/* Copy exception arguments into module specific structure */
+	if (irq == 0) {
+		owner->csa.class_0_pending = spu->class_0_pending;
+		owner->csa.class_0_dar = spu->class_0_dar;
+	} else if (irq == 1) {
+		owner->csa.class_1_dsisr = spu->class_1_dsisr;
+		owner->csa.class_1_dar = spu->class_1_dar;
+	}
+	/* class 2 (and anything else) carries nothing to copy */
+
+	/* ensure that the exception status has hit memory before a
+	 * thread waiting on the context's stop queue is woken */
+	smp_wmb();
+
+	wake_up_all(&owner->stop_wq);
+}
+
+/*
+ * Wait condition for the stop queue.
+ *
+ * Stores the current SPU status word in *@stat and returns 1 when the
+ * waiter has something to handle: the SPU has stopped (invalid
+ * instruction, single step, halt or stop), a scheduler notification is
+ * flagged, a class 1 fault (PTE not found / access denied) is recorded,
+ * or a class 0 exception is pending. Returns 0 otherwise.
+ */
+int spu_stopped(struct spu_context *ctx, u32 *stat)
+{
+	const u32 stop_bits = SPU_STATUS_INVALID_INSTR | SPU_STATUS_SINGLE_STEP |
+		SPU_STATUS_STOPPED_BY_HALT | SPU_STATUS_STOPPED_BY_STOP;
+	u64 fault;
+
+	for (;;) {
+		*stat = ctx->ops->status_read(ctx);
+		if (!(*stat & stop_bits))
+			break;
+		/*
+		 * If the spu hasn't finished stopping, we need to
+		 * re-read the register to get the stopped value.
+		 */
+		if (!(*stat & SPU_STATUS_RUNNING))
+			return 1;
+	}
+
+	if (test_bit(SPU_SCHED_NOTIFY_ACTIVE, &ctx->sched_flags))
+		return 1;
+
+	fault = ctx->csa.class_1_dsisr;
+	if (fault & (MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED))
+		return 1;
+
+	return ctx->csa.class_0_pending ? 1 : 0;
+}
+
+/*
+ * Start the isolated-mode loader on the SPU that @ctx is bound to.
+ *
+ * Sequence: unmap user mappings, purge the MFC DMA queue, clear the
+ * problem-state bit in SR1 (kernel mode), pass the loader address via
+ * the two signal registers, start the SPU in isolate mode and poll
+ * until the load phase is over. The problem-state bit is set again on
+ * every path that cleared it.
+ *
+ * Returns 0 on success, -ENODEV when no isolated loader is available,
+ * -EIO on either one-second timeout, -EACCES when the SPU is no longer
+ * running after the load, or -EINVAL when it left isolated state.
+ */
+static int spu_setup_isolated(struct spu_context *ctx)
+{
+ int ret;
+ u64 __iomem *mfc_cntl;
+ u64 sr1;
+ u32 status;
+ unsigned long timeout;
+ /* all three bits set == the load is still in progress */
+ const u32 status_loading = SPU_STATUS_RUNNING
+ | SPU_STATUS_ISOLATED_STATE | SPU_STATUS_ISOLATED_LOAD_STATUS;
+
+ ret = -ENODEV;
+ if (!isolated_loader)
+ goto out;
+
+ /*
+ * We need to exclude userspace access to the context.
+ *
+ * To protect against memory access we invalidate all ptes
+ * and make sure the pagefault handlers block on the mutex.
+ */
+ spu_unmap_mappings(ctx);
+
+ mfc_cntl = &ctx->spu->priv2->mfc_control_RW;
+
+ /* purge the MFC DMA queue to ensure no spurious accesses before we
+ * enter kernel mode */
+ timeout = jiffies + HZ;
+ out_be64(mfc_cntl, MFC_CNTL_PURGE_DMA_REQUEST);
+ while ((in_be64(mfc_cntl) & MFC_CNTL_PURGE_DMA_STATUS_MASK)
+ != MFC_CNTL_PURGE_DMA_COMPLETE) {
+ if (time_after(jiffies, timeout)) {
+ printk(KERN_ERR "%s: timeout flushing MFC DMA queue\n",
+ __func__);
+ ret = -EIO;
+ /* SR1 has not been touched yet, so skip out_drop_priv */
+ goto out;
+ }
+ cond_resched();
+ }
+
+ /* clear purge status */
+ out_be64(mfc_cntl, 0);
+
+ /* put the SPE in kernel mode to allow access to the loader */
+ sr1 = spu_mfc_sr1_get(ctx->spu);
+ sr1 &= ~MFC_STATE1_PROBLEM_STATE_MASK;
+ spu_mfc_sr1_set(ctx->spu, sr1);
+
+ /* start the loader */
+ /* loader address is split: upper 32 bits in signal1, lower in signal2 */
+ ctx->ops->signal1_write(ctx, (unsigned long)isolated_loader >> 32);
+ ctx->ops->signal2_write(ctx,
+ (unsigned long)isolated_loader & 0xffffffff);
+
+ ctx->ops->runcntl_write(ctx,
+ SPU_RUNCNTL_RUNNABLE | SPU_RUNCNTL_ISOLATE);
+
+ ret = 0;
+ timeout = jiffies + HZ;
+ while (((status = ctx->ops->status_read(ctx)) & status_loading) ==
+ status_loading) {
+ if (time_after(jiffies, timeout)) {
+ printk(KERN_ERR "%s: timeout waiting for loader\n",
+ __func__);
+ ret = -EIO;
+ goto out_drop_priv;
+ }
+ cond_resched();
+ }
+
+ if (!(status & SPU_STATUS_RUNNING)) {
+ /* If isolated LOAD has failed: run SPU, we will get a stop-and
+ * signal later. */
+ pr_debug("%s: isolated LOAD failed\n", __func__);
+ ctx->ops->runcntl_write(ctx, SPU_RUNCNTL_RUNNABLE);
+ ret = -EACCES;
+ goto out_drop_priv;
+ }
+
+ if (!(status & SPU_STATUS_ISOLATED_STATE)) {
+ /* This isn't allowed by the CBEA, but check anyway */
+ pr_debug("%s: SPU fell out of isolated mode?\n", __func__);
+ ctx->ops->runcntl_write(ctx, SPU_RUNCNTL_STOP);
+ ret = -EINVAL;
+ goto out_drop_priv;
+ }
+
+ /* success falls through to out_drop_priv with ret == 0 */
+out_drop_priv:
+ /* Finished accessing the loader. Drop kernel mode */
+ sr1 |= MFC_STATE1_PROBLEM_STATE_MASK;
+ spu_mfc_sr1_set(ctx->spu, sr1);
+
+out:
+ return ret;
+}
+
+/*
+ * Prepare @ctx for a run: activate it when required, program the run
+ * control register (plus privcntl and the next program counter from
+ * *@npc for non-isolated contexts) and flag the context as being inside
+ * spu_run via SPU_SCHED_SPU_RUN.
+ *
+ * Returns 0 on success, or the error from spu_activate() or
+ * spu_setup_isolated().
+ */
+static int spu_run_init(struct spu_context *ctx, u32 *npc)
+{
+ unsigned long runcntl = SPU_RUNCNTL_RUNNABLE;
+ int ret;
+
+ spuctx_switch_state(ctx, SPU_UTIL_SYSTEM);
+
+ /*
+ * NOSCHED is synchronous scheduling with respect to the caller.
+ * The caller waits for the context to be loaded.
+ */
+ if (ctx->flags & SPU_CREATE_NOSCHED) {
+ if (ctx->state == SPU_STATE_SAVED) {
+ ret = spu_activate(ctx, 0);
+ if (ret)
+ return ret;
+ }
+ }
+
+ /*
+ * Apply special setup as required.
+ */
+ if (ctx->flags & SPU_CREATE_ISOLATE) {
+ /* only run the loader if the SPU is not already isolated */
+ if (!(ctx->ops->status_read(ctx) & SPU_STATUS_ISOLATED_STATE)) {
+ ret = spu_setup_isolated(ctx);
+ if (ret)
+ return ret;
+ }
+
+ /*
+ * If userspace has set the runcntrl register (eg, to
+ * issue an isolated exit), we need to re-set it here
+ */
+ runcntl = ctx->ops->runcntl_read(ctx) &
+ (SPU_RUNCNTL_RUNNABLE | SPU_RUNCNTL_ISOLATE);
+ if (runcntl == 0)
+ runcntl = SPU_RUNCNTL_RUNNABLE;
+ } else {
+ unsigned long privcntl;
+
+ /* mirror the calling thread's single-step flag onto the SPU */
+ if (test_thread_flag(TIF_SINGLESTEP))
+ privcntl = SPU_PRIVCNTL_MODE_SINGLE_STEP;
+ else
+ privcntl = SPU_PRIVCNTL_MODE_NORMAL;
+
+ ctx->ops->privcntl_write(ctx, privcntl);
+ ctx->ops->npc_write(ctx, *npc);
+ }
+
+ ctx->ops->runcntl_write(ctx, runcntl);
+
+ if (ctx->flags & SPU_CREATE_NOSCHED) {
+ spuctx_switch_state(ctx, SPU_UTIL_USER);
+ } else {
+
+ /* non-NOSCHED contexts are activated only after runcntl is set */
+ if (ctx->state == SPU_STATE_SAVED) {
+ ret = spu_activate(ctx, 0);
+ if (ret)
+ return ret;
+ } else {
+ spuctx_switch_state(ctx, SPU_UTIL_USER);
+ }
+ }
+
+ set_bit(SPU_SCHED_SPU_RUN, &ctx->sched_flags);
+ return 0;
+}
+
+/*
+ * Finish a run of @ctx: take it off the run queue, report the final
+ * status word and next program counter through *@status and *@npc,
+ * clear SPU_SCHED_SPU_RUN, log the exit and release the context.
+ *
+ * Returns -ERESTARTSYS if the calling task has a signal pending,
+ * 0 otherwise.
+ */
+static int spu_run_fini(struct spu_context *ctx, u32 *npc,
+			       u32 *status)
+{
+	spu_del_from_rq(ctx);
+
+	*status = ctx->ops->status_read(ctx);
+	*npc = ctx->ops->npc_read(ctx);
+
+	spuctx_switch_state(ctx, SPU_UTIL_IDLE_LOADED);
+	clear_bit(SPU_SCHED_SPU_RUN, &ctx->sched_flags);
+	spu_switch_log_notify(NULL, ctx, SWITCH_LOG_EXIT, *status);
+	spu_release(ctx);
+
+	return signal_pending(current) ? -ERESTARTSYS : 0;
+}
+
+/*
+ * Restarting a syscall issued by an SPU is awkward: the usual
+ * assumption that the signal handler runs on the interrupted thread
+ * does not hold, because the handler runs in PowerPC user space while
+ * the syscall came from the SPU. POSIX signal semantics can therefore
+ * only be approximated.
+ *
+ * Maps the restart code in *@spu_ret to the value spu_run should
+ * return; may rewind *@npc or rewrite *@spu_ret. Returns -ERESTARTSYS
+ * for the known restart codes and 0 (after a warning) for anything else.
+ */
+static int spu_handle_restartsys(struct spu_context *ctx, long *spu_ret,
+				  unsigned int *npc)
+{
+	switch (*spu_ret) {
+	case -ERESTARTSYS:
+	case -ERESTARTNOINTR:
+		/*
+		 * Enter the regular syscall restarting for
+		 * sys_spu_run, then restart the SPU syscall
+		 * callback.
+		 */
+		*npc -= 8;
+		return -ERESTARTSYS;
+
+	case -ERESTARTNOHAND:
+	case -ERESTART_RESTARTBLOCK:
+		/*
+		 * Restart block is too hard for now, just return -EINTR
+		 * to the SPU.
+		 * ERESTARTNOHAND comes from sys_pause, we also return
+		 * -EINTR from there.
+		 * Assume that we need to be restarted ourselves though.
+		 */
+		*spu_ret = -EINTR;
+		return -ERESTARTSYS;
+
+	default:
+		printk(KERN_WARNING "%s: unexpected return code %ld\n",
+			__func__, *spu_ret);
+		return 0;
+	}
+}
+
+/*
+ * Service a syscall request made by the SPU.
+ *
+ * The word at the (4-byte aligned) next program counter in local store
+ * holds the local-store offset of a struct spu_syscall_block. The block
+ * is copied out, the syscall is run via spu_sys_callback() with the
+ * context released, the result is written back over the start of the
+ * block, the NPC is advanced past the pointer word and the SPU is set
+ * runnable again.
+ *
+ * Returns 0 on success, -EFAULT when the block would not fit inside
+ * local store, or -ERESTARTSYS when spu_handle_restartsys() requests a
+ * restart -- in that case neither the result nor the NPC is written
+ * back.
+ *
+ * NOTE(review): spu_release() presumably drops ctx->state_mutex, which
+ * is re-taken with mutex_lock() below -- confirm against spufs.h.
+ */
+static int spu_process_callback(struct spu_context *ctx)
+{
+ struct spu_syscall_block s;
+ u32 ls_pointer, npc;
+ void __iomem *ls;
+ long spu_ret;
+ int ret;
+
+ /* get syscall block from local store */
+ npc = ctx->ops->npc_read(ctx) & ~3;
+ ls = (void __iomem *)ctx->ops->get_ls(ctx);
+ ls_pointer = in_be32(ls + npc);
+ /* reject a block that would extend past the end of local store */
+ if (ls_pointer > (LS_SIZE - sizeof(s)))
+ return -EFAULT;
+ memcpy_fromio(&s, ls + ls_pointer, sizeof(s));
+
+ /* do actual syscall without pinning the spu */
+ ret = 0;
+ spu_ret = -ENOSYS;
+ npc += 4;
+
+ /* out-of-range syscall numbers keep the -ENOSYS default */
+ if (s.nr_ret < NR_syscalls) {
+ spu_release(ctx);
+ /* do actual system call from here */
+ spu_ret = spu_sys_callback(&s);
+ if (spu_ret <= -ERESTARTSYS) {
+ ret = spu_handle_restartsys(ctx, &spu_ret, &npc);
+ }
+ mutex_lock(&ctx->state_mutex);
+ if (ret == -ERESTARTSYS)
+ return ret;
+ }
+
+ /* need to re-get the ls, as it may have changed when we released the
+ * spu */
+ ls = (void __iomem *)ctx->ops->get_ls(ctx);
+
+ /* write result, jump over indirect pointer */
+ memcpy_toio(ls + ls_pointer, &spu_ret, sizeof(spu_ret));
+ ctx->ops->npc_write(ctx, npc);
+ ctx->ops->runcntl_write(ctx, SPU_RUNCNTL_RUNNABLE);
+ return ret;
+}
+
+/*
+ * Run @ctx until it stops, faults fatally or the caller is signalled.
+ *
+ * @npc:   in: program counter to start from; out: program counter at exit
+ * @event: out: ctx->event_return as it stands when the run ends
+ *
+ * Runs are serialised per context by ctx->run_mutex. Inside the loop,
+ * stop code 0x2104 is handled as an SPU syscall callback, and class 1 /
+ * class 0 exceptions are handed to their handlers.
+ *
+ * Returns the SPU status word when the run ended normally (or was
+ * interrupted after a halt, single step, or a stop other than 0x2104),
+ * otherwise a negative error code. A single step or a stop with code
+ * 0x3fff always yields -ERESTARTSYS; the latter also raises SIGTRAP.
+ */
+long spufs_run_spu(struct spu_context *ctx, u32 *npc, u32 *event)
+{
+ int ret;
+ u32 status;
+
+ if (mutex_lock_interruptible(&ctx->run_mutex))
+ return -ERESTARTSYS;
+
+ ctx->event_return = 0;
+
+ ret = spu_acquire(ctx);
+ if (ret)
+ goto out_unlock;
+
+ spu_enable_spu(ctx);
+
+ spu_update_sched_info(ctx);
+
+ ret = spu_run_init(ctx, npc);
+ if (ret) {
+ spu_release(ctx);
+ goto out;
+ }
+
+ do {
+ ret = spufs_wait(ctx->stop_wq, spu_stopped(ctx, &status));
+ if (unlikely(ret)) {
+ /*
+ * This is nasty: we need the state_mutex for all the
+ * bookkeeping even if the syscall was interrupted by
+ * a signal. ewww.
+ */
+ mutex_lock(&ctx->state_mutex);
+ break;
+ }
+ /* woken only by a scheduler notification: keep waiting unless stopped */
+ if (unlikely(test_and_clear_bit(SPU_SCHED_NOTIFY_ACTIVE,
+ &ctx->sched_flags))) {
+ if (!(status & SPU_STATUS_STOPPED_BY_STOP))
+ continue;
+ }
+
+ spuctx_switch_state(ctx, SPU_UTIL_SYSTEM);
+
+ /* stop code 0x2104: the SPU requests a syscall callback */
+ if ((status & SPU_STATUS_STOPPED_BY_STOP) &&
+ (status >> SPU_STOP_STATUS_SHIFT == 0x2104)) {
+ ret = spu_process_callback(ctx);
+ if (ret)
+ break;
+ /* callback handled; drop the stop bit so the loop continues */
+ status &= ~SPU_STATUS_STOPPED_BY_STOP;
+ }
+ ret = spufs_handle_class1(ctx);
+ if (ret)
+ break;
+
+ ret = spufs_handle_class0(ctx);
+ if (ret)
+ break;
+
+ if (signal_pending(current))
+ ret = -ERESTARTSYS;
+ } while (!ret && !(status & (SPU_STATUS_STOPPED_BY_STOP |
+ SPU_STATUS_STOPPED_BY_HALT |
+ SPU_STATUS_SINGLE_STEP)));
+
+ spu_disable_spu(ctx);
+ /* spu_run_fini() re-reads status and overwrites ret */
+ ret = spu_run_fini(ctx, npc, &status);
+ spu_yield(ctx);
+
+ /* stop codes 0x21xx are counted as library-assisted calls */
+ if ((status & SPU_STATUS_STOPPED_BY_STOP) &&
+ (((status >> SPU_STOP_STATUS_SHIFT) & 0x3f00) == 0x2100))
+ ctx->stats.libassist++;
+
+ if ((ret == 0) ||
+ ((ret == -ERESTARTSYS) &&
+ ((status & SPU_STATUS_STOPPED_BY_HALT) ||
+ (status & SPU_STATUS_SINGLE_STEP) ||
+ ((status & SPU_STATUS_STOPPED_BY_STOP) &&
+ (status >> SPU_STOP_STATUS_SHIFT != 0x2104)))))
+ ret = status;
+
+ /* Note: we don't need to force_sig SIGTRAP on single-step
+ * since we have TIF_SINGLESTEP set, thus the kernel will do
+ * it upon return from the syscall anyway.
+ */
+ if (unlikely(status & SPU_STATUS_SINGLE_STEP))
+ ret = -ERESTARTSYS;
+
+ else if (unlikely((status & SPU_STATUS_STOPPED_BY_STOP)
+ && (status >> SPU_STOP_STATUS_SHIFT) == 0x3fff)) {
+ force_sig(SIGTRAP);
+ ret = -ERESTARTSYS;
+ }
+
+out:
+ *event = ctx->event_return;
+out_unlock:
+ mutex_unlock(&ctx->run_mutex);
+ return ret;
+}
diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c
new file mode 100644
index 000000000..99bd027a7
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/sched.c
@@ -0,0 +1,1141 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* sched.c - SPU scheduler.
+ *
+ * Copyright (C) IBM 2005
+ * Author: Mark Nutter <mnutter@us.ibm.com>
+ *
+ * 2006-03-31 NUMA domains added.
+ */
+
+#undef DEBUG
+
+#include <linux/errno.h>
+#include <linux/sched/signal.h>
+#include <linux/sched/loadavg.h>
+#include <linux/sched/rt.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/completion.h>
+#include <linux/vmalloc.h>
+#include <linux/smp.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+#include <linux/numa.h>
+#include <linux/mutex.h>
+#include <linux/notifier.h>
+#include <linux/kthread.h>
+#include <linux/pid_namespace.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+
+#include <asm/io.h>
+#include <asm/mmu_context.h>
+#include <asm/spu.h>
+#include <asm/spu_csa.h>
+#include <asm/spu_priv1.h>
+#include "spufs.h"
+#define CREATE_TRACE_POINTS
+#include "sputrace.h"
+
+/*
+ * Priority-indexed run queue of contexts waiting for a physical spu.
+ */
+struct spu_prio_array {
+ /* bit N is set while runq[N] holds at least one context */
+ DECLARE_BITMAP(bitmap, MAX_PRIO);
+ /* one list of waiting contexts per priority, linked via ctx->rq */
+ struct list_head runq[MAX_PRIO];
+ /* taken around run queue additions and removals */
+ spinlock_t runq_lock;
+ /* number of queued contexts; the scheduler timer is armed while > 0 */
+ int nr_waiting;
+};
+
+static unsigned long spu_avenrun[3];
+static struct spu_prio_array *spu_prio;
+static struct task_struct *spusched_task;
+static struct timer_list spusched_timer;
+static struct timer_list spuloadavg_timer;
+
+/*
+ * Priority of a normal, non-rt, non-nice'd process (aka nice level 0).
+ */
+#define NORMAL_PRIO 120
+
+/*
+ * Frequency of the spu scheduler tick. By default we do one SPU scheduler
+ * tick for every 10 CPU scheduler ticks.
+ */
+#define SPUSCHED_TICK (10)
+
+/*
+ * These are the 'tuning knobs' of the scheduler:
+ *
+ * Minimum timeslice is 5 msecs (or 1 spu scheduler tick, whichever is
+ * larger), default timeslice is 100 msecs, maximum timeslice is 800 msecs.
+ */
+#define MIN_SPU_TIMESLICE max(5 * HZ / (1000 * SPUSCHED_TICK), 1)
+#define DEF_SPU_TIMESLICE (100 * HZ / (1000 * SPUSCHED_TICK))
+
+#define SCALE_PRIO(x, prio) \
+ max(x * (MAX_PRIO - prio) / (NICE_WIDTH / 2), MIN_SPU_TIMESLICE)
+
+/*
+ * Derive the context's time slice from its priority, mapping user-nice
+ * values [ -20 ... 0 ... 19 ] to [800ms ... 100ms ... 5ms].
+ *
+ * Higher-priority contexts get bigger time slices per round of
+ * execution (priorities better than NORMAL_PRIO scale from four times
+ * the default slice), but even the lowest priority context gets at
+ * least MIN_SPU_TIMESLICE.
+ */
+void spu_set_timeslice(struct spu_context *ctx)
+{
+	if (ctx->prio >= NORMAL_PRIO) {
+		ctx->time_slice = SCALE_PRIO(DEF_SPU_TIMESLICE, ctx->prio);
+		return;
+	}
+
+	ctx->time_slice = SCALE_PRIO(DEF_SPU_TIMESLICE * 4, ctx->prio);
+}
+
+/*
+ * Update scheduling information from the owning thread.
+ *
+ * Copies the pid, priority, policy, allowed-cpu mask and current cpu of
+ * the calling task ("current") into @ctx. The context must not be on
+ * the runqueue (enforced with BUG_ON).
+ */
+void __spu_update_sched_info(struct spu_context *ctx)
+{
+ /*
+ * assert that the context is not on the runqueue, so it is safe
+ * to change its scheduling parameters.
+ */
+ BUG_ON(!list_empty(&ctx->rq));
+
+ /*
+ * 32-Bit assignments are atomic on powerpc, and we don't care about
+ * memory ordering here because retrieving the controlling thread is
+ * per definition racy.
+ */
+ ctx->tid = current->pid;
+
+ /*
+ * We do our own priority calculations, so we normally want
+ * ->static_prio to start with. Unfortunately this field
+ * contains junk for threads with a realtime scheduling
+ * policy so we have to look at ->prio in this case.
+ */
+ if (rt_prio(current->prio))
+ ctx->prio = current->prio;
+ else
+ ctx->prio = current->static_prio;
+ ctx->policy = current->policy;
+
+ /*
+ * TO DO: the context may be loaded, so we may need to activate
+ * it again on a different node. But it shouldn't hurt anything
+ * to update its parameters, because we know that the scheduler
+ * is not actively looking at this field, since it is not on the
+ * runqueue. The context will be rescheduled on the proper node
+ * if it is timesliced or preempted.
+ */
+ cpumask_copy(&ctx->cpus_allowed, current->cpus_ptr);
+
+ /* Save the current cpu id for spu interrupt routing. */
+ ctx->last_ran = raw_smp_processor_id();
+}
+
+/*
+ * Locked front end for __spu_update_sched_info(): when @ctx is in the
+ * runnable state, the update runs under the list_mutex of the node its
+ * spu belongs to; otherwise it runs without that lock.
+ */
+void spu_update_sched_info(struct spu_context *ctx)
+{
+	struct mutex *list_lock;
+
+	if (ctx->state != SPU_STATE_RUNNABLE) {
+		__spu_update_sched_info(ctx);
+		return;
+	}
+
+	/*
+	 * Take list_mutex to sync with find_victim().
+	 */
+	list_lock = &cbe_spu_info[ctx->spu->node].list_mutex;
+	mutex_lock(list_lock);
+	__spu_update_sched_info(ctx);
+	mutex_unlock(list_lock);
+}
+
+/*
+ * Returns 1 when @node has cpus and at least one of them is in the
+ * context's cpus_allowed mask, 0 otherwise.
+ */
+static int __node_allowed(struct spu_context *ctx, int node)
+{
+	if (!nr_cpus_node(node))
+		return 0;
+
+	if (!cpumask_intersects(cpumask_of_node(node), &ctx->cpus_allowed))
+		return 0;
+
+	return 1;
+}
+
+/*
+ * __node_allowed() evaluated with spu_prio->runq_lock held.
+ */
+static int node_allowed(struct spu_context *ctx, int node)
+{
+	int allowed;
+
+	spin_lock(&spu_prio->runq_lock);
+	allowed = __node_allowed(ctx, node);
+	spin_unlock(&spu_prio->runq_lock);
+
+	return allowed;
+}
+
+/*
+ * Wake up the active spu_contexts.
+ *
+ * For every non-free spu on every online node, flag the bound context
+ * with SPU_SCHED_NOTIFY_ACTIVE and wake all waiters on its stop queue.
+ *
+ * An spu can be marked as allocated while no context is bound to it:
+ * spu_get_idle() sets SPU_USED and drops the list_mutex before the
+ * context is bound, and find_victim() unbinds a context without marking
+ * the spu free. Such spus have spu->ctx == NULL and must be skipped
+ * rather than dereferenced.
+ */
+void do_notify_spus_active(void)
+{
+	int node;
+
+	for_each_online_node(node) {
+		struct spu *spu;
+
+		mutex_lock(&cbe_spu_info[node].list_mutex);
+		list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
+			struct spu_context *ctx;
+
+			if (spu->alloc_state == SPU_FREE)
+				continue;
+
+			ctx = spu->ctx;
+			if (!ctx)	/* allocated, but nothing bound yet */
+				continue;
+
+			set_bit(SPU_SCHED_NOTIFY_ACTIVE, &ctx->sched_flags);
+			/* make the flag visible before the waiters run */
+			mb();
+			wake_up_all(&ctx->stop_wq);
+		}
+		mutex_unlock(&cbe_spu_info[node].list_mutex);
+	}
+}
+
+/**
+ * spu_bind_context - bind spu context to physical spu
+ * @spu: physical spu to bind to
+ * @ctx: context to bind
+ *
+ * Links @spu and @ctx to each other, switches the context to the
+ * hardware ops, installs the spufs interrupt callbacks, restores the
+ * saved state onto the spu and marks the context SPU_STATE_RUNNABLE.
+ */
+static void spu_bind_context(struct spu *spu, struct spu_context *ctx)
+{
+ spu_context_trace(spu_bind_context__enter, ctx, spu);
+
+ spuctx_switch_state(ctx, SPU_UTIL_SYSTEM);
+
+ if (ctx->flags & SPU_CREATE_NOSCHED)
+ atomic_inc(&cbe_spu_info[spu->node].reserved_spus);
+
+ /* remember the spu's counters so unbind can account the delta */
+ ctx->stats.slb_flt_base = spu->stats.slb_flt;
+ ctx->stats.class2_intr_base = spu->stats.class2_intr;
+
+ spu_associate_mm(spu, ctx->owner);
+
+ /* the cross links and callbacks are switched under register_lock */
+ spin_lock_irq(&spu->register_lock);
+ spu->ctx = ctx;
+ spu->flags = 0;
+ ctx->spu = spu;
+ ctx->ops = &spu_hw_ops;
+ spu->pid = current->pid;
+ spu->tgid = current->tgid;
+ spu->ibox_callback = spufs_ibox_callback;
+ spu->wbox_callback = spufs_wbox_callback;
+ spu->stop_callback = spufs_stop_callback;
+ spu->mfc_callback = spufs_mfc_callback;
+ spin_unlock_irq(&spu->register_lock);
+
+ spu_unmap_mappings(ctx);
+
+ spu_switch_log_notify(spu, ctx, SWITCH_LOG_START, 0);
+ spu_restore(&ctx->csa, spu);
+ spu->timestamp = jiffies;
+ ctx->state = SPU_STATE_RUNNABLE;
+
+ spuctx_switch_state(ctx, SPU_UTIL_USER);
+}
+
+/*
+ * Returns non-zero when @spu takes part in scheduling, i.e. when it is
+ * either unbound or bound to a context created without
+ * SPU_CREATE_NOSCHED.
+ *
+ * Must be used with the list_mutex held.
+ */
+static inline int sched_spu(struct spu *spu)
+{
+	BUG_ON(!mutex_is_locked(&cbe_spu_info[spu->node].list_mutex));
+
+	if (!spu->ctx)
+		return 1;
+
+	return !(spu->ctx->flags & SPU_CREATE_NOSCHED);
+}
+
+/*
+ * Add contexts whose aff_list is still empty to the gang's affinity
+ * list, then mark the gang AFF_MERGED.
+ *
+ * NOTE(review): the loop walks gang->aff_list_head itself, so every ctx
+ * it visits is already linked through aff_list and the list_empty()
+ * test looks like it can never be true. Presumably the intent was to
+ * walk all contexts of the gang -- confirm against struct spu_gang.
+ */
+static void aff_merge_remaining_ctxs(struct spu_gang *gang)
+{
+ struct spu_context *ctx;
+
+ list_for_each_entry(ctx, &gang->aff_list_head, aff_list) {
+ if (list_empty(&ctx->aff_list))
+ list_add(&ctx->aff_list, &gang->aff_list_head);
+ }
+ gang->aff_flags |= AFF_MERGED;
+}
+
+/*
+ * Assign each context on the gang's affinity list its offset relative
+ * to the reference context: walking backwards from the reference the
+ * offsets are -1, -2, ...; walking forwards, starting at the reference
+ * itself, they are 0, 1, 2, ... Both walks stop at the list head.
+ * Marks the gang AFF_OFFSETS_SET when done.
+ */
+static void aff_set_offsets(struct spu_gang *gang)
+{
+ struct spu_context *ctx;
+ int offset;
+
+ offset = -1;
+ list_for_each_entry_reverse(ctx, &gang->aff_ref_ctx->aff_list,
+ aff_list) {
+ if (&ctx->aff_list == &gang->aff_list_head)
+ break;
+ ctx->aff_offset = offset--;
+ }
+
+ offset = 0;
+ /* starting from ->prev makes the first entry visited the reference ctx */
+ list_for_each_entry(ctx, gang->aff_ref_ctx->aff_list.prev, aff_list) {
+ if (&ctx->aff_list == &gang->aff_list_head)
+ break;
+ ctx->aff_offset = offset++;
+ }
+
+ gang->aff_flags |= AFF_OFFSETS_SET;
+}
+
+/*
+ * Pick the physical spu that will serve as reference location for the
+ * affinity chain of @ctx's gang.
+ *
+ * Nodes are tried starting with the one of the current cpu, wrapping
+ * around at MAX_NUMNODES. A node is skipped when @ctx is not allowed on
+ * it or when it does not have enough spus left for the whole gang. On a
+ * suitable node the first schedulable spu is returned; with @mem_aff
+ * set it must also have memory affinity.
+ *
+ * @group_size and @lowest_offset are currently unused.
+ *
+ * Returns the chosen spu, or NULL when no node qualifies.
+ */
+static struct spu *aff_ref_location(struct spu_context *ctx, int mem_aff,
+ int group_size, int lowest_offset)
+{
+ struct spu *spu;
+ int node, n;
+
+ /*
+ * TODO: A better algorithm could be used to find a good spu to be
+ * used as reference location for the ctxs chain.
+ */
+ node = cpu_to_node(raw_smp_processor_id());
+ for (n = 0; n < MAX_NUMNODES; n++, node++) {
+ /*
+ * "available_spus" counts how many spus are not potentially
+ * going to be used by other affinity gangs whose reference
+ * context is already in place. Although this code seeks to
+ * avoid having affinity gangs with a summed amount of
+ * contexts bigger than the amount of spus in the node,
+ * this may happen sporadically. In this case, available_spus
+ * becomes negative, which is harmless.
+ */
+ int available_spus;
+
+ /* wrap around to node 0 after the last node */
+ node = (node < MAX_NUMNODES) ? node : 0;
+ if (!node_allowed(ctx, node))
+ continue;
+
+ available_spus = 0;
+ mutex_lock(&cbe_spu_info[node].list_mutex);
+ list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
+ /* a bound reference ctx (offset 0) reserves room for its whole gang */
+ if (spu->ctx && spu->ctx->gang && !spu->ctx->aff_offset
+ && spu->ctx->gang->aff_ref_spu)
+ available_spus -= spu->ctx->gang->contexts;
+ available_spus++;
+ }
+ if (available_spus < ctx->gang->contexts) {
+ mutex_unlock(&cbe_spu_info[node].list_mutex);
+ continue;
+ }
+
+ list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
+ if ((!mem_aff || spu->has_mem_affinity) &&
+ sched_spu(spu)) {
+ mutex_unlock(&cbe_spu_info[node].list_mutex);
+ return spu;
+ }
+ }
+ mutex_unlock(&cbe_spu_info[node].list_mutex);
+ }
+ return NULL;
+}
+
+/*
+ * Compute the gang's affinity list size and the lowest offset on it,
+ * then store the reference spu chosen by aff_ref_location() in
+ * gang->aff_ref_spu (NULL when none was found).
+ */
+static void aff_set_ref_point_location(struct spu_gang *gang)
+{
+ int mem_aff, gs, lowest_offset;
+ struct spu_context *tmp, *ctx;
+
+ mem_aff = gang->aff_ref_ctx->flags & SPU_CREATE_AFFINITY_MEM;
+ lowest_offset = 0;
+ gs = 0;
+
+ /* gs = number of contexts on the affinity list */
+ list_for_each_entry(tmp, &gang->aff_list_head, aff_list)
+ gs++;
+
+ /* walk backwards from the reference ctx; the last one seen is lowest */
+ list_for_each_entry_reverse(ctx, &gang->aff_ref_ctx->aff_list,
+ aff_list) {
+ if (&ctx->aff_list == &gang->aff_list_head)
+ break;
+ lowest_offset = ctx->aff_offset;
+ }
+
+ gang->aff_ref_spu = aff_ref_location(gang->aff_ref_ctx, mem_aff, gs,
+ lowest_offset);
+}
+
+/*
+ * Find the spu located @offset schedulable spus away from @ref on the
+ * spu affinity list: forwards for a non-negative @offset, backwards for
+ * a negative one. Spus for which sched_spu() is false are stepped over
+ * without being counted. Every spu visited must belong to @node
+ * (BUG_ON otherwise).
+ *
+ * sched_spu() asserts that the node's list_mutex is held, so the caller
+ * must hold it.
+ *
+ * NOTE(review): if the walk reaches the end of the list before the
+ * offset is used up, the returned pointer is whatever the list iterator
+ * ends on, not a real spu -- confirm that callers cannot hit this.
+ */
+static struct spu *ctx_location(struct spu *ref, int offset, int node)
+{
+ struct spu *spu;
+
+ spu = NULL;
+ if (offset >= 0) {
+ /* starting from ->prev makes the first entry visited @ref itself */
+ list_for_each_entry(spu, ref->aff_list.prev, aff_list) {
+ BUG_ON(spu->node != node);
+ if (offset == 0)
+ break;
+ if (sched_spu(spu))
+ offset--;
+ }
+ } else {
+ /* starting from ->next makes the first entry visited @ref itself */
+ list_for_each_entry_reverse(spu, ref->aff_list.next, aff_list) {
+ BUG_ON(spu->node != node);
+ if (offset == 0)
+ break;
+ if (sched_spu(spu))
+ offset++;
+ }
+ }
+
+ return spu;
+}
+
+/*
+ * has_affinity is called each time a context is going to be scheduled.
+ * It returns non-zero when the context is on an affinity list and its
+ * gang has a reference spu (gang->aff_ref_spu), and 0 otherwise.
+ *
+ * When no context of the gang is currently counted in aff_sched_count,
+ * the reference spu is dropped and chosen afresh; the one-time merge
+ * and offset steps are performed first if still outstanding.
+ */
+static int has_affinity(struct spu_context *ctx)
+{
+ struct spu_gang *gang = ctx->gang;
+
+ if (list_empty(&ctx->aff_list))
+ return 0;
+
+ if (atomic_read(&ctx->gang->aff_sched_count) == 0)
+ ctx->gang->aff_ref_spu = NULL;
+
+ if (!gang->aff_ref_spu) {
+ if (!(gang->aff_flags & AFF_MERGED))
+ aff_merge_remaining_ctxs(gang);
+ if (!(gang->aff_flags & AFF_OFFSETS_SET))
+ aff_set_offsets(gang);
+ aff_set_ref_point_location(gang);
+ }
+
+ return gang->aff_ref_spu != NULL;
+}
+
+/**
+ * spu_unbind_context - unbind spu context from physical spu
+ * @spu: physical spu to unbind from
+ * @ctx: context to unbind
+ *
+ * Saves the spu state into the context's save area, clears the
+ * callbacks and cross links, switches the context to the backing ops
+ * and marks it SPU_STATE_SAVED. The spu's alloc_state is not changed
+ * here.
+ */
+static void spu_unbind_context(struct spu *spu, struct spu_context *ctx)
+{
+ u32 status;
+
+ spu_context_trace(spu_unbind_context__enter, ctx, spu);
+
+ spuctx_switch_state(ctx, SPU_UTIL_SYSTEM);
+
+ if (spu->ctx->flags & SPU_CREATE_NOSCHED)
+ atomic_dec(&cbe_spu_info[spu->node].reserved_spus);
+
+ if (ctx->gang)
+ /*
+ * If ctx->gang->aff_sched_count is positive, SPU affinity is
+ * being considered in this gang. Using atomic_dec_if_positive
+ * allow us to skip an explicit check for affinity in this gang
+ */
+ atomic_dec_if_positive(&ctx->gang->aff_sched_count);
+
+ spu_unmap_mappings(ctx);
+ spu_save(&ctx->csa, spu);
+ spu_switch_log_notify(spu, ctx, SWITCH_LOG_STOP, 0);
+
+ /* callbacks and cross links are cleared under register_lock */
+ spin_lock_irq(&spu->register_lock);
+ spu->timestamp = jiffies;
+ ctx->state = SPU_STATE_SAVED;
+ spu->ibox_callback = NULL;
+ spu->wbox_callback = NULL;
+ spu->stop_callback = NULL;
+ spu->mfc_callback = NULL;
+ spu->pid = 0;
+ spu->tgid = 0;
+ ctx->ops = &spu_backing_ops;
+ spu->flags = 0;
+ spu->ctx = NULL;
+ spin_unlock_irq(&spu->register_lock);
+
+ spu_associate_mm(spu, NULL);
+
+ /* account the events seen since spu_bind_context() took the base */
+ ctx->stats.slb_flt +=
+ (spu->stats.slb_flt - ctx->stats.slb_flt_base);
+ ctx->stats.class2_intr +=
+ (spu->stats.class2_intr - ctx->stats.class2_intr_base);
+
+ /* This maps the underlying spu state to idle */
+ spuctx_switch_state(ctx, SPU_UTIL_IDLE_LOADED);
+ ctx->spu = NULL;
+
+ /* ctx->ops is spu_backing_ops by now, so this checks the saved state */
+ if (spu_stopped(ctx, &status))
+ wake_up_all(&ctx->stop_wq);
+}
+
+/**
+ * __spu_add_to_rq - add a context to the runqueue
+ * @ctx: context to add
+ *
+ * Does nothing when @ctx is already queued. Both callers in this file
+ * hold spu_prio->runq_lock around the call.
+ */
+static void __spu_add_to_rq(struct spu_context *ctx)
+{
+ /*
+ * Unfortunately this code path can be called from multiple threads
+ * on behalf of a single context due to the way the problem state
+ * mmap support works.
+ *
+ * Fortunately we need to wake up all these threads at the same time
+ * and can simply skip the runqueue addition for every but the first
+ * thread getting into this codepath.
+ *
+ * It's still quite hacky, and long-term we should proxy all other
+ * threads through the owner thread so that spu_run is in control
+ * of all the scheduling activity for a given context.
+ */
+ if (list_empty(&ctx->rq)) {
+ list_add_tail(&ctx->rq, &spu_prio->runq[ctx->prio]);
+ set_bit(ctx->prio, spu_prio->bitmap);
+ /* first waiter: (re)arm the scheduler tick timer */
+ if (!spu_prio->nr_waiting++)
+ mod_timer(&spusched_timer, jiffies + SPUSCHED_TICK);
+ }
+}
+
+/* Add @ctx to the runqueue, taking spu_prio->runq_lock around the update. */
+static void spu_add_to_rq(struct spu_context *ctx)
+{
+ spin_lock(&spu_prio->runq_lock);
+ __spu_add_to_rq(ctx);
+ spin_unlock(&spu_prio->runq_lock);
+}
+
+/*
+ * Take @ctx off the runqueue if it is queued. Stops the scheduler timer
+ * when the last waiter leaves, and clears the priority's bitmap bit
+ * once its queue has become empty.
+ */
+static void __spu_del_from_rq(struct spu_context *ctx)
+{
+	const int level = ctx->prio;
+
+	if (list_empty(&ctx->rq))
+		return;
+
+	spu_prio->nr_waiting--;
+	if (spu_prio->nr_waiting == 0)
+		del_timer(&spusched_timer);
+	list_del_init(&ctx->rq);
+
+	if (list_empty(&spu_prio->runq[level]))
+		clear_bit(level, spu_prio->bitmap);
+}
+
+/* Remove @ctx from the runqueue, taking spu_prio->runq_lock around the update. */
+void spu_del_from_rq(struct spu_context *ctx)
+{
+ spin_lock(&spu_prio->runq_lock);
+ __spu_del_from_rq(ctx);
+ spin_unlock(&spu_prio->runq_lock);
+}
+
+/*
+ * Queue a NOSCHED context on the runqueue and sleep, interruptibly, on
+ * its stop queue until woken. ctx->state_mutex is dropped across the
+ * sleep and re-taken afterwards; the context is removed from the
+ * runqueue again before returning. When a signal is already pending,
+ * the function returns without queueing or sleeping.
+ */
+static void spu_prio_wait(struct spu_context *ctx)
+{
+ DEFINE_WAIT(wait);
+
+ /*
+ * The caller must explicitly wait for a context to be loaded
+ * if the nosched flag is set. If NOSCHED is not set, the caller
+ * queues the context and waits for an spu event or error.
+ */
+ BUG_ON(!(ctx->flags & SPU_CREATE_NOSCHED));
+
+ spin_lock(&spu_prio->runq_lock);
+ prepare_to_wait_exclusive(&ctx->stop_wq, &wait, TASK_INTERRUPTIBLE);
+ if (!signal_pending(current)) {
+ __spu_add_to_rq(ctx);
+ /* both locks are released before sleeping */
+ spin_unlock(&spu_prio->runq_lock);
+ mutex_unlock(&ctx->state_mutex);
+ schedule();
+ mutex_lock(&ctx->state_mutex);
+ spin_lock(&spu_prio->runq_lock);
+ __spu_del_from_rq(ctx);
+ }
+ spin_unlock(&spu_prio->runq_lock);
+ __set_current_state(TASK_RUNNING);
+ remove_wait_queue(&ctx->stop_wq, &wait);
+}
+
+/*
+ * Find a free physical spu for @ctx and mark it SPU_USED.
+ *
+ * For a gang context with affinity, only the spu at the context's
+ * offset from the gang's reference spu is considered; if that one is
+ * not free the search fails without looking elsewhere. Otherwise the
+ * allowed nodes are scanned, starting at the node of the current cpu,
+ * for the first free spu.
+ *
+ * Returns the spu (no context is bound to it yet), or NULL.
+ */
+static struct spu *spu_get_idle(struct spu_context *ctx)
+{
+ struct spu *spu, *aff_ref_spu;
+ int node, n;
+
+ spu_context_nospu_trace(spu_get_idle__enter, ctx);
+
+ if (ctx->gang) {
+ mutex_lock(&ctx->gang->aff_mutex);
+ if (has_affinity(ctx)) {
+ aff_ref_spu = ctx->gang->aff_ref_spu;
+ atomic_inc(&ctx->gang->aff_sched_count);
+ mutex_unlock(&ctx->gang->aff_mutex);
+ node = aff_ref_spu->node;
+
+ mutex_lock(&cbe_spu_info[node].list_mutex);
+ spu = ctx_location(aff_ref_spu, ctx->aff_offset, node);
+ /* jumps to "found" with the node's list_mutex still held */
+ if (spu && spu->alloc_state == SPU_FREE)
+ goto found;
+ mutex_unlock(&cbe_spu_info[node].list_mutex);
+
+ /* undo the count taken above; this context was not placed */
+ atomic_dec(&ctx->gang->aff_sched_count);
+ goto not_found;
+ }
+ mutex_unlock(&ctx->gang->aff_mutex);
+ }
+ node = cpu_to_node(raw_smp_processor_id());
+ for (n = 0; n < MAX_NUMNODES; n++, node++) {
+ /* wrap around to node 0 after the last node */
+ node = (node < MAX_NUMNODES) ? node : 0;
+ if (!node_allowed(ctx, node))
+ continue;
+
+ mutex_lock(&cbe_spu_info[node].list_mutex);
+ list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
+ /* jumps to "found" with the node's list_mutex still held */
+ if (spu->alloc_state == SPU_FREE)
+ goto found;
+ }
+ mutex_unlock(&cbe_spu_info[node].list_mutex);
+ }
+
+ not_found:
+ spu_context_nospu_trace(spu_get_idle__not_found, ctx);
+ return NULL;
+
+ found:
+ /* claim the spu before dropping the list_mutex taken by the finder */
+ spu->alloc_state = SPU_USED;
+ mutex_unlock(&cbe_spu_info[node].list_mutex);
+ spu_context_trace(spu_get_idle__found, ctx, spu);
+ spu_init_channels(spu);
+ return spu;
+}
+
+/**
+ * find_victim - find a lower priority context to preempt
+ * @ctx: candidate context for running
+ *
+ * Scans the allowed nodes for the bound, schedulable context with the
+ * numerically largest prio value that is also larger than @ctx's, and
+ * unbinds it from its spu. The spu's alloc_state is left unchanged. The
+ * victim is put back on the runqueue if it is inside spu_run.
+ *
+ * Returns the freed physical spu to run the new context on, or NULL
+ * when no victim exists.
+ */
+static struct spu *find_victim(struct spu_context *ctx)
+{
+ struct spu_context *victim = NULL;
+ struct spu *spu;
+ int node, n;
+
+ spu_context_nospu_trace(spu_find_victim__enter, ctx);
+
+ /*
+ * Look for a possible preemption candidate on the local node first.
+ * If there is no candidate look at the other nodes. This isn't
+ * exactly fair, but so far the whole spu scheduler tries to keep
+ * a strong node affinity. We might want to fine-tune this in
+ * the future.
+ */
+ restart:
+ node = cpu_to_node(raw_smp_processor_id());
+ for (n = 0; n < MAX_NUMNODES; n++, node++) {
+ /* wrap around to node 0 after the last node */
+ node = (node < MAX_NUMNODES) ? node : 0;
+ if (!node_allowed(ctx, node))
+ continue;
+
+ mutex_lock(&cbe_spu_info[node].list_mutex);
+ list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
+ struct spu_context *tmp = spu->ctx;
+
+ if (tmp && tmp->prio > ctx->prio &&
+ !(tmp->flags & SPU_CREATE_NOSCHED) &&
+ (!victim || tmp->prio > victim->prio)) {
+ victim = spu->ctx;
+ }
+ }
+ /* take a reference before dropping the list mutex */
+ if (victim)
+ get_spu_context(victim);
+ mutex_unlock(&cbe_spu_info[node].list_mutex);
+
+ if (victim) {
+ /*
+ * This nests ctx->state_mutex, but we always lock
+ * higher priority contexts before lower priority
+ * ones, so this is safe until we introduce
+ * priority inheritance schemes.
+ *
+ * XXX if the highest priority context is locked,
+ * this can loop a long time. Might be better to
+ * look at another context or give up after X retries.
+ */
+ if (!mutex_trylock(&victim->state_mutex)) {
+ put_spu_context(victim);
+ victim = NULL;
+ goto restart;
+ }
+
+ spu = victim->spu;
+ if (!spu || victim->prio <= ctx->prio) {
+ /*
+ * This race can happen because we've dropped
+ * the active list mutex. Not a problem, just
+ * restart the search.
+ */
+ mutex_unlock(&victim->state_mutex);
+ put_spu_context(victim);
+ victim = NULL;
+ goto restart;
+ }
+
+ spu_context_trace(__spu_deactivate__unload, ctx, spu);
+
+ /*
+ * NOTE(review): this uses the node the victim was found
+ * on, not spu->node of the re-read victim->spu -- confirm
+ * the two cannot differ after the list mutex was dropped.
+ */
+ mutex_lock(&cbe_spu_info[node].list_mutex);
+ cbe_spu_info[node].nr_active--;
+ spu_unbind_context(spu, victim);
+ mutex_unlock(&cbe_spu_info[node].list_mutex);
+
+ victim->stats.invol_ctx_switch++;
+ spu->stats.invol_ctx_switch++;
+ if (test_bit(SPU_SCHED_SPU_RUN, &victim->sched_flags))
+ spu_add_to_rq(victim);
+
+ mutex_unlock(&victim->state_mutex);
+ put_spu_context(victim);
+
+ return spu;
+ }
+ }
+
+ return NULL;
+}
+
+/*
+ * Bind @ctx to @spu if the spu has no context bound yet.
+ *
+ * On success the node's active count is bumped, the spu is marked
+ * SPU_USED and waiters on ctx->run_wq are woken. If another context got
+ * bound to the spu in the meantime, @ctx is put on the runqueue instead.
+ */
+static void __spu_schedule(struct spu *spu, struct spu_context *ctx)
+{
+ int node = spu->node;
+ int success = 0;
+
+ spu_set_timeslice(ctx);
+
+ mutex_lock(&cbe_spu_info[node].list_mutex);
+ if (spu->ctx == NULL) {
+ spu_bind_context(spu, ctx);
+ cbe_spu_info[node].nr_active++;
+ spu->alloc_state = SPU_USED;
+ success = 1;
+ }
+ mutex_unlock(&cbe_spu_info[node].list_mutex);
+
+ if (success)
+ wake_up_all(&ctx->run_wq);
+ else
+ spu_add_to_rq(ctx);
+}
+
+/*
+ * Schedule @ctx onto @spu under ctx->state_mutex. Nothing is done
+ * unless the context is still in SPU_STATE_SAVED once the mutex has
+ * been taken.
+ */
+static void spu_schedule(struct spu *spu, struct spu_context *ctx)
+{
+ /* not a candidate for interruptible because it's called either
+ from the scheduler thread or from spu_deactivate */
+ mutex_lock(&ctx->state_mutex);
+ if (ctx->state == SPU_STATE_SAVED)
+ __spu_schedule(spu, ctx);
+ spu_release(ctx);
+}
+
+/**
+ * spu_unschedule - remove a context from a spu, and possibly release it.
+ * @spu: The SPU to unschedule from
+ * @ctx: The context currently scheduled on the SPU
+ * @free_spu: Whether to free the SPU for other contexts
+ *
+ * Unbinds the context @ctx from the SPU @spu. If @free_spu is non-zero, the
+ * SPU is made available for other contexts (ie, may be returned by
+ * spu_get_idle). If this is zero, the caller is expected to schedule another
+ * context to this spu.
+ *
+ * Also decrements the node's active count and bumps the involuntary
+ * context switch counters of both @ctx and @spu.
+ *
+ * Should be called with ctx->state_mutex held.
+ */
+static void spu_unschedule(struct spu *spu, struct spu_context *ctx,
+ int free_spu)
+{
+ int node = spu->node;
+
+ mutex_lock(&cbe_spu_info[node].list_mutex);
+ cbe_spu_info[node].nr_active--;
+ if (free_spu)
+ spu->alloc_state = SPU_FREE;
+ spu_unbind_context(spu, ctx);
+ ctx->stats.invol_ctx_switch++;
+ spu->stats.invol_ctx_switch++;
+ mutex_unlock(&cbe_spu_info[node].list_mutex);
+}
+
+/**
+ * spu_activate - find a free spu for a context and execute it
+ * @ctx:	spu context to schedule
+ * @flags:	flags (currently ignored)
+ *
+ * Looks for an idle spu for @ctx; a context with realtime priority may
+ * instead take over the spu of a victim found by find_victim().  When no
+ * spu can be had, a %SPU_CREATE_NOSCHED context waits via spu_prio_wait()
+ * and retries, any other context is placed on the runqueue.
+ *
+ * Returns 0, or -ERESTARTSYS if a signal is pending for the caller.
+ */
+int spu_activate(struct spu_context *ctx, unsigned long flags)
+{
+	struct spu *spu;
+
+	/*
+	 * Several threads may be waiting for the same context.  Only one
+	 * of them performs the bind; the rest get hold of state_mutex
+	 * after the context has become runnable and find ctx->spu set.
+	 */
+	if (ctx->spu)
+		return 0;
+
+	for (;;) {
+		if (signal_pending(current))
+			return -ERESTARTSYS;
+
+		spu = spu_get_idle(ctx);
+
+		/* realtime contexts may preempt a lower priority one */
+		if (!spu && rt_prio(ctx->prio))
+			spu = find_victim(ctx);
+
+		if (spu) {
+			/* sample the run control state before binding */
+			unsigned long runcntl = ctx->ops->runcntl_read(ctx);
+
+			__spu_schedule(spu, ctx);
+			if (runcntl & SPU_RUNCNTL_RUNNABLE)
+				spuctx_switch_state(ctx, SPU_UTIL_USER);
+
+			return 0;
+		}
+
+		if (!(ctx->flags & SPU_CREATE_NOSCHED))
+			break;
+
+		/* NOSCHED contexts never sit on the runqueue: wait and retry */
+		spu_prio_wait(ctx);
+	}
+
+	spu_add_to_rq(ctx);
+
+	return 0;
+}
+
+/**
+ * grab_runnable_context - try to find a runnable context
+ * @prio:	only runqueue levels below this value are searched
+ * @node:	node the context must be allowed on (see __node_allowed())
+ *
+ * Remove the highest priority context on the runqueue and return it
+ * to the caller.  Returns %NULL if no runnable context was found.
+ */
+static struct spu_context *grab_runnable_context(int prio, int node)
+{
+	struct spu_context *found = NULL;
+	struct spu_context *pos;
+	int level;
+
+	spin_lock(&spu_prio->runq_lock);
+	for (level = find_first_bit(spu_prio->bitmap, prio);
+	     level < prio && !found; level++) {
+		/* XXX(hch): check for affinity here as well */
+		list_for_each_entry(pos, &spu_prio->runq[level], rq) {
+			if (!__node_allowed(pos, node))
+				continue;
+
+			/* unlink and stop walking the list right away */
+			__spu_del_from_rq(pos);
+			found = pos;
+			break;
+		}
+	}
+	spin_unlock(&spu_prio->runq_lock);
+
+	return found;
+}
+
+/*
+ * Take @ctx off its spu, handing the spu to a waiting context if any.
+ *
+ * A replacement is looked up on the runqueue below @max_prio.  Without a
+ * replacement @ctx is only unscheduled when @force is set.  Returns 1 if
+ * a replacement context was picked, 0 otherwise.
+ */
+static int __spu_deactivate(struct spu_context *ctx, int force, int max_prio)
+{
+	struct spu *spu = ctx->spu;
+	struct spu_context *new;
+
+	if (!spu)
+		return 0;
+
+	new = grab_runnable_context(max_prio, spu->node);
+	if (!new && !force)
+		return 0;
+
+	/* the spu is only marked free when nobody takes it over */
+	spu_unschedule(spu, ctx, new == NULL);
+	if (!new)
+		return 0;
+
+	if (new->flags & SPU_CREATE_NOSCHED) {
+		wake_up(&new->stop_wq);
+	} else {
+		/* spu_schedule() is called with ctx released ... */
+		spu_release(ctx);
+		spu_schedule(spu, new);
+		/* ... and this re-lock can't easily be made interruptible */
+		mutex_lock(&ctx->state_mutex);
+	}
+
+	return 1;
+}
+
+/**
+ * spu_deactivate - unbind a context from its physical spu
+ * @ctx: spu context to unbind
+ *
+ * Unbind @ctx from the physical spu it is running on and schedule
+ * the highest priority context to run on the freed physical spu.
+ */
+void spu_deactivate(struct spu_context *ctx)
+{
+ spu_context_nospu_trace(spu_deactivate__enter, ctx);
+ /* force=1: unschedule even when nothing is waiting; MAX_PRIO: consider every runqueue level */
+ __spu_deactivate(ctx, 1, MAX_PRIO);
+}
+
+/**
+ * spu_yield - yield a physical spu if others are waiting
+ * @ctx:	spu context to yield
+ *
+ * If a context is waiting on the runqueue, unbind @ctx from its physical
+ * spu and let the waiting context run there instead.  Contexts created
+ * with %SPU_CREATE_NOSCHED never yield.
+ */
+void spu_yield(struct spu_context *ctx)
+{
+	spu_context_nospu_trace(spu_yield__enter, ctx);
+
+	if (ctx->flags & SPU_CREATE_NOSCHED)
+		return;
+
+	mutex_lock(&ctx->state_mutex);
+	/* force=0: only give the spu up when a replacement exists */
+	__spu_deactivate(ctx, 0, MAX_PRIO);
+	mutex_unlock(&ctx->state_mutex);
+}
+
+/*
+ * Per-context scheduler tick, called from spusched_thread().
+ *
+ * Decrements the time slice of @ctx and, once the slice is used up (or the
+ * context is not flagged SPU_SCHED_SPU_RUN), tries to replace it with a
+ * context from the runqueue.  Contexts that are not runnable, are
+ * SPU_CREATE_NOSCHED, or use SCHED_FIFO are left alone.
+ */
+static noinline void spusched_tick(struct spu_context *ctx)
+{
+ struct spu_context *new = NULL;
+ struct spu *spu = NULL;
+
+ if (spu_acquire(ctx))
+ BUG(); /* a kernel thread never has signals pending */
+
+ if (ctx->state != SPU_STATE_RUNNABLE)
+ goto out;
+ if (ctx->flags & SPU_CREATE_NOSCHED)
+ goto out;
+ if (ctx->policy == SCHED_FIFO)
+ goto out;
+
+ /* slice not used up yet and the context is still flagged SPU_SCHED_SPU_RUN: keep it */
+ if (--ctx->time_slice && test_bit(SPU_SCHED_SPU_RUN, &ctx->sched_flags))
+ goto out;
+
+ spu = ctx->spu;
+
+ spu_context_trace(spusched_tick__preempt, ctx, spu);
+
+ /* the bound ctx->prio + 1 makes runqueue level ctx->prio itself eligible */
+ new = grab_runnable_context(ctx->prio + 1, spu->node);
+ if (new) {
+ /* free_spu=0: the spu is handed to 'new' below, not released */
+ spu_unschedule(spu, ctx, 0);
+ if (test_bit(SPU_SCHED_SPU_RUN, &ctx->sched_flags))
+ spu_add_to_rq(ctx);
+ } else {
+ spu_context_nospu_trace(spusched_tick__newslice, ctx);
+ /* nobody is waiting: give the context one more tick */
+ if (!ctx->time_slice)
+ ctx->time_slice++;
+ }
+out:
+ spu_release(ctx);
+
+ /* scheduling 'new' is done only after ctx has been released */
+ if (new)
+ spu_schedule(spu, new);
+}
+
+/**
+ * count_active_contexts - count nr of active tasks
+ *
+ * Returns the sum of the per-node nr_active counters plus the number of
+ * contexts waiting on the runqueue.
+ *
+ * Neither runq_lock nor list_mutex is taken: reading a single 32bit
+ * value is atomic on powerpc, and memory ordering does not matter for
+ * this estimate.
+ */
+static unsigned long count_active_contexts(void)
+{
+	int total = 0;
+	int node;
+
+	for (node = 0; node < MAX_NUMNODES; node++) {
+		total += cbe_spu_info[node].nr_active;
+	}
+	total += spu_prio->nr_waiting;
+
+	return total;
+}
+
+/**
+ * spu_calc_load - update the avenrun load estimates.
+ *
+ * Feeds the current number of active contexts into the three
+ * spu_avenrun[] averages (EXP_1, EXP_5 and EXP_15 decay).
+ *
+ * As with the CPU loadavg code, nothing locks these values against
+ * concurrent readers.
+ */
+static void spu_calc_load(void)
+{
+	/* scaled by FIXED_1, i.e. in the fixed-point format of calc_load() */
+	unsigned long load = count_active_contexts() * FIXED_1;
+
+	spu_avenrun[0] = calc_load(spu_avenrun[0], EXP_1, load);
+	spu_avenrun[1] = calc_load(spu_avenrun[1], EXP_5, load);
+	spu_avenrun[2] = calc_load(spu_avenrun[2], EXP_15, load);
+}
+
+/*
+ * Timer callback of spusched_timer: re-arm the timer SPUSCHED_TICK jiffies
+ * ahead and wake the scheduler thread.
+ */
+static void spusched_wake(struct timer_list *unused)
+{
+ mod_timer(&spusched_timer, jiffies + SPUSCHED_TICK);
+ wake_up_process(spusched_task);
+}
+
+/*
+ * Timer callback of spuloadavg_timer: re-arm the timer LOAD_FREQ jiffies
+ * ahead and refresh the load averages.
+ */
+static void spuloadavg_wake(struct timer_list *unused)
+{
+ mod_timer(&spuloadavg_timer, jiffies + LOAD_FREQ);
+ spu_calc_load();
+}
+
+/*
+ * Body of the "spusched" kernel thread.
+ *
+ * Sleeps until woken (spusched_wake() does that from spusched_timer), then
+ * walks the spu list of every node and runs spusched_tick() on each context
+ * currently bound to an spu.  Returns 0 once kthread_stop() was called.
+ */
+static int spusched_thread(void *unused)
+{
+	struct spu *spu;
+	int node;
+
+	while (!kthread_should_stop()) {
+		set_current_state(TASK_INTERRUPTIBLE);
+		/*
+		 * Check again now that the task state is set.  If
+		 * kthread_stop() ran between the loop condition and
+		 * set_current_state(), its wake-up hit a running task and
+		 * was lost; sleeping here could then last forever because
+		 * spu_sched_exit() deletes the timer before stopping us.
+		 */
+		if (kthread_should_stop()) {
+			__set_current_state(TASK_RUNNING);
+			break;
+		}
+		schedule();
+		for (node = 0; node < MAX_NUMNODES; node++) {
+			struct mutex *mtx = &cbe_spu_info[node].list_mutex;
+
+			mutex_lock(mtx);
+			list_for_each_entry(spu, &cbe_spu_info[node].spus,
+					cbe_list) {
+				struct spu_context *ctx = spu->ctx;
+
+				if (ctx) {
+					/*
+					 * Hold a reference on ctx while
+					 * list_mutex is dropped for the tick.
+					 */
+					get_spu_context(ctx);
+					mutex_unlock(mtx);
+					spusched_tick(ctx);
+					mutex_lock(mtx);
+					put_spu_context(ctx);
+				}
+			}
+			mutex_unlock(mtx);
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Switch @ctx to utilization state @new_state.
+ *
+ * The time spent since the last switch is charged to the state being left,
+ * both on the context and, if the context is bound, on its spu.  The
+ * node's busy_spus counter follows transitions into and out of
+ * SPU_UTIL_USER.  Warns if ctx->state_mutex is not locked.
+ */
+void spuctx_switch_state(struct spu_context *ctx,
+		enum spu_utilization_state new_state)
+{
+	unsigned long long now = ktime_get_ns();
+	signed long long elapsed = now - ctx->stats.tstamp;
+	enum spu_utilization_state prev;
+	struct spu *spu;
+	int node;
+
+	WARN_ON(!mutex_is_locked(&ctx->state_mutex));
+	WARN_ON(elapsed < 0);
+
+	spu = ctx->spu;
+	prev = ctx->stats.util_state;
+	ctx->stats.util_state = new_state;
+	ctx->stats.tstamp = now;
+
+	/* time is only accounted while the context sits on a physical spu */
+	if (!spu)
+		return;
+
+	ctx->stats.times[prev] += elapsed;
+	spu->stats.times[prev] += elapsed;
+	spu->stats.util_state = new_state;
+	spu->stats.tstamp = now;
+
+	node = spu->node;
+	if (prev == SPU_UTIL_USER)
+		atomic_dec(&cbe_spu_info[node].busy_spus);
+	if (new_state == SPU_UTIL_USER)
+		atomic_inc(&cbe_spu_info[node].busy_spus);
+}
+
+#ifdef CONFIG_PROC_FS
+/*
+ * seq_file show routine behind the "spu_loadavg" proc entry.  Prints the
+ * three load averages, active/total context counts and a pid cursor value
+ * in one line.
+ */
+static int show_spu_loadavg(struct seq_file *s, void *private)
+{
+	/* FIXED_1/200 is added to each average before it is split up */
+	int avg1 = spu_avenrun[0] + (FIXED_1/200);
+	int avg5 = spu_avenrun[1] + (FIXED_1/200);
+	int avg15 = spu_avenrun[2] + (FIXED_1/200);
+
+	/*
+	 * The trailing last_pid field means little for SPUs (and is odd
+	 * enough for CPUs), it is only there to keep the output 100%
+	 * compatible with the CPU loadavg interface.
+	 */
+	seq_printf(s, "%d.%02d %d.%02d %d.%02d %ld/%d %d\n",
+		LOAD_INT(avg1), LOAD_FRAC(avg1),
+		LOAD_INT(avg5), LOAD_FRAC(avg5),
+		LOAD_INT(avg15), LOAD_FRAC(avg15),
+		count_active_contexts(),
+		atomic_read(&nr_spu_contexts),
+		idr_get_cursor(&task_active_pid_ns(current)->idr) - 1);
+	return 0;
+}
+#endif
+
+/*
+ * spu_sched_init - set up the SPU scheduler.
+ *
+ * Allocates and initialises the priority array, sets up the scheduler and
+ * load-average timers, starts the "spusched" kernel thread and creates the
+ * "spu_loadavg" proc entry.
+ *
+ * Returns 0 on success.  On failure everything set up so far is undone and
+ * a negative errno is returned: -ENOMEM if the allocation or the proc
+ * entry fails, or the error reported by kthread_run().
+ */
+int __init spu_sched_init(void)
+{
+	struct proc_dir_entry *entry;
+	int err = -ENOMEM, i;
+
+	spu_prio = kzalloc(sizeof(struct spu_prio_array), GFP_KERNEL);
+	if (!spu_prio)
+		goto out;
+
+	for (i = 0; i < MAX_PRIO; i++) {
+		INIT_LIST_HEAD(&spu_prio->runq[i]);
+		__clear_bit(i, spu_prio->bitmap);
+	}
+	spin_lock_init(&spu_prio->runq_lock);
+
+	timer_setup(&spusched_timer, spusched_wake, 0);
+	timer_setup(&spuloadavg_timer, spuloadavg_wake, 0);
+
+	spusched_task = kthread_run(spusched_thread, NULL, "spusched");
+	if (IS_ERR(spusched_task)) {
+		err = PTR_ERR(spusched_task);
+		goto out_free_spu_prio;
+	}
+
+	/* from here on the load-average timer is armed and re-arms itself */
+	mod_timer(&spuloadavg_timer, 0);
+
+	entry = proc_create_single("spu_loadavg", 0, NULL, show_spu_loadavg);
+	if (!entry)
+		goto out_stop_kthread;
+
+	pr_debug("spusched: tick: %d, min ticks: %d, default ticks: %d\n",
+			SPUSCHED_TICK, MIN_SPU_TIMESLICE, DEF_SPU_TIMESLICE);
+	return 0;
+
+ out_stop_kthread:
+	/*
+	 * The load-average timer was armed above and its handler reads
+	 * spu_prio through count_active_contexts(); it has to be stopped
+	 * before spu_prio is freed below.
+	 */
+	del_timer_sync(&spuloadavg_timer);
+	kthread_stop(spusched_task);
+ out_free_spu_prio:
+	kfree(spu_prio);
+ out:
+	return err;
+}
+
+/*
+ * spu_sched_exit - tear down the SPU scheduler.
+ *
+ * Removes the proc entry, stops both timers and the scheduler thread,
+ * marks every spu on every node as SPU_FREE and frees the priority array.
+ */
+void spu_sched_exit(void)
+{
+	struct spu *spu;
+	int node;
+
+	remove_proc_entry("spu_loadavg", NULL);
+
+	del_timer_sync(&spusched_timer);
+	del_timer_sync(&spuloadavg_timer);
+	kthread_stop(spusched_task);
+
+	for (node = 0; node < MAX_NUMNODES; node++) {
+		struct mutex *mtx = &cbe_spu_info[node].list_mutex;
+
+		mutex_lock(mtx);
+		list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
+			if (spu->alloc_state == SPU_FREE)
+				continue;
+			spu->alloc_state = SPU_FREE;
+		}
+		mutex_unlock(mtx);
+	}
+	kfree(spu_prio);
+}
diff --git a/arch/powerpc/platforms/cell/spufs/spu_restore.c b/arch/powerpc/platforms/cell/spufs/spu_restore.c
new file mode 100644
index 000000000..2cbb6efb2
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/spu_restore.c
@@ -0,0 +1,322 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * spu_restore.c
+ *
+ * (C) Copyright IBM Corp. 2005
+ *
+ * SPU-side context restore sequence outlined in
+ * Synergistic Processor Element Book IV
+ *
+ * Author: Mark Nutter <mnutter@us.ibm.com>
+ */
+
+
+#ifndef LS_SIZE
+#define LS_SIZE 0x40000 /* 256K (in bytes) */
+#endif
+
+typedef unsigned int u32;
+typedef unsigned long long u64;
+
+#include <spu_intrinsics.h>
+#include <asm/spu_csa.h>
+#include "spu_utils.h"
+
+#define BR_INSTR 0x327fff80 /* br -4 */
+#define NOP_INSTR 0x40200000 /* nop */
+#define HEQ_INSTR 0x7b000000 /* heq $0, $0 */
+#define STOP_INSTR 0x00000000 /* stop 0x0 */
+#define ILLEGAL_INSTR 0x00800000 /* illegal instr */
+#define RESTORE_COMPLETE 0x00003ffc /* stop 0x3ffc */
+
+/*
+ * Queue a GET command (tag 0) that copies sizeof(regs_spill) bytes from
+ * the effective address @lscsa_ea into regs_spill.  The command is only
+ * enqueued here, nothing waits for it to complete.
+ */
+static inline void fetch_regs_from_mem(addr64 lscsa_ea)
+{
+	unsigned int dest = (unsigned int)&regs_spill[0];
+	unsigned int nbytes = sizeof(regs_spill);
+	unsigned int tag = 0;
+	unsigned int get_cmd = 0x40;	/* GET */
+
+	/* the write to MFC_Cmd comes last, after all parameter channels */
+	spu_writech(MFC_LSA, dest);
+	spu_writech(MFC_EAH, lscsa_ea.ui[0]);
+	spu_writech(MFC_EAL, lscsa_ea.ui[1]);
+	spu_writech(MFC_Size, nbytes);
+	spu_writech(MFC_TagID, tag);
+	spu_writech(MFC_Cmd, get_cmd);
+}
+
+/*
+ * Restore, Step 4:
+ *    Enqueue the GETL command (tag 0) to the MFC SPU command queue to
+ *    transfer the upper 240 kb of LS from CSA.  The transfer is
+ *    described by dma_list and targets local store from offset 16384.
+ */
+static inline void restore_upper_240kb(addr64 lscsa_ea)
+{
+	unsigned int ls_start = 16384;
+	unsigned int list_addr = (unsigned int)&dma_list[0];
+	unsigned int list_bytes = sizeof(dma_list);
+	unsigned int tag = 0;
+	unsigned int getl_cmd = 0x44;	/* GETL */
+
+	/* for the list form, MFC_EAL carries the LS address of the list */
+	spu_writech(MFC_LSA, ls_start);
+	spu_writech(MFC_EAH, lscsa_ea.ui[0]);
+	spu_writech(MFC_EAL, list_addr);
+	spu_writech(MFC_Size, list_bytes);
+	spu_writech(MFC_TagID, tag);
+	spu_writech(MFC_Cmd, getl_cmd);
+}
+
+/*
+ * Restore, Step 6 (moved):
+ *    If the LSCSA "decrementer running" flag is set, write the
+ *    decrementer value saved in the LSCSA to the SPU_WrDec channel.
+ *    Otherwise the channel is left untouched.
+ */
+static inline void restore_decr(void)
+{
+	unsigned int status;
+	unsigned int saved_decr;
+
+	status = regs_spill[LSCSA_QW_OFFSET(decr_status)].slot[0];
+	if (!(status & SPU_DECR_STATUS_RUNNING))
+		return;
+
+	saved_decr = regs_spill[LSCSA_QW_OFFSET(decr)].slot[0];
+	spu_writech(SPU_WrDec, saved_decr);
+}
+
+/*
+ * Restore, Step 11:
+ *    Write the PPU_MB data saved in the LSCSA to the outbound mailbox
+ *    channel (SPU_WrOutMbox).
+ */
+static inline void write_ppu_mb(void)
+{
+	unsigned int mbox = regs_spill[LSCSA_QW_OFFSET(ppu_mb)].slot[0];
+
+	spu_writech(SPU_WrOutMbox, mbox);
+}
+
+/*
+ * Restore, Step 12:
+ *    Write the PPUINT_MB data saved in the LSCSA to the outbound
+ *    interrupt mailbox channel (SPU_WrOutIntrMbox).
+ */
+static inline void write_ppuint_mb(void)
+{
+	unsigned int mbox = regs_spill[LSCSA_QW_OFFSET(ppuint_mb)].slot[0];
+
+	spu_writech(SPU_WrOutIntrMbox, mbox);
+}
+
+/*
+ * Restore, Step 13:
+ *    Load the floating-point status and control register from the
+ *    full quadword saved in the LSCSA.
+ */
+static inline void restore_fpcr(void)
+{
+	vector unsigned int saved;
+
+	saved = regs_spill[LSCSA_QW_OFFSET(fpcr)].v;
+	spu_mtfpscr(saved);
+}
+
+/*
+ * Restore, Step 14:
+ *    Write the SRR0 value saved in the LSCSA to the SPU_WrSRR0 channel.
+ */
+static inline void restore_srr0(void)
+{
+	unsigned int saved = regs_spill[LSCSA_QW_OFFSET(srr0)].slot[0];
+
+	spu_writech(SPU_WrSRR0, saved);
+}
+
+/*
+ * Restore, Step 15:
+ *    Write the event mask saved in the LSCSA to the SPU_WrEventMask
+ *    channel.
+ */
+static inline void restore_event_mask(void)
+{
+	unsigned int saved = regs_spill[LSCSA_QW_OFFSET(event_mask)].slot[0];
+
+	spu_writech(SPU_WrEventMask, saved);
+}
+
+/*
+ * Restore, Step 16:
+ *    Write the tag mask saved in the LSCSA to the MFC_WrTagMask channel.
+ */
+static inline void restore_tag_mask(void)
+{
+	unsigned int saved = regs_spill[LSCSA_QW_OFFSET(tag_mask)].slot[0];
+
+	spu_writech(MFC_WrTagMask, saved);
+}
+
+/*
+ * Patch the instruction words at exit_fini according to the
+ * stopped_status saved in the LSCSA.
+ *
+ * Word 0 is always the 'stop 0x3ffc' (RESTORE_COMPLETE) instruction; the
+ * words behind it are chosen per status value.  At most four words
+ * (exit_instrs[0..3]) are written.
+ */
+static inline void restore_complete(void)
+{
+ extern void exit_fini(void);
+ unsigned int *exit_instrs = (unsigned int *)exit_fini;
+ unsigned int offset;
+ unsigned int stopped_status;
+ unsigned int stopped_code;
+
+ /* Restore, Step 18:
+ * Issue a stop-and-signal instruction with
+ * "good context restore" signal value.
+ *
+ * Restore, Step 19:
+ * There may be additional instructions placed
+ * here by the PPE Sequence for SPU Context
+ * Restore in order to restore the correct
+ * "stopped state".
+ *
+ * This step is handled here by analyzing the
+ * LSCSA.stopped_status and then modifying the
+ * exit() function to behave appropriately.
+ */
+
+ /* slot[0] holds the status selector, slot[1] the saved stop code */
+ offset = LSCSA_QW_OFFSET(stopped_status);
+ stopped_status = regs_spill[offset].slot[0];
+ stopped_code = regs_spill[offset].slot[1];
+
+ switch (stopped_status) {
+ case SPU_STOPPED_STATUS_P_I:
+ /* SPU_Status[P,I]=1. Add illegal instruction
+ * followed by stop-and-signal instruction after
+ * end of restore code.
+ */
+ exit_instrs[0] = RESTORE_COMPLETE;
+ exit_instrs[1] = ILLEGAL_INSTR;
+ exit_instrs[2] = STOP_INSTR | stopped_code;
+ break;
+ case SPU_STOPPED_STATUS_P_H:
+ /* SPU_Status[P,H]=1. Add 'heq $0, $0' followed
+ * by stop-and-signal instruction after end of
+ * restore code.
+ */
+ exit_instrs[0] = RESTORE_COMPLETE;
+ exit_instrs[1] = HEQ_INSTR;
+ exit_instrs[2] = STOP_INSTR | stopped_code;
+ break;
+ case SPU_STOPPED_STATUS_S_P:
+ /* SPU_Status[S,P]=1. Add stop-and-signal
+ * instruction followed by nop and 'br -4' after
+ * end of restore code.
+ */
+ exit_instrs[0] = RESTORE_COMPLETE;
+ exit_instrs[1] = STOP_INSTR | stopped_code;
+ exit_instrs[2] = NOP_INSTR;
+ exit_instrs[3] = BR_INSTR;
+ break;
+ case SPU_STOPPED_STATUS_S_I:
+ /* SPU_Status[S,I]=1. Add illegal instruction
+ * followed by nop and 'br -4' after end of
+ * restore code.
+ */
+ exit_instrs[0] = RESTORE_COMPLETE;
+ exit_instrs[1] = ILLEGAL_INSTR;
+ exit_instrs[2] = NOP_INSTR;
+ exit_instrs[3] = BR_INSTR;
+ break;
+ case SPU_STOPPED_STATUS_I:
+ /* SPU_Status[I]=1. Add illegal instruction followed
+ * by infinite loop (nop, 'br -4') after end of
+ * restore sequence.
+ */
+ exit_instrs[0] = RESTORE_COMPLETE;
+ exit_instrs[1] = ILLEGAL_INSTR;
+ exit_instrs[2] = NOP_INSTR;
+ exit_instrs[3] = BR_INSTR;
+ break;
+ case SPU_STOPPED_STATUS_S:
+ /* SPU_Status[S]=1. Add two 'nop' instructions
+ * followed by 'br -4'.
+ */
+ exit_instrs[0] = RESTORE_COMPLETE;
+ exit_instrs[1] = NOP_INSTR;
+ exit_instrs[2] = NOP_INSTR;
+ exit_instrs[3] = BR_INSTR;
+ break;
+ case SPU_STOPPED_STATUS_H:
+ /* SPU_Status[H]=1. Add 'heq $0, $0' instruction
+ * followed by nop and 'br -4' after end of
+ * restore code.
+ */
+ exit_instrs[0] = RESTORE_COMPLETE;
+ exit_instrs[1] = HEQ_INSTR;
+ exit_instrs[2] = NOP_INSTR;
+ exit_instrs[3] = BR_INSTR;
+ break;
+ case SPU_STOPPED_STATUS_P:
+ /* SPU_Status[P]=1. Add stop-and-signal instruction
+ * after end of restore code.
+ */
+ exit_instrs[0] = RESTORE_COMPLETE;
+ exit_instrs[1] = STOP_INSTR | stopped_code;
+ break;
+ case SPU_STOPPED_STATUS_R:
+ /* SPU_Status[I,S,H,P,R]=0. Add infinite loop. */
+ exit_instrs[0] = RESTORE_COMPLETE;
+ exit_instrs[1] = NOP_INSTR;
+ exit_instrs[2] = NOP_INSTR;
+ exit_instrs[3] = BR_INSTR;
+ break;
+ default:
+ /* SPU_Status[R]=1. No additional instructions. */
+ break;
+ }
+ /* NOTE(review): presumably needed so the patched words are fetched as instructions - confirm */
+ spu_sync();
+}
+
+/**
+ * main - entry point for SPU-side context restore.
+ *
+ * This code deviates from the documented sequence in the
+ * following aspects:
+ *
+ * 1. The EA for LSCSA is passed from PPE in the
+ * signal notification channels.
+ * 2. The register spill area is pulled by SPU
+ * into LS, rather than pushed by PPE.
+ * 3. All 128 registers are restored by exit().
+ * 4. The exit() function is modified at run
+ * time in order to properly restore the
+ * SPU_Status register.
+ * 5. Step 6 (decrementer restore) is executed
+ * after Step 9.
+ *
+ * Always returns 0.
+ */
+int main()
+{
+ addr64 lscsa_ea;
+
+ /* ui[0] is later used as the high and ui[1] as the low half of the EA */
+ lscsa_ea.ui[0] = spu_readch(SPU_RdSigNotify1);
+ lscsa_ea.ui[1] = spu_readch(SPU_RdSigNotify2);
+ fetch_regs_from_mem(lscsa_ea);
+
+ set_event_mask(); /* Step 1. */
+ set_tag_mask(); /* Step 2. */
+ build_dma_list(lscsa_ea); /* Step 3. */
+ restore_upper_240kb(lscsa_ea); /* Step 4. */
+ /* Step 5: done by 'exit'. */
+ enqueue_putllc(lscsa_ea); /* Step 7. */
+ set_tag_update(); /* Step 8. */
+ read_tag_status(); /* Step 9. */
+ restore_decr(); /* moved Step 6. */
+ read_llar_status(); /* Step 10. */
+ write_ppu_mb(); /* Step 11. */
+ write_ppuint_mb(); /* Step 12. */
+ restore_fpcr(); /* Step 13. */
+ restore_srr0(); /* Step 14. */
+ restore_event_mask(); /* Step 15. */
+ restore_tag_mask(); /* Step 16. */
+ /* Step 17. done by 'exit'. */
+ restore_complete(); /* Step 18. */
+
+ return 0;
+}
diff --git a/arch/powerpc/platforms/cell/spufs/spu_restore_crt0.S b/arch/powerpc/platforms/cell/spufs/spu_restore_crt0.S
new file mode 100644
index 000000000..6d799f847
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/spu_restore_crt0.S
@@ -0,0 +1,102 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * crt0_r.S: Entry function for SPU-side context restore.
+ *
+ * Copyright (C) 2005 IBM
+ *
+ * Entry and exit function for SPU-side of the context restore
+ * sequence. Sets up an initial stack frame, then branches to
+ * 'main'. On return, restores all 128 registers from the LSCSA
+ * and exits.
+ */
+
+#include <asm/spu_csa.h>
+
+/* Spill area the saved register image is fetched into (128-byte aligned). */
+.data
+.align 7
+.globl regs_spill
+regs_spill:
+.space SIZEOF_SPU_SPILL_REGS, 0x0
+
+.text
+.global _start
+_start:
+ /* Initialize the stack pointer to point to 16368
+ * (16kb-16). The back chain pointer is initialized
+ * to NULL.
+ */
+ il $0, 0
+ il $SP, 16368
+ stqd $0, 0($SP)
+
+ /* Allocate a minimum stack frame for the called main.
+ * This is needed so that main has a place to save the
+ * link register when it calls another function.
+ */
+ stqd $SP, -160($SP)
+ ai $SP, $SP, -160
+
+ /* Call the program's main function. The instruction after
+ * this branch is 'exit', so a return from main continues
+ * there.
+ */
+ brsl $0, main
+
+.global exit
+.global _exit
+exit:
+_exit:
+ /* SPU Context Restore, Step 5: Restore the remaining 112 GPRs. */
+ /* $3 = spill slot of register 16 (16 slots of 16 bytes skipped). */
+ ila $3, regs_spill + 256
+ /* NOTE(review): self-modifying loop. $4 holds a copy of the four
+ * lqd instructions at restore_reg_insts; adding 4 to every word
+ * presumably advances each target register number by four, and
+ * the copy is stored back over the instructions on every pass.
+ * The loop ends once the low 7 bits of the first word are zero.
+ * Confirm the field layout against the SPU ISA.
+ */
+restore_regs:
+ lqr $4, restore_reg_insts
+restore_reg_loop:
+ ai $4, $4, 4
+ /* pad with nop (0x40200000) up to the next 16-byte boundary */
+ .balignl 16, 0x40200000
+restore_reg_insts: /* must be quad-word aligned. */
+ lqd $16, 0($3)
+ lqd $17, 16($3)
+ lqd $18, 32($3)
+ lqd $19, 48($3)
+ andi $5, $4, 0x7F
+ stqr $4, restore_reg_insts
+ ai $3, $3, 64
+ brnz $5, restore_reg_loop
+
+ /* SPU Context Restore Step 17: Restore the first 16 GPRs. */
+ lqa $0, regs_spill + 0
+ lqa $1, regs_spill + 16
+ lqa $2, regs_spill + 32
+ lqa $3, regs_spill + 48
+ lqa $4, regs_spill + 64
+ lqa $5, regs_spill + 80
+ lqa $6, regs_spill + 96
+ lqa $7, regs_spill + 112
+ lqa $8, regs_spill + 128
+ lqa $9, regs_spill + 144
+ lqa $10, regs_spill + 160
+ lqa $11, regs_spill + 176
+ lqa $12, regs_spill + 192
+ lqa $13, regs_spill + 208
+ lqa $14, regs_spill + 224
+ lqa $15, regs_spill + 240
+
+ /* Under normal circumstances, the 'exit' function
+ * terminates with 'stop SPU_RESTORE_COMPLETE',
+ * indicating that the SPU-side restore code has
+ * completed.
+ *
+ * However it is possible that instructions immediately
+ * following the 'stop 0x3ffc' have been modified at run
+ * time so as to recreate the exact SPU_Status settings
+ * from the application, e.g. illegal instruction, halt,
+ * etc.
+ */
+.global exit_fini
+.global _exit_fini
+exit_fini:
+_exit_fini:
+ stop SPU_RESTORE_COMPLETE
+ stop 0
+ stop 0
+ stop 0
+
+ /* Pad the size of this crt0.o to be multiple of 16 bytes. */
+.balignl 16, 0x0
diff --git a/arch/powerpc/platforms/cell/spufs/spu_restore_dump.h_shipped b/arch/powerpc/platforms/cell/spufs/spu_restore_dump.h_shipped
new file mode 100644
index 000000000..f383b027e
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/spu_restore_dump.h_shipped
@@ -0,0 +1,935 @@
+/*
+ * spu_restore_dump.h: Copyright (C) 2005 IBM.
+ * Hex-dump auto generated from spu_restore.c.
+ * Do not edit!
+ */
+static unsigned int spu_restore_code[] __attribute__((__aligned__(128))) = {
+0x40800000,
+0x409ff801,
+0x24000080,
+0x24fd8081,
+0x1cd80081,
+0x33001180,
+0x42034003,
+0x33800284,
+0x1c010204,
+0x40200000,
+0x40200000,
+0x40200000,
+0x34000190,
+0x34004191,
+0x34008192,
+0x3400c193,
+0x141fc205,
+0x23fffd84,
+0x1c100183,
+0x217ffa85,
+0x3080b000,
+0x3080b201,
+0x3080b402,
+0x3080b603,
+0x3080b804,
+0x3080ba05,
+0x3080bc06,
+0x3080be07,
+0x3080c008,
+0x3080c209,
+0x3080c40a,
+0x3080c60b,
+0x3080c80c,
+0x3080ca0d,
+0x3080cc0e,
+0x3080ce0f,
+0x00003ffc,
+0x00000000,
+0x00000000,
+0x00000000,
+0x01a00182,
+0x3ec00083,
+0xb0a14103,
+0x01a00204,
+0x3ec10083,
+0x4202c002,
+0xb0a14203,
+0x21a00802,
+0x3fbf028a,
+0x3f20050a,
+0x3fbe0502,
+0x3fe30102,
+0x21a00882,
+0x3f82028b,
+0x3fe3058b,
+0x3fbf0584,
+0x3f200204,
+0x3fbe0204,
+0x3fe30204,
+0x04000203,
+0x21a00903,
+0x40848002,
+0x21a00982,
+0x40800003,
+0x21a00a03,
+0x40802002,
+0x21a00a82,
+0x21a00083,
+0x40800082,
+0x21a00b02,
+0x10002612,
+0x42a00003,
+0x42074006,
+0x1800c204,
+0x40a00008,
+0x40800789,
+0x1c010305,
+0x34000302,
+0x1cffc489,
+0x3ec00303,
+0x3ec00287,
+0xb0408403,
+0x24000302,
+0x34000282,
+0x1c020306,
+0xb0408207,
+0x18020204,
+0x24000282,
+0x217ffa09,
+0x04000402,
+0x21a00802,
+0x3fbe0504,
+0x3fe30204,
+0x21a00884,
+0x42074002,
+0x21a00902,
+0x40803c03,
+0x21a00983,
+0x04000485,
+0x21a00a05,
+0x40802202,
+0x21a00a82,
+0x21a00805,
+0x21a00884,
+0x3fbf0582,
+0x3f200102,
+0x3fbe0102,
+0x3fe30102,
+0x21a00902,
+0x40804003,
+0x21a00983,
+0x21a00a05,
+0x40805a02,
+0x21a00a82,
+0x40800083,
+0x21a00b83,
+0x01a00c02,
+0x30809c03,
+0x34000182,
+0x14004102,
+0x21002082,
+0x01a00d82,
+0x3080a003,
+0x34000182,
+0x21a00e02,
+0x3080a203,
+0x34000182,
+0x21a00f02,
+0x3080a403,
+0x34000182,
+0x77400100,
+0x3080a603,
+0x34000182,
+0x21a00702,
+0x3080a803,
+0x34000182,
+0x21a00082,
+0x3080aa03,
+0x34000182,
+0x21a00b02,
+0x4020007f,
+0x3080ae02,
+0x42004805,
+0x3080ac04,
+0x34000103,
+0x34000202,
+0x1cffc183,
+0x3b810106,
+0x0f608184,
+0x42013802,
+0x5c020183,
+0x38810102,
+0x3b810102,
+0x21000e83,
+0x4020007f,
+0x35000100,
+0x00000470,
+0x000002f8,
+0x00000430,
+0x00000360,
+0x000002f8,
+0x000003c8,
+0x000004a8,
+0x00000298,
+0x00000360,
+0x00200000,
+0x409ffe02,
+0x30801203,
+0x40800208,
+0x3ec40084,
+0x40800407,
+0x3ac20289,
+0xb060c104,
+0x3ac1c284,
+0x20801203,
+0x38820282,
+0x41004003,
+0xb0408189,
+0x28820282,
+0x3881c282,
+0xb0408304,
+0x2881c282,
+0x00400000,
+0x40800003,
+0x35000000,
+0x30809e03,
+0x34000182,
+0x21a00382,
+0x4020007f,
+0x327fde00,
+0x409ffe02,
+0x30801203,
+0x40800206,
+0x3ec40084,
+0x40800407,
+0x40800608,
+0x3ac1828a,
+0x3ac20289,
+0xb060c104,
+0x3ac1c284,
+0x20801203,
+0x38818282,
+0x41004003,
+0xb040818a,
+0x10005b0b,
+0x41201003,
+0x28818282,
+0x3881c282,
+0xb0408184,
+0x41193f83,
+0x60ffc003,
+0x2881c282,
+0x38820282,
+0xb0408189,
+0x28820282,
+0x327fef80,
+0x409ffe02,
+0x30801203,
+0x40800207,
+0x3ec40086,
+0x4120100b,
+0x10005b14,
+0x40800404,
+0x3ac1c289,
+0x40800608,
+0xb060c106,
+0x3ac10286,
+0x3ac2028a,
+0x20801203,
+0x3881c282,
+0x41193f83,
+0x60ffc003,
+0xb0408589,
+0x2881c282,
+0x38810282,
+0xb0408586,
+0x28810282,
+0x38820282,
+0xb040818a,
+0x28820282,
+0x4020007f,
+0x327fe280,
+0x409ffe02,
+0x30801203,
+0x40800207,
+0x3ec40084,
+0x40800408,
+0x10005b14,
+0x40800609,
+0x3ac1c28a,
+0x3ac2028b,
+0xb060c104,
+0x3ac24284,
+0x20801203,
+0x41201003,
+0x3881c282,
+0xb040830a,
+0x2881c282,
+0x38820282,
+0xb040818b,
+0x41193f83,
+0x60ffc003,
+0x28820282,
+0x38824282,
+0xb0408184,
+0x28824282,
+0x4020007f,
+0x327fd580,
+0x409ffe02,
+0x1000658e,
+0x40800206,
+0x30801203,
+0x40800407,
+0x3ec40084,
+0x40800608,
+0x3ac1828a,
+0x3ac20289,
+0xb060c104,
+0x3ac1c284,
+0x20801203,
+0x413d8003,
+0x38818282,
+0x4020007f,
+0x327fd800,
+0x409ffe03,
+0x30801202,
+0x40800207,
+0x3ec40084,
+0x10005b09,
+0x3ac1c288,
+0xb0408184,
+0x4020007f,
+0x4020007f,
+0x20801202,
+0x3881c282,
+0xb0408308,
+0x2881c282,
+0x327fc680,
+0x409ffe02,
+0x1000588b,
+0x40800208,
+0x30801203,
+0x40800407,
+0x3ec40084,
+0x3ac20289,
+0xb060c104,
+0x3ac1c284,
+0x20801203,
+0x413d8003,
+0x38820282,
+0x327fbd80,
+0x00200000,
+0x00000da0,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000d90,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000db0,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000dc0,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000d80,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000df0,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000de0,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000dd0,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000e04,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000e00,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+};
diff --git a/arch/powerpc/platforms/cell/spufs/spu_save.c b/arch/powerpc/platforms/cell/spufs/spu_save.c
new file mode 100644
index 000000000..28c88e324
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/spu_save.c
@@ -0,0 +1,181 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * spu_save.c
+ *
+ * (C) Copyright IBM Corp. 2005
+ *
+ * SPU-side context save sequence outlined in
+ * Synergistic Processor Element Book IV
+ *
+ * Author: Mark Nutter <mnutter@us.ibm.com>
+ */
+
+
+#ifndef LS_SIZE
+#define LS_SIZE 0x40000 /* 256K (in bytes) */
+#endif
+
+typedef unsigned int u32;
+typedef unsigned long long u64;
+
+#include <spu_intrinsics.h>
+#include <asm/spu_csa.h>
+#include "spu_utils.h"
+
+static inline void save_event_mask(void)
+{
+ unsigned int offset;
+
+ /* Save, Step 2:
+ * Read the SPU_RdEventMask channel and save it in the LSCSA event_mask slot.
+ */
+ offset = LSCSA_QW_OFFSET(event_mask);
+ regs_spill[offset].slot[0] = spu_readch(SPU_RdEventMask);
+}
+
+static inline void save_tag_mask(void)
+{
+ unsigned int offset;
+
+ /* Save, Step 3:
+ * Read the MFC_RdTagMask channel and save it in the LSCSA tag_mask slot.
+ */
+ offset = LSCSA_QW_OFFSET(tag_mask);
+ regs_spill[offset].slot[0] = spu_readch(MFC_RdTagMask);
+}
+
+static inline void save_upper_240kb(addr64 lscsa_ea)
+{
+ unsigned int ls = 16384; /* LS offset at which the transfer starts */
+ unsigned int list = (unsigned int)&dma_list[0];
+ unsigned int size = sizeof(dma_list);
+ unsigned int tag_id = 0;
+ unsigned int cmd = 0x24; /* PUTL */
+
+ /* Save, Step 7:
+ * Enqueue the PUTL command (tag 0), described by dma_list, to the MFC SPU
+ * command queue to transfer the remaining 240 KB of LS to the CSA.
+ */
+ spu_writech(MFC_LSA, ls);
+ spu_writech(MFC_EAH, lscsa_ea.ui[0]);
+ spu_writech(MFC_EAL, list); /* address of dma_list, not lscsa_ea.ui[1] */
+ spu_writech(MFC_Size, size); /* size of the list itself */
+ spu_writech(MFC_TagID, tag_id);
+ spu_writech(MFC_Cmd, cmd);
+}
+
+static inline void save_fpcr(void)
+{
+ /* Quadword index of the fpcr slot within regs_spill. */
+ unsigned int offset;
+
+ /* Save, Step 9:
+ * Issue the floating-point status and control register
+ * read instruction, and save the full quadword to the LSCSA.
+ */
+ offset = LSCSA_QW_OFFSET(fpcr);
+ regs_spill[offset].v = spu_mffpscr();
+}
+
+static inline void save_decr(void)
+{
+ unsigned int offset;
+
+ /* Save, Step 10:
+ * Read and save the SPU_RdDec channel data to
+ * the LSCSA.
+ */
+ offset = LSCSA_QW_OFFSET(decr);
+ regs_spill[offset].slot[0] = spu_readch(SPU_RdDec);
+}
+
+static inline void save_srr0(void)
+{
+ unsigned int offset;
+
+ /* Save, Step 11:
+ * Read and save the SPU_RdSRR0 channel data to
+ * the LSCSA.
+ */
+ offset = LSCSA_QW_OFFSET(srr0);
+ regs_spill[offset].slot[0] = spu_readch(SPU_RdSRR0);
+}
+
+static inline void spill_regs_to_mem(addr64 lscsa_ea)
+{
+ unsigned int ls = (unsigned int)&regs_spill[0];
+ unsigned int size = sizeof(regs_spill);
+ unsigned int tag_id = 0;
+ unsigned int cmd = 0x20; /* PUT */
+
+ /* Save, Step 13:
+ * Enqueue a PUT command (tag 0) to send the LSCSA
+ * to the CSA.
+ */
+ spu_writech(MFC_LSA, ls);
+ spu_writech(MFC_EAH, lscsa_ea.ui[0]);
+ spu_writech(MFC_EAL, lscsa_ea.ui[1]);
+ spu_writech(MFC_Size, size);
+ spu_writech(MFC_TagID, tag_id);
+ spu_writech(MFC_Cmd, cmd);
+}
+
+static inline void enqueue_sync(addr64 lscsa_ea)
+{
+ unsigned int tag_id = 0;
+ unsigned int cmd = 0xCC; /* MFC_SYNC */
+
+ /* Save, Step 14:
+ * Enqueue an MFC_SYNC command (tag 0); lscsa_ea is not used here.
+ */
+ spu_writech(MFC_TagID, tag_id);
+ spu_writech(MFC_Cmd, cmd);
+}
+
+static inline void save_complete(void)
+{
+ /* Save, Step 18:
+ * Issue a stop-and-signal instruction indicating
+ * "save complete". Note: This function will not
+ * return!!
+ */
+ spu_stop(SPU_SAVE_COMPLETE);
+}
+
+/**
+ * main - entry point for SPU-side context save.
+ *
+ * This code deviates from the documented sequence as follows:
+ *
+ * 1. The EA for LSCSA is passed from PPE in the
+ * signal notification channels.
+ * 2. All 128 registers are saved by crt0.o, before main is called.
+ */
+int main()
+{
+ addr64 lscsa_ea;
+
+ lscsa_ea.ui[0] = spu_readch(SPU_RdSigNotify1);
+ lscsa_ea.ui[1] = spu_readch(SPU_RdSigNotify2);
+
+ /* Step 1: done by crt0 (_start). */
+ save_event_mask(); /* Step 2. */
+ save_tag_mask(); /* Step 3. */
+ set_event_mask(); /* Step 4. */
+ set_tag_mask(); /* Step 5. */
+ build_dma_list(lscsa_ea); /* Step 6. */
+ save_upper_240kb(lscsa_ea); /* Step 7. */
+ /* Step 8: done by crt0 (_start). */
+ save_fpcr(); /* Step 9. */
+ save_decr(); /* Step 10. */
+ save_srr0(); /* Step 11. */
+ enqueue_putllc(lscsa_ea); /* Step 12. */
+ spill_regs_to_mem(lscsa_ea); /* Step 13. */
+ enqueue_sync(lscsa_ea); /* Step 14. */
+ set_tag_update(); /* Step 15. */
+ read_tag_status(); /* Step 16. */
+ read_llar_status(); /* Step 17. */
+ save_complete(); /* Step 18. */
+
+ return 0;
+}
diff --git a/arch/powerpc/platforms/cell/spufs/spu_save_crt0.S b/arch/powerpc/platforms/cell/spufs/spu_save_crt0.S
new file mode 100644
index 000000000..5ce32efdc
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/spu_save_crt0.S
@@ -0,0 +1,88 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * spu_save_crt0.S: Entry function for SPU-side context save.
+ *
+ * Copyright (C) 2005 IBM
+ *
+ * Entry function for SPU-side of the context save sequence.
+ * Saves all 128 GPRs, sets up an initial stack frame, then
+ * branches to 'main'.
+ */
+
+#include <asm/spu_csa.h>
+
+.data
+.align 7
+.globl regs_spill
+regs_spill:
+.space SIZEOF_SPU_SPILL_REGS, 0x0
+
+.text
+.global _start
+_start:
+ /* SPU Context Save Step 1: Save the first 16 GPRs. */
+ stqa $0, regs_spill + 0
+ stqa $1, regs_spill + 16
+ stqa $2, regs_spill + 32
+ stqa $3, regs_spill + 48
+ stqa $4, regs_spill + 64
+ stqa $5, regs_spill + 80
+ stqa $6, regs_spill + 96
+ stqa $7, regs_spill + 112
+ stqa $8, regs_spill + 128
+ stqa $9, regs_spill + 144
+ stqa $10, regs_spill + 160
+ stqa $11, regs_spill + 176
+ stqa $12, regs_spill + 192
+ stqa $13, regs_spill + 208
+ stqa $14, regs_spill + 224
+ stqa $15, regs_spill + 240
+
+ /* SPU Context Save, Step 8: Save the remaining 112 GPRs. */
+ ila $3, regs_spill + 256
+save_regs:
+ lqr $4, save_reg_insts
+save_reg_loop:
+ ai $4, $4, 4
+ .balignl 16, 0x40200000
+save_reg_insts: /* must be quad-word aligned. */
+ stqd $16, 0($3)
+ stqd $17, 16($3)
+ stqd $18, 32($3)
+ stqd $19, 48($3)
+ andi $5, $4, 0x7F
+ stqr $4, save_reg_insts
+ ai $3, $3, 64
+ brnz $5, save_reg_loop
+
+ /* Initialize the stack pointer to point to 16368
+ * (16kb-16). The back chain pointer is initialized
+ * to NULL.
+ */
+ il $0, 0
+ il $SP, 16368
+ stqd $0, 0($SP)
+
+ /* Allocate a minimum stack frame for the called main.
+ * This is needed so that main has a place to save the
+ * link register when it calls another function.
+ */
+ stqd $SP, -160($SP)
+ ai $SP, $SP, -160
+
+ /* Call the program's main function. */
+ brsl $0, main
+
+ /* In this case main should not return; if it does
+ * there has been an error in the sequence. Execute
+ * stop-and-signal with code=0.
+ */
+.global exit
+.global _exit
+exit:
+_exit:
+ stop 0x0
+
+ /* Pad the size of this crt0.o to be a multiple of 16 bytes. */
+.balignl 16, 0x0
+
diff --git a/arch/powerpc/platforms/cell/spufs/spu_save_dump.h_shipped b/arch/powerpc/platforms/cell/spufs/spu_save_dump.h_shipped
new file mode 100644
index 000000000..b9f81ac8a
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/spu_save_dump.h_shipped
@@ -0,0 +1,743 @@
+/*
+ * spu_save_dump.h: Copyright (C) 2005 IBM.
+ * Hex-dump auto generated from spu_save.c.
+ * Do not edit!
+ */
+static unsigned int spu_save_code[] __attribute__((__aligned__(128))) = {
+0x20805000,
+0x20805201,
+0x20805402,
+0x20805603,
+0x20805804,
+0x20805a05,
+0x20805c06,
+0x20805e07,
+0x20806008,
+0x20806209,
+0x2080640a,
+0x2080660b,
+0x2080680c,
+0x20806a0d,
+0x20806c0e,
+0x20806e0f,
+0x4201c003,
+0x33800184,
+0x1c010204,
+0x40200000,
+0x24000190,
+0x24004191,
+0x24008192,
+0x2400c193,
+0x141fc205,
+0x23fffd84,
+0x1c100183,
+0x217ffb85,
+0x40800000,
+0x409ff801,
+0x24000080,
+0x24fd8081,
+0x1cd80081,
+0x33000180,
+0x00000000,
+0x00000000,
+0x01a00182,
+0x3ec00083,
+0xb1c38103,
+0x01a00204,
+0x3ec10082,
+0x4201400d,
+0xb1c38202,
+0x01a00583,
+0x34218682,
+0x3ed80684,
+0xb0408184,
+0x24218682,
+0x01a00603,
+0x00200000,
+0x34214682,
+0x3ed40684,
+0xb0408184,
+0x40800003,
+0x24214682,
+0x21a00083,
+0x40800082,
+0x21a00b02,
+0x4020007f,
+0x1000251e,
+0x42a00002,
+0x32800008,
+0x4205c00c,
+0x00200000,
+0x40a0000b,
+0x3f82070f,
+0x4080020a,
+0x40800709,
+0x3fe3078f,
+0x3fbf0783,
+0x3f200183,
+0x3fbe0183,
+0x3fe30187,
+0x18008387,
+0x4205c002,
+0x3ac30404,
+0x1cffc489,
+0x00200000,
+0x18008403,
+0x38830402,
+0x4cffc486,
+0x3ac28185,
+0xb0408584,
+0x28830402,
+0x1c020408,
+0x38828182,
+0xb0408385,
+0x1802c387,
+0x28828182,
+0x217ff886,
+0x04000582,
+0x32800007,
+0x21a00802,
+0x3fbf0705,
+0x3f200285,
+0x3fbe0285,
+0x3fe30285,
+0x21a00885,
+0x04000603,
+0x21a00903,
+0x40803c02,
+0x21a00982,
+0x04000386,
+0x21a00a06,
+0x40801202,
+0x21a00a82,
+0x73000003,
+0x24200683,
+0x01a00404,
+0x00200000,
+0x34204682,
+0x3ec40683,
+0xb0408203,
+0x24204682,
+0x01a00783,
+0x00200000,
+0x3421c682,
+0x3edc0684,
+0xb0408184,
+0x2421c682,
+0x21a00806,
+0x21a00885,
+0x3fbf0784,
+0x3f200204,
+0x3fbe0204,
+0x3fe30204,
+0x21a00904,
+0x40804002,
+0x21a00982,
+0x21a00a06,
+0x40805a02,
+0x21a00a82,
+0x04000683,
+0x21a00803,
+0x21a00885,
+0x21a00904,
+0x40848002,
+0x21a00982,
+0x21a00a06,
+0x40801002,
+0x21a00a82,
+0x21a00a06,
+0x40806602,
+0x00200000,
+0x35800009,
+0x21a00a82,
+0x40800083,
+0x21a00b83,
+0x01a00c02,
+0x01a00d83,
+0x00003ffb,
+0x40800003,
+0x4020007f,
+0x35000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000000,
+};
diff --git a/arch/powerpc/platforms/cell/spufs/spu_utils.h b/arch/powerpc/platforms/cell/spufs/spu_utils.h
new file mode 100644
index 000000000..4fc1ebb45
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/spu_utils.h
@@ -0,0 +1,147 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * spu_utils.h: Utilities for the SPU side of the context switch operation.
+ *
+ * (C) Copyright IBM 2005
+ */
+
+#ifndef _SPU_CONTEXT_UTILS_H_
+#define _SPU_CONTEXT_UTILS_H_
+
+/*
+ * 64-bit safe EA.
+ */
+typedef union {
+ unsigned long long ull;
+ unsigned int ui[2];
+} addr64;
+
+/*
+ * 128-bit register template.
+ */
+typedef union {
+ unsigned int slot[4];
+ vector unsigned int v;
+} spu_reg128v;
+
+/*
+ * DMA list structure.
+ */
+struct dma_list_elem {
+ unsigned int size;
+ unsigned int ea_low;
+};
+
+/*
+ * Declare storage for 8-byte aligned DMA list.
+ */
+struct dma_list_elem dma_list[15] __attribute__ ((aligned(8)));
+
+/*
+ * External declaration for storage
+ * defined in crt0.
+ */
+extern spu_reg128v regs_spill[NR_SPU_SPILL_REGS];
+
+/*
+ * Compute LSCSA byte offset for a given field.
+ */
+static struct spu_lscsa *dummy = (struct spu_lscsa *)0;
+#define LSCSA_BYTE_OFFSET(_field) \
+ ((char *)(&(dummy->_field)) - (char *)(&(dummy->gprs[0].slot[0])))
+#define LSCSA_QW_OFFSET(_field) (LSCSA_BYTE_OFFSET(_field) >> 4)
+
+static inline void set_event_mask(void)
+{
+ unsigned int event_mask = 0;
+
+ /* Save, Step 4:
+ * Restore, Step 1:
+ * Set the SPU_WrEventMask channel to zero to mask
+ * all events.
+ */
+ spu_writech(SPU_WrEventMask, event_mask);
+}
+
+static inline void set_tag_mask(void)
+{
+ unsigned int tag_mask = 1;
+
+ /* Save, Step 5:
+ * Restore, Step 2:
+ * Set the MFC_WrTagMask channel to '01' to unmask
+ * only tag group 0.
+ */
+ spu_writech(MFC_WrTagMask, tag_mask);
+}
+
+static inline void build_dma_list(addr64 lscsa_ea)
+{
+ unsigned int ea_low;
+ int i;
+
+ /* Save, Step 6:
+ * Restore, Step 3:
+ * Update the effective address for the CSA in the
+ * pre-canned DMA-list in local storage.
+ */
+ ea_low = lscsa_ea.ui[1];
+ ea_low += LSCSA_BYTE_OFFSET(ls[16384]);
+
+ for (i = 0; i < 15; i++, ea_low += 16384) {
+ dma_list[i].size = 16384;
+ dma_list[i].ea_low = ea_low;
+ }
+}
+
+static inline void enqueue_putllc(addr64 lscsa_ea)
+{
+ unsigned int ls = 0;
+ unsigned int size = 128;
+ unsigned int tag_id = 0;
+ unsigned int cmd = 0xB4; /* PUTLLC */
+
+ /* Save, Step 12:
+ * Restore, Step 7:
+ * Send a PUTLLC (tag 0) command to the MFC using
+ * an effective address in the CSA in order to
+ * remove any possible lock-line reservation.
+ */
+ spu_writech(MFC_LSA, ls);
+ spu_writech(MFC_EAH, lscsa_ea.ui[0]);
+ spu_writech(MFC_EAL, lscsa_ea.ui[1]);
+ spu_writech(MFC_Size, size);
+ spu_writech(MFC_TagID, tag_id);
+ spu_writech(MFC_Cmd, cmd);
+}
+
+static inline void set_tag_update(void)
+{
+ unsigned int update_any = 1;
+
+ /* Save, Step 15:
+ * Restore, Step 8:
+ * Write the MFC_WrTagUpdate channel with '01'.
+ */
+ spu_writech(MFC_WrTagUpdate, update_any);
+}
+
+static inline void read_tag_status(void)
+{
+ /* Save, Step 16:
+ * Restore, Step 9:
+ * Read the MFC_RdTagStat channel data; the value read is discarded.
+ */
+ spu_readch(MFC_RdTagStat);
+}
+
+static inline void read_llar_status(void)
+{
+ /* Save, Step 17:
+ * Restore, Step 10:
+ * Read the MFC_RdAtomicStat channel data; the value read is discarded.
+ */
+ spu_readch(MFC_RdAtomicStat);
+}
+
+#endif /* _SPU_CONTEXT_UTILS_H_ */
diff --git a/arch/powerpc/platforms/cell/spufs/spufs.h b/arch/powerpc/platforms/cell/spufs/spufs.h
new file mode 100644
index 000000000..84958487f
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/spufs.h
@@ -0,0 +1,356 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * SPU file system
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ *
+ * Author: Arnd Bergmann <arndb@de.ibm.com>
+ */
+#ifndef SPUFS_H
+#define SPUFS_H
+
+#include <linux/kref.h>
+#include <linux/mutex.h>
+#include <linux/spinlock.h>
+#include <linux/fs.h>
+#include <linux/cpumask.h>
+#include <linux/sched/signal.h>
+
+#include <asm/spu.h>
+#include <asm/spu_csa.h>
+#include <asm/spu_info.h>
+
+#define SPUFS_PS_MAP_SIZE 0x20000
+#define SPUFS_MFC_MAP_SIZE 0x1000
+#define SPUFS_CNTL_MAP_SIZE 0x1000
+#define SPUFS_SIGNAL_MAP_SIZE PAGE_SIZE
+#define SPUFS_MSS_MAP_SIZE 0x1000
+
+/* The magic number for our file system */
+enum {
+ SPUFS_MAGIC = 0x23c9b64e,
+};
+
+struct spu_context_ops;
+struct spu_gang;
+
+/* ctx->sched_flags */
+enum {
+ SPU_SCHED_NOTIFY_ACTIVE,
+ SPU_SCHED_WAS_ACTIVE, /* was active upon spu_acquire_saved() */
+ SPU_SCHED_SPU_RUN, /* context is within spu_run */
+};
+
+enum {
+ SWITCH_LOG_BUFSIZE = 4096,
+};
+
+enum {
+ SWITCH_LOG_START,
+ SWITCH_LOG_STOP,
+ SWITCH_LOG_EXIT,
+};
+
+struct switch_log {
+ wait_queue_head_t wait;
+ unsigned long head;
+ unsigned long tail;
+ struct switch_log_entry {
+ struct timespec64 tstamp;
+ s32 spu_id;
+ u32 type;
+ u32 val;
+ u64 timebase;
+ } log[];
+};
+
+struct spu_context {
+ struct spu *spu; /* pointer to a physical SPU */
+ struct spu_state csa; /* SPU context save area. */
+ spinlock_t mmio_lock; /* protects mmio access */
+ struct address_space *local_store; /* local store mapping. */
+ struct address_space *mfc; /* 'mfc' area mappings. */
+ struct address_space *cntl; /* 'control' area mappings. */
+ struct address_space *signal1; /* 'signal1' area mappings. */
+ struct address_space *signal2; /* 'signal2' area mappings. */
+ struct address_space *mss; /* 'mss' area mappings. */
+ struct address_space *psmap; /* 'psmap' area mappings. */
+ struct mutex mapping_lock;
+ u64 object_id; /* user space pointer for GNU Debugger */
+
+ enum { SPU_STATE_RUNNABLE, SPU_STATE_SAVED } state;
+ struct mutex state_mutex;
+ struct mutex run_mutex;
+
+ struct mm_struct *owner;
+
+ struct kref kref;
+ wait_queue_head_t ibox_wq;
+ wait_queue_head_t wbox_wq;
+ wait_queue_head_t stop_wq;
+ wait_queue_head_t mfc_wq;
+ wait_queue_head_t run_wq;
+ u32 tagwait;
+ struct spu_context_ops *ops;
+ struct work_struct reap_work;
+ unsigned long flags;
+ unsigned long event_return;
+
+ struct list_head gang_list;
+ struct spu_gang *gang;
+ struct kref *prof_priv_kref;
+ void ( * prof_priv_release) (struct kref *kref);
+
+ /* owner thread */
+ pid_t tid;
+
+ /* scheduler fields */
+ struct list_head rq;
+ unsigned int time_slice;
+ unsigned long sched_flags;
+ cpumask_t cpus_allowed;
+ int policy;
+ int prio;
+ int last_ran;
+
+ /* statistics */
+ struct {
+ /* updates protected by ctx->state_mutex */
+ enum spu_utilization_state util_state;
+ unsigned long long tstamp; /* time of last state switch */
+ unsigned long long times[SPU_UTIL_MAX];
+ unsigned long long vol_ctx_switch;
+ unsigned long long invol_ctx_switch;
+ unsigned long long min_flt;
+ unsigned long long maj_flt;
+ unsigned long long hash_flt;
+ unsigned long long slb_flt;
+ unsigned long long slb_flt_base; /* # at last ctx switch */
+ unsigned long long class2_intr;
+ unsigned long long class2_intr_base; /* # at last ctx switch */
+ unsigned long long libassist;
+ } stats;
+
+ /* context switch log */
+ struct switch_log *switch_log;
+
+ struct list_head aff_list;
+ int aff_head;
+ int aff_offset;
+};
+
+struct spu_gang {
+ struct list_head list;
+ struct mutex mutex;
+ struct kref kref;
+ int contexts;
+
+ struct spu_context *aff_ref_ctx;
+ struct list_head aff_list_head;
+ struct mutex aff_mutex;
+ int aff_flags;
+ struct spu *aff_ref_spu;
+ atomic_t aff_sched_count;
+};
+
+/* Flag bits for spu_gang aff_flags */
+#define AFF_OFFSETS_SET 1
+#define AFF_MERGED 2
+
+struct mfc_dma_command {
+ int32_t pad; /* reserved */
+ uint32_t lsa; /* local storage address */
+ uint64_t ea; /* effective address */
+ uint16_t size; /* transfer size */
+ uint16_t tag; /* command tag */
+ uint16_t class; /* class ID */
+ uint16_t cmd; /* command opcode */
+};
+
+
+/* SPU context query/set operations. */
+struct spu_context_ops {
+ int (*mbox_read) (struct spu_context * ctx, u32 * data);
+ u32(*mbox_stat_read) (struct spu_context * ctx);
+ __poll_t (*mbox_stat_poll)(struct spu_context *ctx, __poll_t events);
+ int (*ibox_read) (struct spu_context * ctx, u32 * data);
+ int (*wbox_write) (struct spu_context * ctx, u32 data);
+ u32(*signal1_read) (struct spu_context * ctx);
+ void (*signal1_write) (struct spu_context * ctx, u32 data);
+ u32(*signal2_read) (struct spu_context * ctx);
+ void (*signal2_write) (struct spu_context * ctx, u32 data);
+ void (*signal1_type_set) (struct spu_context * ctx, u64 val);
+ u64(*signal1_type_get) (struct spu_context * ctx);
+ void (*signal2_type_set) (struct spu_context * ctx, u64 val);
+ u64(*signal2_type_get) (struct spu_context * ctx);
+ u32(*npc_read) (struct spu_context * ctx);
+ void (*npc_write) (struct spu_context * ctx, u32 data);
+ u32(*status_read) (struct spu_context * ctx);
+ char*(*get_ls) (struct spu_context * ctx);
+ void (*privcntl_write) (struct spu_context *ctx, u64 data);
+ u32 (*runcntl_read) (struct spu_context * ctx);
+ void (*runcntl_write) (struct spu_context * ctx, u32 data);
+ void (*runcntl_stop) (struct spu_context * ctx);
+ void (*master_start) (struct spu_context * ctx);
+ void (*master_stop) (struct spu_context * ctx);
+ int (*set_mfc_query)(struct spu_context * ctx, u32 mask, u32 mode);
+ u32 (*read_mfc_tagstatus)(struct spu_context * ctx);
+ u32 (*get_mfc_free_elements)(struct spu_context *ctx);
+ int (*send_mfc_command)(struct spu_context * ctx,
+ struct mfc_dma_command * cmd);
+ void (*dma_info_read) (struct spu_context * ctx,
+ struct spu_dma_info * info);
+ void (*proxydma_info_read) (struct spu_context * ctx,
+ struct spu_proxydma_info * info);
+ void (*restart_dma)(struct spu_context *ctx);
+};
+
+extern struct spu_context_ops spu_hw_ops;
+extern struct spu_context_ops spu_backing_ops;
+
+struct spufs_inode_info {
+ struct spu_context *i_ctx;
+ struct spu_gang *i_gang;
+ struct inode vfs_inode;
+ int i_openers;
+};
+#define SPUFS_I(inode) \
+ container_of(inode, struct spufs_inode_info, vfs_inode)
+
+struct spufs_tree_descr {
+ const char *name;
+ const struct file_operations *ops;
+ umode_t mode;
+ size_t size;
+};
+
+extern const struct spufs_tree_descr spufs_dir_contents[];
+extern const struct spufs_tree_descr spufs_dir_nosched_contents[];
+extern const struct spufs_tree_descr spufs_dir_debug_contents[];
+
+/* system call implementation */
+extern struct spufs_calls spufs_calls;
+struct coredump_params;
+long spufs_run_spu(struct spu_context *ctx, u32 *npc, u32 *status);
+long spufs_create(const struct path *nd, struct dentry *dentry, unsigned int flags,
+ umode_t mode, struct file *filp);
+/* ELF coredump callbacks for writing SPU ELF notes */
+extern int spufs_coredump_extra_notes_size(void);
+extern int spufs_coredump_extra_notes_write(struct coredump_params *cprm);
+
+extern const struct file_operations spufs_context_fops;
+
+/* gang management */
+struct spu_gang *alloc_spu_gang(void);
+struct spu_gang *get_spu_gang(struct spu_gang *gang);
+int put_spu_gang(struct spu_gang *gang);
+void spu_gang_remove_ctx(struct spu_gang *gang, struct spu_context *ctx);
+void spu_gang_add_ctx(struct spu_gang *gang, struct spu_context *ctx);
+
+/* fault handling */
+int spufs_handle_class1(struct spu_context *ctx);
+int spufs_handle_class0(struct spu_context *ctx);
+
+/* affinity */
+struct spu *affinity_check(struct spu_context *ctx);
+
+/* context management */
+extern atomic_t nr_spu_contexts;
+static inline int __must_check spu_acquire(struct spu_context *ctx)
+{
+ return mutex_lock_interruptible(&ctx->state_mutex);
+}
+
+static inline void spu_release(struct spu_context *ctx)
+{
+ mutex_unlock(&ctx->state_mutex);
+}
+
+struct spu_context * alloc_spu_context(struct spu_gang *gang);
+void destroy_spu_context(struct kref *kref);
+struct spu_context * get_spu_context(struct spu_context *ctx);
+int put_spu_context(struct spu_context *ctx);
+void spu_unmap_mappings(struct spu_context *ctx);
+
+void spu_forget(struct spu_context *ctx);
+int __must_check spu_acquire_saved(struct spu_context *ctx);
+void spu_release_saved(struct spu_context *ctx);
+
+int spu_stopped(struct spu_context *ctx, u32 * stat);
+void spu_del_from_rq(struct spu_context *ctx);
+int spu_activate(struct spu_context *ctx, unsigned long flags);
+void spu_deactivate(struct spu_context *ctx);
+void spu_yield(struct spu_context *ctx);
+void spu_switch_log_notify(struct spu *spu, struct spu_context *ctx,
+ u32 type, u32 val);
+void spu_set_timeslice(struct spu_context *ctx);
+void spu_update_sched_info(struct spu_context *ctx);
+void __spu_update_sched_info(struct spu_context *ctx);
+int __init spu_sched_init(void);
+void spu_sched_exit(void);
+
+extern char *isolated_loader;
+
+/*
+ * spufs_wait
+ * Same as wait_event_interruptible(), except that here
+ * we need to call spu_release(ctx) before sleeping, and
+ * then spu_acquire(ctx) when awoken. 'ctx' is not a macro
+ * argument: it must be a variable in the caller's scope.
+ * Returns 0 with state_mutex re-acquired when successful, or -ERESTARTSYS
+ * (or the spu_acquire() error) with the state_mutex dropped when interrupted.
+ */
+
+#define spufs_wait(wq, condition) \
+({ \
+ int __ret = 0; \
+ DEFINE_WAIT(__wait); \
+ for (;;) { \
+ prepare_to_wait(&(wq), &__wait, TASK_INTERRUPTIBLE); \
+ if (condition) \
+ break; \
+ spu_release(ctx); \
+ if (signal_pending(current)) { \
+ __ret = -ERESTARTSYS; \
+ break; \
+ } \
+ schedule(); \
+ __ret = spu_acquire(ctx); \
+ if (__ret) \
+ break; \
+ } \
+ finish_wait(&(wq), &__wait); \
+ __ret; \
+})
+
+size_t spu_wbox_write(struct spu_context *ctx, u32 data);
+size_t spu_ibox_read(struct spu_context *ctx, u32 *data);
+
+/* irq callback funcs. */
+void spufs_ibox_callback(struct spu *spu);
+void spufs_wbox_callback(struct spu *spu);
+void spufs_stop_callback(struct spu *spu, int irq);
+void spufs_mfc_callback(struct spu *spu);
+void spufs_dma_callback(struct spu *spu, int type);
+
+struct spufs_coredump_reader {
+ char *name;
+ ssize_t (*dump)(struct spu_context *ctx, struct coredump_params *cprm);
+ u64 (*get)(struct spu_context *ctx);
+ size_t size;
+};
+extern const struct spufs_coredump_reader spufs_coredump_read[];
+
+extern int spu_init_csa(struct spu_state *csa);
+extern void spu_fini_csa(struct spu_state *csa);
+extern int spu_save(struct spu_state *prev, struct spu *spu);
+extern int spu_restore(struct spu_state *new, struct spu *spu);
+extern int spu_switch(struct spu_state *prev, struct spu_state *new,
+ struct spu *spu);
+extern int spu_alloc_lscsa(struct spu_state *csa);
+extern void spu_free_lscsa(struct spu_state *csa);
+
+extern void spuctx_switch_state(struct spu_context *ctx,
+ enum spu_utilization_state new_state);
+
+#endif
diff --git a/arch/powerpc/platforms/cell/spufs/sputrace.h b/arch/powerpc/platforms/cell/spufs/sputrace.h
new file mode 100644
index 000000000..1def11e91
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/sputrace.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#if !defined(_TRACE_SPUFS_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_SPUFS_H
+
+#include <linux/tracepoint.h>
+#include <linux/stringify.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM spufs
+
+/*
+ * spufs_context tracepoint: records an event name together with ctx->tid
+ * and the number of the SPU involved (-1 when 'spu' is NULL).
+ * Only the 'name' pointer is stored in the entry, not a copy of the string,
+ * and it is formatted with %s in TP_printk, so the string must remain valid
+ * for as long as the entry can be printed.
+ */
+TRACE_EVENT(spufs_context,
+ TP_PROTO(struct spu_context *ctx, struct spu *spu, const char *name),
+ TP_ARGS(ctx, spu, name),
+
+ TP_STRUCT__entry(
+ __field(const char *, name)
+ __field(int, owner_tid)
+ __field(int, number)
+ ),
+
+ TP_fast_assign(
+ __entry->name = name;
+ __entry->owner_tid = ctx->tid;
+ __entry->number = spu ? spu->number : -1;
+ ),
+
+ TP_printk("%s (ctxthread = %d, spu = %d)",
+ __entry->name, __entry->owner_tid, __entry->number)
+);
+
+/*
+ * Convenience wrappers around trace_spufs_context(). 'name' is converted to
+ * a string literal with __stringify(), so it is written without quotes at
+ * the call site. The _nospu_ variant passes NULL for the spu argument.
+ */
+#define spu_context_trace(name, ctx, spu) \
+ trace_spufs_context(ctx, spu, __stringify(name))
+#define spu_context_nospu_trace(name, ctx) \
+ trace_spufs_context(ctx, NULL, __stringify(name))
+
+#endif /* _TRACE_SPUFS_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE sputrace
+#include <trace/define_trace.h>
diff --git a/arch/powerpc/platforms/cell/spufs/switch.c b/arch/powerpc/platforms/cell/spufs/switch.c
new file mode 100644
index 000000000..b41e81b22
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/switch.c
@@ -0,0 +1,2206 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * spu_switch.c
+ *
+ * (C) Copyright IBM Corp. 2005
+ *
+ * Author: Mark Nutter <mnutter@us.ibm.com>
+ *
+ * Host-side part of SPU context switch sequence outlined in
+ * Synergistic Processor Element, Book IV.
+ *
+ * A fully preemptive switch of an SPE is very expensive in terms
+ * of time and system resources. SPE Book IV indicates that SPE
+ * allocation should follow a "serially reusable device" model,
+ * in which the SPE is assigned a task until it completes. When
+ * this is not possible, this sequence may be used to preemptively
+ * save, and then later (optionally) restore the context of a
+ * program executing on an SPE.
+ */
+
+#include <linux/export.h>
+#include <linux/errno.h>
+#include <linux/hardirq.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/vmalloc.h>
+#include <linux/smp.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+
+#include <asm/io.h>
+#include <asm/spu.h>
+#include <asm/spu_priv1.h>
+#include <asm/spu_csa.h>
+#include <asm/mmu_context.h>
+
+#include "spufs.h"
+
+#include "spu_save_dump.h"
+#include "spu_restore_dump.h"
+
+/*
+ * POLL_WHILE_TRUE(_c): wait until the expression _c becomes false.
+ *
+ * The "#if 0" variant (compiled out) is a plain busy loop. The active
+ * variant spins up to RELAX_SPIN_COUNT times with cpu_relax() and, if _c is
+ * still true, calls yield() before trying again.
+ *
+ * _c is evaluated several times per pass, so it must be safe to re-evaluate
+ * (the users in this file pass register reads). The expansion is a brace
+ * block rather than do { } while (0), so "POLL_WHILE_TRUE(x);" leaves an
+ * empty statement behind; take care inside an unbraced if/else.
+ *
+ * POLL_WHILE_FALSE(_c) is the same wait with the condition negated.
+ */
+#if 0
+#define POLL_WHILE_TRUE(_c) { \
+ do { \
+ } while (_c); \
+ }
+#else
+#define RELAX_SPIN_COUNT 1000
+#define POLL_WHILE_TRUE(_c) { \
+ do { \
+ int _i; \
+ for (_i=0; _i<RELAX_SPIN_COUNT && (_c); _i++) { \
+ cpu_relax(); \
+ } \
+ if (unlikely(_c)) yield(); \
+ else break; \
+ } while (_c); \
+ }
+#endif /* debug */
+
+#define POLL_WHILE_FALSE(_c) POLL_WHILE_TRUE(!(_c))
+
+static inline void acquire_spu_lock(struct spu *spu)
+{
+ /* Save, Step 1:
+ * Restore, Step 1:
+ * Acquire SPU-specific mutual exclusion lock.
+ * TBD.
+ *
+ * Placeholder: the body is empty, so no lock is taken and
+ * 'spu' is unused.
+ */
+}
+
+static inline void release_spu_lock(struct spu *spu)
+{
+ /* Restore, Step 76:
+ * Release SPU-specific mutual exclusion lock.
+ * TBD.
+ *
+ * Placeholder: the body is empty, so nothing is released and
+ * 'spu' is unused.
+ */
+}
+
+static inline int check_spu_isolate(struct spu_state *csa, struct spu *spu)
+{
+ struct spu_problem __iomem *prob = spu->problem;
+ u32 isolate_state;
+
+ /* Save, Step 2:
+ * Save, Step 6:
+ * If SPU_Status[E,L,IS] any field is '1', this
+ * SPU is in isolate state and cannot be context
+ * saved at this time.
+ *
+ * Returns 1 when any of the ISOLATED_STATE,
+ * ISOLATED_LOAD_STATUS or ISOLATED_EXIT_STATUS bits is set
+ * in the SPU_Status value read from problem state, and 0
+ * otherwise. 'csa' is not used.
+ */
+ isolate_state = SPU_STATUS_ISOLATED_STATE |
+ SPU_STATUS_ISOLATED_LOAD_STATUS | SPU_STATUS_ISOLATED_EXIT_STATUS;
+ return (in_be32(&prob->spu_status_R) & isolate_state) ? 1 : 0;
+}
+
+static inline void disable_interrupts(struct spu_state *csa, struct spu *spu)
+{
+ /* Save, Step 3:
+ * Restore, Step 2:
+ * Save INT_Mask_class0 in CSA.
+ * Write INT_MASK_class0 with value of 0.
+ * Save INT_Mask_class1 in CSA.
+ * Write INT_MASK_class1 with value of 0.
+ * Save INT_Mask_class2 in CSA.
+ * Write INT_MASK_class2 with value of 0.
+ * Synchronize all three interrupts to be sure
+ * we no longer execute a handler on another CPU.
+ *
+ * 'csa' may be NULL; the current masks are then discarded
+ * instead of saved. The mask reads and writes are done with
+ * spu->register_lock held (spin_lock_irq).
+ */
+ spin_lock_irq(&spu->register_lock);
+ if (csa) {
+ csa->priv1.int_mask_class0_RW = spu_int_mask_get(spu, 0);
+ csa->priv1.int_mask_class1_RW = spu_int_mask_get(spu, 1);
+ csa->priv1.int_mask_class2_RW = spu_int_mask_get(spu, 2);
+ }
+ spu_int_mask_set(spu, 0, 0ul);
+ spu_int_mask_set(spu, 1, 0ul);
+ spu_int_mask_set(spu, 2, 0ul);
+ eieio();
+ spin_unlock_irq(&spu->register_lock);
+
+ /*
+ * This flag needs to be set before calling synchronize_irq so
+ * that the update will be visible to the relevant handlers
+ * via a simple load.
+ *
+ * SPU_CONTEXT_FAULT_PENDING is cleared at the same point;
+ * set_switch_active() tests it later in the sequence.
+ */
+ set_bit(SPU_CONTEXT_SWITCH_PENDING, &spu->flags);
+ clear_bit(SPU_CONTEXT_FAULT_PENDING, &spu->flags);
+ synchronize_irq(spu->irqs[0]);
+ synchronize_irq(spu->irqs[1]);
+ synchronize_irq(spu->irqs[2]);
+}
+
+static inline void set_watchdog_timer(struct spu_state *csa, struct spu *spu)
+{
+ /* Save, Step 4:
+ * Restore, Step 25.
+ * Set a software watchdog timer, which specifies the
+ * maximum allowable time for a context save sequence.
+ *
+ * For present, this implementation will not set a global
+ * watchdog timer, as virtualization & variable system load
+ * may cause unpredictable execution times.
+ *
+ * Consequently the body is empty and both arguments are unused.
+ */
+}
+
+static inline void inhibit_user_access(struct spu_state *csa, struct spu *spu)
+{
+ /* Save, Step 5:
+ * Restore, Step 3:
+ * Inhibit user-space access (if provided) to this
+ * SPU by unmapping the virtual pages assigned to
+ * the SPU memory-mapped I/O (MMIO) for problem
+ * state. TBD.
+ *
+ * Placeholder: the body is empty; nothing is unmapped here.
+ */
+}
+
+static inline void set_switch_pending(struct spu_state *csa, struct spu *spu)
+{
+ /* Save, Step 7:
+ * Restore, Step 5:
+ * Set a software context switch pending flag.
+ * Done above in Step 3 - disable_interrupts().
+ *
+ * The body is therefore empty; it exists only so that the
+ * step has a named helper.
+ */
+}
+
+static inline void save_mfc_cntl(struct spu_state *csa, struct spu *spu)
+{
+ struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+ /* Save, Step 8:
+ * Suspend DMA and save MFC_CNTL.
+ *
+ * Dispatches on the suspend-status field of MFC_CNTL:
+ * - suspend in progress: poll until it reports complete, then
+ *   fall through to the "complete" case;
+ * - suspend complete: record MFC_CNTL with the suspend-request
+ *   bit set;
+ * - normal operation: request a suspend, poll until complete,
+ *   then record MFC_CNTL with the suspend-request bit and
+ *   MFC_CNTL_SUSPEND_MASK cleared.
+ * Any other field value does nothing (there is no default case).
+ * 'csa' may be NULL, in which case nothing is recorded.
+ */
+ switch (in_be64(&priv2->mfc_control_RW) &
+ MFC_CNTL_SUSPEND_DMA_STATUS_MASK) {
+ case MFC_CNTL_SUSPEND_IN_PROGRESS:
+ POLL_WHILE_FALSE((in_be64(&priv2->mfc_control_RW) &
+ MFC_CNTL_SUSPEND_DMA_STATUS_MASK) ==
+ MFC_CNTL_SUSPEND_COMPLETE);
+ fallthrough;
+ case MFC_CNTL_SUSPEND_COMPLETE:
+ if (csa)
+ csa->priv2.mfc_control_RW =
+ in_be64(&priv2->mfc_control_RW) |
+ MFC_CNTL_SUSPEND_DMA_QUEUE;
+ break;
+ case MFC_CNTL_NORMAL_DMA_QUEUE_OPERATION:
+ out_be64(&priv2->mfc_control_RW, MFC_CNTL_SUSPEND_DMA_QUEUE);
+ POLL_WHILE_FALSE((in_be64(&priv2->mfc_control_RW) &
+ MFC_CNTL_SUSPEND_DMA_STATUS_MASK) ==
+ MFC_CNTL_SUSPEND_COMPLETE);
+ if (csa)
+ csa->priv2.mfc_control_RW =
+ in_be64(&priv2->mfc_control_RW) &
+ ~MFC_CNTL_SUSPEND_DMA_QUEUE &
+ ~MFC_CNTL_SUSPEND_MASK;
+ break;
+ }
+}
+
+static inline void save_spu_runcntl(struct spu_state *csa, struct spu *spu)
+{
+ struct spu_problem __iomem *prob = spu->problem;
+
+ /* Save, Step 9:
+ * Save SPU_Runcntl in the CSA. This value contains
+ * the "Application Desired State".
+ *
+ * Read with in_be32() from the problem-state area and stored
+ * in csa->prob.spu_runcntl_RW.
+ */
+ csa->prob.spu_runcntl_RW = in_be32(&prob->spu_runcntl_RW);
+}
+
+static inline void save_mfc_sr1(struct spu_state *csa, struct spu *spu)
+{
+ /* Save, Step 10:
+ * Save MFC_SR1 in the CSA.
+ *
+ * Obtained through spu_mfc_sr1_get() and stored in
+ * csa->priv1.mfc_sr1_RW.
+ */
+ csa->priv1.mfc_sr1_RW = spu_mfc_sr1_get(spu);
+}
+
+static inline void save_spu_status(struct spu_state *csa, struct spu *spu)
+{
+ struct spu_problem __iomem *prob = spu->problem;
+
+ /* Save, Step 11:
+ * Read SPU_Status[R], and save to CSA.
+ *
+ * If the SPU is not running, the status register value is
+ * saved as read. If it is running, a stop is requested and
+ * the R bit is polled until it clears; when none of the
+ * INVALID_INSTR, SINGLE_STEP, STOPPED_BY_HALT or
+ * STOPPED_BY_STOP bits is set afterwards, the saved status is
+ * recorded as SPU_STATUS_RUNNING, otherwise the register
+ * value is saved.
+ */
+ if ((in_be32(&prob->spu_status_R) & SPU_STATUS_RUNNING) == 0) {
+ csa->prob.spu_status_R = in_be32(&prob->spu_status_R);
+ } else {
+ u32 stopped;
+
+ out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_STOP);
+ eieio();
+ POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) &
+ SPU_STATUS_RUNNING);
+ stopped =
+ SPU_STATUS_INVALID_INSTR | SPU_STATUS_SINGLE_STEP |
+ SPU_STATUS_STOPPED_BY_HALT | SPU_STATUS_STOPPED_BY_STOP;
+ if ((in_be32(&prob->spu_status_R) & stopped) == 0)
+ csa->prob.spu_status_R = SPU_STATUS_RUNNING;
+ else
+ csa->prob.spu_status_R = in_be32(&prob->spu_status_R);
+ }
+}
+
+static inline void save_mfc_stopped_status(struct spu_state *csa,
+ struct spu *spu)
+{
+ struct spu_priv2 __iomem *priv2 = spu->priv2;
+ const u64 mask = MFC_CNTL_DECREMENTER_RUNNING |
+ MFC_CNTL_DMA_QUEUES_EMPTY;
+
+ /* Save, Step 12:
+ * Read MFC_CNTL[Ds]. Update saved copy of
+ * CSA.MFC_CNTL[Ds].
+ *
+ * update: do the same with MFC_CNTL[Q].
+ *
+ * Only the two bits in 'mask' are replaced in the saved copy;
+ * every other bit of csa->priv2.mfc_control_RW is kept.
+ */
+ csa->priv2.mfc_control_RW &= ~mask;
+ csa->priv2.mfc_control_RW |= in_be64(&priv2->mfc_control_RW) & mask;
+}
+
+static inline void halt_mfc_decr(struct spu_state *csa, struct spu *spu)
+{
+ struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+ /* Save, Step 13:
+ * Write MFC_CNTL[Dh] set to a '1' to halt
+ * the decrementer.
+ *
+ * The value written also carries MFC_CNTL_SUSPEND_MASK and is
+ * followed by eieio(). 'csa' is not used.
+ */
+ out_be64(&priv2->mfc_control_RW,
+ MFC_CNTL_DECREMENTER_HALTED | MFC_CNTL_SUSPEND_MASK);
+ eieio();
+}
+
+static inline void save_timebase(struct spu_state *csa, struct spu *spu)
+{
+ /* Save, Step 14:
+ * Read PPE Timebase High and Timebase low registers
+ * and save in CSA. TBD.
+ *
+ * Implemented by storing get_cycles() in csa->suspend_time;
+ * 'spu' is not used.
+ */
+ csa->suspend_time = get_cycles();
+}
+
+static inline void remove_other_spu_access(struct spu_state *csa,
+ struct spu *spu)
+{
+ /* Save, Step 15:
+ * Remove other SPU access to this SPU by unmapping
+ * this SPU's pages from their address space. TBD.
+ *
+ * Placeholder: the body is empty; nothing is unmapped here.
+ */
+}
+
+static inline void do_mfc_mssync(struct spu_state *csa, struct spu *spu)
+{
+ struct spu_problem __iomem *prob = spu->problem;
+
+ /* Save, Step 16:
+ * Restore, Step 11.
+ * Write SPU_MSSync register. Poll SPU_MSSync[P]
+ * for a value of 0.
+ *
+ * Writes 1 to the register and then waits until
+ * MS_SYNC_PENDING reads back clear. 'csa' is not used.
+ */
+ out_be64(&prob->spc_mssync_RW, 1UL);
+ POLL_WHILE_TRUE(in_be64(&prob->spc_mssync_RW) & MS_SYNC_PENDING);
+}
+
+static inline void issue_mfc_tlbie(struct spu_state *csa, struct spu *spu)
+{
+ /* Save, Step 17:
+ * Restore, Step 12.
+ * Restore, Step 48.
+ * Write TLB_Invalidate_Entry[IS,VPN,L,Lp]=0 register.
+ * Then issue a PPE sync instruction.
+ *
+ * Done through spu_tlb_invalidate() followed by mb().
+ * 'csa' is not used.
+ */
+ spu_tlb_invalidate(spu);
+ mb();
+}
+
+static inline void handle_pending_interrupts(struct spu_state *csa,
+ struct spu *spu)
+{
+ /* Save, Step 18:
+ * Handle any pending interrupts from this SPU
+ * here. This is OS or hypervisor specific. One
+ * option is to re-enable interrupts to handle any
+ * pending interrupts, with the interrupt handlers
+ * recognizing the software Context Switch Pending
+ * flag, to ensure the SPU execution or MFC command
+ * queue is not restarted. TBD.
+ *
+ * Placeholder: the body is empty; no interrupts are handled
+ * by this helper.
+ */
+}
+
+static inline void save_mfc_queues(struct spu_state *csa, struct spu *spu)
+{
+ struct spu_priv2 __iomem *priv2 = spu->priv2;
+ int i;
+
+ /* Save, Step 19:
+ * If MFC_Cntl[Se]=0 then save
+ * MFC command queues.
+ *
+ * Copies the four data words of each of the 8 puq[] entries
+ * and each of the 16 spuq[] entries into the CSA. When
+ * MFC_CNTL_DMA_QUEUES_EMPTY is set nothing is copied, so the
+ * queue fields in the CSA keep whatever they held before.
+ */
+ if ((in_be64(&priv2->mfc_control_RW) & MFC_CNTL_DMA_QUEUES_EMPTY) == 0) {
+ for (i = 0; i < 8; i++) {
+ csa->priv2.puq[i].mfc_cq_data0_RW =
+ in_be64(&priv2->puq[i].mfc_cq_data0_RW);
+ csa->priv2.puq[i].mfc_cq_data1_RW =
+ in_be64(&priv2->puq[i].mfc_cq_data1_RW);
+ csa->priv2.puq[i].mfc_cq_data2_RW =
+ in_be64(&priv2->puq[i].mfc_cq_data2_RW);
+ csa->priv2.puq[i].mfc_cq_data3_RW =
+ in_be64(&priv2->puq[i].mfc_cq_data3_RW);
+ }
+ for (i = 0; i < 16; i++) {
+ csa->priv2.spuq[i].mfc_cq_data0_RW =
+ in_be64(&priv2->spuq[i].mfc_cq_data0_RW);
+ csa->priv2.spuq[i].mfc_cq_data1_RW =
+ in_be64(&priv2->spuq[i].mfc_cq_data1_RW);
+ csa->priv2.spuq[i].mfc_cq_data2_RW =
+ in_be64(&priv2->spuq[i].mfc_cq_data2_RW);
+ csa->priv2.spuq[i].mfc_cq_data3_RW =
+ in_be64(&priv2->spuq[i].mfc_cq_data3_RW);
+ }
+ }
+}
+
+static inline void save_ppu_querymask(struct spu_state *csa, struct spu *spu)
+{
+ struct spu_problem __iomem *prob = spu->problem;
+
+ /* Save, Step 20:
+ * Save the PPU_QueryMask register
+ * in the CSA.
+ *
+ * The register is the problem-state dma_querymask_RW field,
+ * read with in_be32().
+ */
+ csa->prob.dma_querymask_RW = in_be32(&prob->dma_querymask_RW);
+}
+
+static inline void save_ppu_querytype(struct spu_state *csa, struct spu *spu)
+{
+ struct spu_problem __iomem *prob = spu->problem;
+
+ /* Save, Step 21:
+ * Save the PPU_QueryType register
+ * in the CSA.
+ *
+ * The register is the problem-state dma_querytype_RW field,
+ * read with in_be32().
+ */
+ csa->prob.dma_querytype_RW = in_be32(&prob->dma_querytype_RW);
+}
+
+static inline void save_ppu_tagstatus(struct spu_state *csa, struct spu *spu)
+{
+ struct spu_problem __iomem *prob = spu->problem;
+
+ /* Save the Prxy_TagStatus register in the CSA.
+ *
+ * It is unnecessary to restore dma_tagstatus_R, however,
+ * dma_tagstatus_R in the CSA is accessed via backing_ops, so
+ * we must save it.
+ *
+ * Unlike the neighbouring helpers this one carries no
+ * "Save, Step N" label.
+ */
+ csa->prob.dma_tagstatus_R = in_be32(&prob->dma_tagstatus_R);
+}
+
+static inline void save_mfc_csr_tsq(struct spu_state *csa, struct spu *spu)
+{
+ struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+ /* Save, Step 22:
+ * Save the MFC_CSR_TSQ register
+ * in the LSCSA.
+ *
+ * NOTE(review): the code stores the value in
+ * csa->priv2.spu_tag_status_query_RW, i.e. in the CSA's priv2
+ * image rather than in csa->lscsa - confirm the wording above.
+ */
+ csa->priv2.spu_tag_status_query_RW =
+ in_be64(&priv2->spu_tag_status_query_RW);
+}
+
+static inline void save_mfc_csr_cmd(struct spu_state *csa, struct spu *spu)
+{
+ struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+ /* Save, Step 23:
+ * Save the MFC_CSR_CMD1 and MFC_CSR_CMD2
+ * registers in the CSA.
+ *
+ * These are the priv2 spu_cmd_buf1_RW and spu_cmd_buf2_RW
+ * fields, each read with in_be64().
+ */
+ csa->priv2.spu_cmd_buf1_RW = in_be64(&priv2->spu_cmd_buf1_RW);
+ csa->priv2.spu_cmd_buf2_RW = in_be64(&priv2->spu_cmd_buf2_RW);
+}
+
+static inline void save_mfc_csr_ato(struct spu_state *csa, struct spu *spu)
+{
+ struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+ /* Save, Step 24:
+ * Save the MFC_CSR_ATO register in
+ * the CSA.
+ *
+ * This is the priv2 spu_atomic_status_RW field, read with
+ * in_be64().
+ */
+ csa->priv2.spu_atomic_status_RW = in_be64(&priv2->spu_atomic_status_RW);
+}
+
+static inline void save_mfc_tclass_id(struct spu_state *csa, struct spu *spu)
+{
+ /* Save, Step 25:
+ * Save the MFC_TCLASS_ID register in
+ * the CSA.
+ *
+ * Obtained through spu_mfc_tclass_id_get() and stored in
+ * csa->priv1.mfc_tclass_id_RW.
+ */
+ csa->priv1.mfc_tclass_id_RW = spu_mfc_tclass_id_get(spu);
+}
+
+static inline void set_mfc_tclass_id(struct spu_state *csa, struct spu *spu)
+{
+ /* Save, Step 26:
+ * Restore, Step 23.
+ * Write the MFC_TCLASS_ID register with
+ * the value 0x10000000.
+ *
+ * Written through spu_mfc_tclass_id_set() and followed by
+ * eieio(). 'csa' is not used.
+ */
+ spu_mfc_tclass_id_set(spu, 0x10000000);
+ eieio();
+}
+
+static inline void purge_mfc_queue(struct spu_state *csa, struct spu *spu)
+{
+ struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+ /* Save, Step 27:
+ * Restore, Step 14.
+ * Write MFC_CNTL[Pc]=1 (purge queue).
+ *
+ * The value written also carries MFC_CNTL_SUSPEND_MASK. This
+ * helper only issues the request; wait_purge_complete() polls
+ * for completion. 'csa' is not used.
+ */
+ out_be64(&priv2->mfc_control_RW,
+ MFC_CNTL_PURGE_DMA_REQUEST |
+ MFC_CNTL_SUSPEND_MASK);
+ eieio();
+}
+
+static inline void wait_purge_complete(struct spu_state *csa, struct spu *spu)
+{
+ struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+ /* Save, Step 28:
+ * Poll MFC_CNTL[Ps] until value '11' is read
+ * (purge complete).
+ *
+ * The wait has no timeout: it loops until the purge-status
+ * field equals MFC_CNTL_PURGE_DMA_COMPLETE. 'csa' is not used.
+ */
+ POLL_WHILE_FALSE((in_be64(&priv2->mfc_control_RW) &
+ MFC_CNTL_PURGE_DMA_STATUS_MASK) ==
+ MFC_CNTL_PURGE_DMA_COMPLETE);
+}
+
+static inline void setup_mfc_sr1(struct spu_state *csa, struct spu *spu)
+{
+ /* Save, Step 30:
+ * Restore, Step 18:
+ * Write MFC_SR1 with MFC_SR1[D=0,S=1] and
+ * MFC_SR1[TL,R,Pr,T] set correctly for the
+ * OS specific environment.
+ *
+ * Implementation note: The SPU-side code
+ * for save/restore is privileged, so the
+ * MFC_SR1[Pr] bit is not set.
+ *
+ * The value written is the OR of the MASTER_RUN_CONTROL,
+ * RELOCATE and BUS_TLBIE masks. 'csa' is not used.
+ */
+ spu_mfc_sr1_set(spu, (MFC_STATE1_MASTER_RUN_CONTROL_MASK |
+ MFC_STATE1_RELOCATE_MASK |
+ MFC_STATE1_BUS_TLBIE_MASK));
+}
+
+static inline void save_spu_npc(struct spu_state *csa, struct spu *spu)
+{
+ struct spu_problem __iomem *prob = spu->problem;
+
+ /* Save, Step 31:
+ * Save SPU_NPC in the CSA.
+ *
+ * Read with in_be32() from the problem-state area.
+ */
+ csa->prob.spu_npc_RW = in_be32(&prob->spu_npc_RW);
+}
+
+static inline void save_spu_privcntl(struct spu_state *csa, struct spu *spu)
+{
+ struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+ /* Save, Step 32:
+ * Save SPU_PrivCntl in the CSA.
+ *
+ * Read with in_be64() from the priv2 area.
+ */
+ csa->priv2.spu_privcntl_RW = in_be64(&priv2->spu_privcntl_RW);
+}
+
+static inline void reset_spu_privcntl(struct spu_state *csa, struct spu *spu)
+{
+ struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+ /* Save, Step 33:
+ * Restore, Step 16:
+ * Write SPU_PrivCntl[S,Le,A] fields reset to 0.
+ *
+ * The whole 64-bit register is written as 0, followed by
+ * eieio(). 'csa' is not used.
+ */
+ out_be64(&priv2->spu_privcntl_RW, 0UL);
+ eieio();
+}
+
+static inline void save_spu_lslr(struct spu_state *csa, struct spu *spu)
+{
+ struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+ /* Save, Step 34:
+ * Save SPU_LSLR in the CSA.
+ *
+ * Read with in_be64() from the priv2 area.
+ */
+ csa->priv2.spu_lslr_RW = in_be64(&priv2->spu_lslr_RW);
+}
+
+static inline void reset_spu_lslr(struct spu_state *csa, struct spu *spu)
+{
+ struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+ /* Save, Step 35:
+ * Restore, Step 17.
+ * Reset SPU_LSLR.
+ *
+ * "Reset" here means writing LS_ADDR_MASK to the register,
+ * followed by eieio(). 'csa' is not used.
+ */
+ out_be64(&priv2->spu_lslr_RW, LS_ADDR_MASK);
+ eieio();
+}
+
+static inline void save_spu_cfg(struct spu_state *csa, struct spu *spu)
+{
+ struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+ /* Save, Step 36:
+ * Save SPU_Cfg in the CSA.
+ *
+ * Read with in_be64() from the priv2 area.
+ */
+ csa->priv2.spu_cfg_RW = in_be64(&priv2->spu_cfg_RW);
+}
+
+static inline void save_pm_trace(struct spu_state *csa, struct spu *spu)
+{
+ /* Save, Step 37:
+ * Save PM_Trace_Tag_Wait_Mask in the CSA.
+ * Not performed by this implementation.
+ *
+ * The body is empty and both arguments are unused.
+ */
+}
+
+static inline void save_mfc_rag(struct spu_state *csa, struct spu *spu)
+{
+ /* Save, Step 38:
+ * Save RA_GROUP_ID register and the
+ * RA_ENABLE register in the CSA.
+ *
+ * Both values are obtained through the
+ * spu_resource_allocation_*_get() accessors and stored in
+ * csa->priv1.
+ */
+ csa->priv1.resource_allocation_groupID_RW =
+ spu_resource_allocation_groupID_get(spu);
+ csa->priv1.resource_allocation_enable_RW =
+ spu_resource_allocation_enable_get(spu);
+}
+
+static inline void save_ppu_mb_stat(struct spu_state *csa, struct spu *spu)
+{
+ struct spu_problem __iomem *prob = spu->problem;
+
+ /* Save, Step 39:
+ * Save MB_Stat register in the CSA.
+ *
+ * Read with in_be32() from the problem-state area.
+ */
+ csa->prob.mb_stat_R = in_be32(&prob->mb_stat_R);
+}
+
+static inline void save_ppu_mb(struct spu_state *csa, struct spu *spu)
+{
+ struct spu_problem __iomem *prob = spu->problem;
+
+ /* Save, Step 40:
+ * Save the PPU_MB register in the CSA.
+ *
+ * This is the problem-state pu_mb_R field, read with
+ * in_be32().
+ */
+ csa->prob.pu_mb_R = in_be32(&prob->pu_mb_R);
+}
+
+static inline void save_ppuint_mb(struct spu_state *csa, struct spu *spu)
+{
+ struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+ /* Save, Step 41:
+ * Save the PPUINT_MB register in the CSA.
+ *
+ * This is the priv2 puint_mb_R field, read with in_be64().
+ */
+ csa->priv2.puint_mb_R = in_be64(&priv2->puint_mb_R);
+}
+
+static inline void save_ch_part1(struct spu_state *csa, struct spu *spu)
+{
+ struct spu_priv2 __iomem *priv2 = spu->priv2;
+ u64 idx, ch_indices[] = { 0UL, 3UL, 4UL, 24UL, 25UL, 27UL };
+ int i;
+
+ /* Save, Step 42:
+ *
+ * Each access first writes the channel number to
+ * spu_chnlcntptr_RW and then uses spu_chnldata_RW /
+ * spu_chnlcnt_RW (presumably these then refer to the selected
+ * channel - confirm against the hardware documentation).
+ * For the listed channels both data and count are saved and
+ * then written back as 0.
+ */
+
+ /* Save CH 1, without channel count */
+ out_be64(&priv2->spu_chnlcntptr_RW, 1);
+ csa->spu_chnldata_RW[1] = in_be64(&priv2->spu_chnldata_RW);
+
+ /* Save the following CH: [0,3,4,24,25,27] */
+ for (i = 0; i < ARRAY_SIZE(ch_indices); i++) {
+ idx = ch_indices[i];
+ out_be64(&priv2->spu_chnlcntptr_RW, idx);
+ eieio();
+ csa->spu_chnldata_RW[idx] = in_be64(&priv2->spu_chnldata_RW);
+ csa->spu_chnlcnt_RW[idx] = in_be64(&priv2->spu_chnlcnt_RW);
+ out_be64(&priv2->spu_chnldata_RW, 0UL);
+ out_be64(&priv2->spu_chnlcnt_RW, 0UL);
+ eieio();
+ }
+}
+
+static inline void save_spu_mb(struct spu_state *csa, struct spu *spu)
+{
+ struct spu_priv2 __iomem *priv2 = spu->priv2;
+ int i;
+
+ /* Save, Step 43:
+ * Save SPU Read Mailbox Channel.
+ *
+ * Selects channel 29, saves its count, reads the data
+ * register four times into csa->spu_mailbox_data[0..3]
+ * (regardless of the count just saved), then writes the
+ * channel count as 0.
+ */
+ out_be64(&priv2->spu_chnlcntptr_RW, 29UL);
+ eieio();
+ csa->spu_chnlcnt_RW[29] = in_be64(&priv2->spu_chnlcnt_RW);
+ for (i = 0; i < 4; i++) {
+ csa->spu_mailbox_data[i] = in_be64(&priv2->spu_chnldata_RW);
+ }
+ out_be64(&priv2->spu_chnlcnt_RW, 0UL);
+ eieio();
+}
+
+static inline void save_mfc_cmd(struct spu_state *csa, struct spu *spu)
+{
+ struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+ /* Save, Step 44:
+ * Save MFC_CMD Channel.
+ *
+ * Selects channel 21 and saves only its count; no channel
+ * data is read.
+ */
+ out_be64(&priv2->spu_chnlcntptr_RW, 21UL);
+ eieio();
+ csa->spu_chnlcnt_RW[21] = in_be64(&priv2->spu_chnlcnt_RW);
+ eieio();
+}
+
+static inline void reset_ch(struct spu_state *csa, struct spu *spu)
+{
+ struct spu_priv2 __iomem *priv2 = spu->priv2;
+ u64 ch_indices[4] = { 21UL, 23UL, 28UL, 30UL };
+ u64 ch_counts[4] = { 16UL, 1UL, 1UL, 1UL };
+ u64 idx;
+ int i;
+
+ /* Save, Step 45:
+ * Reset the following CH: [21, 23, 28, 30]
+ *
+ * ch_counts[] is parallel to ch_indices[]: channel 21 gets a
+ * count of 16, channels 23, 28 and 30 a count of 1. Only the
+ * counts are written. 'csa' is not used.
+ */
+ for (i = 0; i < 4; i++) {
+ idx = ch_indices[i];
+ out_be64(&priv2->spu_chnlcntptr_RW, idx);
+ eieio();
+ out_be64(&priv2->spu_chnlcnt_RW, ch_counts[i]);
+ eieio();
+ }
+}
+
+static inline void resume_mfc_queue(struct spu_state *csa, struct spu *spu)
+{
+ struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+ /* Save, Step 46:
+ * Restore, Step 25.
+ * Write MFC_CNTL[Sc]=0 (resume queue processing).
+ *
+ * The whole register is written with
+ * MFC_CNTL_RESUME_DMA_QUEUE; no barrier follows the write in
+ * this helper. 'csa' is not used.
+ */
+ out_be64(&priv2->mfc_control_RW, MFC_CNTL_RESUME_DMA_QUEUE);
+}
+
+static inline void setup_mfc_slbs(struct spu_state *csa, struct spu *spu,
+ unsigned int *code, int code_size)
+{
+ /* Save, Step 47:
+ * Restore, Step 30.
+ * If MFC_SR1[R]=1, write 0 to SLB_Invalidate_All
+ * register, then initialize SLB_VSID and SLB_ESID
+ * to provide access to SPU context save code and
+ * LSCSA.
+ *
+ * This implementation places both the context
+ * switch code and LSCSA in kernel address space.
+ *
+ * Further this implementation assumes that the
+ * MFC_SR1[R]=1 (in other words, assume that
+ * translation is desired by OS environment).
+ *
+ * 'code' and 'code_size' are passed straight through to
+ * spu_setup_kernel_slbs() together with csa->lscsa.
+ */
+ spu_invalidate_slbs(spu);
+ spu_setup_kernel_slbs(spu, csa->lscsa, code, code_size);
+}
+
+static inline void set_switch_active(struct spu_state *csa, struct spu *spu)
+{
+ /* Save, Step 48:
+ * Restore, Step 23.
+ * Change the software context switch pending flag
+ * to context switch active. This implementation does
+ * not use a switch active flag.
+ *
+ * Now that we have saved the mfc in the csa, we can add in the
+ * restart command if an exception occurred.
+ *
+ * SPU_CONTEXT_SWITCH_PENDING is cleared afterwards, followed
+ * by a full barrier (mb()).
+ */
+ if (test_bit(SPU_CONTEXT_FAULT_PENDING, &spu->flags))
+ csa->priv2.mfc_control_RW |= MFC_CNTL_RESTART_DMA_COMMAND;
+ clear_bit(SPU_CONTEXT_SWITCH_PENDING, &spu->flags);
+ mb();
+}
+
+static inline void enable_interrupts(struct spu_state *csa, struct spu *spu)
+{
+ unsigned long class1_mask = CLASS1_ENABLE_SEGMENT_FAULT_INTR |
+ CLASS1_ENABLE_STORAGE_FAULT_INTR;
+
+ /* Save, Step 49:
+ * Restore, Step 22:
+ * Reset and then enable interrupts, as
+ * needed by OS.
+ *
+ * This implementation enables only class1
+ * (translation) interrupts.
+ *
+ * Pending status in all three classes is cleared first; the
+ * class 0 and class 2 masks are then written as 0 and class 1
+ * is given the segment-fault and storage-fault enables. All
+ * of this happens under spu->register_lock. 'csa' is not used.
+ */
+ spin_lock_irq(&spu->register_lock);
+ spu_int_stat_clear(spu, 0, CLASS0_INTR_MASK);
+ spu_int_stat_clear(spu, 1, CLASS1_INTR_MASK);
+ spu_int_stat_clear(spu, 2, CLASS2_INTR_MASK);
+ spu_int_mask_set(spu, 0, 0ul);
+ spu_int_mask_set(spu, 1, class1_mask);
+ spu_int_mask_set(spu, 2, 0ul);
+ spin_unlock_irq(&spu->register_lock);
+}
+
+/*
+ * Issue a DMA transfer through the problem-state MFC command registers,
+ * split into chunks of at most MFC_MAX_DMA_SIZE bytes.
+ *
+ * For each chunk the local-store offset, the effective address and the
+ * packed size/tag/class/command word are written, then the command status
+ * word is read back; the three writes are repeated for as long as either of
+ * its low two bits (mask 0x3) is set, with cpu_relax() when bit 0x2 is set.
+ * 'ea' and 'ls_offset' advance by the chunk size after each chunk.
+ *
+ * This helper only issues the commands; it contains no wait for the data
+ * transfer itself. Always returns 0.
+ */
+static inline int send_mfc_dma(struct spu *spu, unsigned long ea,
+ unsigned int ls_offset, unsigned int size,
+ unsigned int tag, unsigned int rclass,
+ unsigned int cmd)
+{
+ struct spu_problem __iomem *prob = spu->problem;
+ union mfc_tag_size_class_cmd command;
+ unsigned int transfer_size;
+ volatile unsigned int status = 0x0;
+
+ while (size > 0) {
+ transfer_size =
+ (size > MFC_MAX_DMA_SIZE) ? MFC_MAX_DMA_SIZE : size;
+ command.u.mfc_size = transfer_size;
+ command.u.mfc_tag = tag;
+ command.u.mfc_rclassid = rclass;
+ command.u.mfc_cmd = cmd;
+ do {
+ out_be32(&prob->mfc_lsa_W, ls_offset);
+ out_be64(&prob->mfc_ea_W, ea);
+ out_be64(&prob->mfc_union_W.all64, command.all64);
+ status =
+ in_be32(&prob->mfc_union_W.by32.mfc_class_cmd32);
+ if (unlikely(status & 0x2)) {
+ cpu_relax();
+ }
+ } while (status & 0x3);
+ size -= transfer_size;
+ ea += transfer_size;
+ ls_offset += transfer_size;
+ }
+ return 0;
+}
+
+static inline void save_ls_16kb(struct spu_state *csa, struct spu *spu)
+{
+ unsigned long addr = (unsigned long)&csa->lscsa->ls[0];
+ unsigned int ls_offset = 0x0;
+ unsigned int size = 16384;
+ unsigned int tag = 0;
+ unsigned int rclass = 0;
+ unsigned int cmd = MFC_PUT_CMD;
+
+ /* Save, Step 50:
+ * Issue a DMA command to copy the first 16K bytes
+ * of local storage to the CSA.
+ *
+ * The destination is csa->lscsa->ls; the transfer uses
+ * MFC_PUT_CMD with tag 0 and class 0, starting at local-store
+ * offset 0. The return value of send_mfc_dma() is ignored.
+ */
+ send_mfc_dma(spu, addr, ls_offset, size, tag, rclass, cmd);
+}
+
+static inline void set_spu_npc(struct spu_state *csa, struct spu *spu)
+{
+ struct spu_problem __iomem *prob = spu->problem;
+
+ /* Save, Step 51:
+ * Restore, Step 31.
+ * Write SPU_NPC[IE]=0 and SPU_NPC[LSA] to entry
+ * point address of context save code in local
+ * storage.
+ *
+ * This implementation uses SPU-side save/restore
+ * programs with entry points at LSA of 0.
+ *
+ * Hence the register is simply written as 0, followed by
+ * eieio(). 'csa' is not used.
+ */
+ out_be32(&prob->spu_npc_RW, 0);
+ eieio();
+}
+
+static inline void set_signot1(struct spu_state *csa, struct spu *spu)
+{
+ struct spu_problem __iomem *prob = spu->problem;
+ union {
+ u64 ull;
+ u32 ui[2];
+ } addr64;
+
+ /* Save, Step 52:
+ * Restore, Step 32:
+ * Write SPU_Sig_Notify_1 register with upper 32-bits
+ * of the CSA.LSCSA effective address.
+ *
+ * The address is split through a u64/u32[2] union; ui[0] is
+ * the upper half only with big-endian byte order.
+ */
+ addr64.ull = (u64) csa->lscsa;
+ out_be32(&prob->signal_notify1, addr64.ui[0]);
+ eieio();
+}
+
+static inline void set_signot2(struct spu_state *csa, struct spu *spu)
+{
+ struct spu_problem __iomem *prob = spu->problem;
+ union {
+ u64 ull;
+ u32 ui[2];
+ } addr64;
+
+ /* Save, Step 53:
+ * Restore, Step 33:
+ * Write SPU_Sig_Notify_2 register with lower 32-bits
+ * of the CSA.LSCSA effective address.
+ *
+ * The address is split through a u64/u32[2] union; ui[1] is
+ * the lower half only with big-endian byte order.
+ */
+ addr64.ull = (u64) csa->lscsa;
+ out_be32(&prob->signal_notify2, addr64.ui[1]);
+ eieio();
+}
+
+static inline void send_save_code(struct spu_state *csa, struct spu *spu)
+{
+ unsigned long addr = (unsigned long)&spu_save_code[0];
+ unsigned int ls_offset = 0x0;
+ unsigned int size = sizeof(spu_save_code);
+ unsigned int tag = 0;
+ unsigned int rclass = 0;
+ unsigned int cmd = MFC_GETFS_CMD;
+
+ /* Save, Step 54:
+ * Issue a DMA command to copy context save code
+ * to local storage and start SPU.
+ *
+ * Transfers sizeof(spu_save_code) bytes of spu_save_code[] to
+ * local-store offset 0 using MFC_GETFS_CMD, tag 0, class 0.
+ * 'csa' is not used and the return value of send_mfc_dma() is
+ * ignored.
+ */
+ send_mfc_dma(spu, addr, ls_offset, size, tag, rclass, cmd);
+}
+
+static inline void set_ppu_querymask(struct spu_state *csa, struct spu *spu)
+{
+ struct spu_problem __iomem *prob = spu->problem;
+
+ /* Save, Step 55:
+ * Restore, Step 38.
+ * Write PPU_QueryMask=1 (enable Tag Group 0)
+ * and issue eieio instruction.
+ *
+ * The mask value comes from MFC_TAGID_TO_TAGMASK(0).
+ * 'csa' is not used.
+ */
+ out_be32(&prob->dma_querymask_RW, MFC_TAGID_TO_TAGMASK(0));
+ eieio();
+}
+
+static inline void wait_tag_complete(struct spu_state *csa, struct spu *spu)
+{
+ struct spu_problem __iomem *prob = spu->problem;
+ u32 mask = MFC_TAGID_TO_TAGMASK(0);
+ unsigned long flags;
+
+ /* Save, Step 56:
+ * Restore, Step 39.
+ * Restore, Step 39.
+ * Restore, Step 46.
+ * Poll PPU_TagStatus[gn] until 01 (Tag group 0 complete)
+ * or write PPU_QueryType[TS]=01 and wait for Tag Group
+ * Complete Interrupt. Write INT_Stat_Class0 or
+ * INT_Stat_Class2 with value of 'handled'.
+ *
+ * This implementation takes the polling option, then clears
+ * the class 0 and class 2 interrupt status with local
+ * interrupts disabled. 'csa' is not used.
+ * NOTE(review): "Restore, Step 39." is listed twice above -
+ * confirm whether a different step number was intended.
+ */
+ POLL_WHILE_FALSE(in_be32(&prob->dma_tagstatus_R) & mask);
+
+ local_irq_save(flags);
+ spu_int_stat_clear(spu, 0, CLASS0_INTR_MASK);
+ spu_int_stat_clear(spu, 2, CLASS2_INTR_MASK);
+ local_irq_restore(flags);
+}
+
+static inline void wait_spu_stopped(struct spu_state *csa, struct spu *spu)
+{
+ struct spu_problem __iomem *prob = spu->problem;
+ unsigned long flags;
+
+ /* Save, Step 57:
+ * Restore, Step 40.
+ * Poll until SPU_Status[R]=0 or wait for SPU Class 0
+ * or SPU Class 2 interrupt. Write INT_Stat_class0
+ * or INT_Stat_class2 with value of handled.
+ *
+ * This implementation takes the polling option, then clears
+ * the class 0 and class 2 interrupt status with local
+ * interrupts disabled. 'csa' is not used.
+ */
+ POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) & SPU_STATUS_RUNNING);
+
+ local_irq_save(flags);
+ spu_int_stat_clear(spu, 0, CLASS0_INTR_MASK);
+ spu_int_stat_clear(spu, 2, CLASS2_INTR_MASK);
+ local_irq_restore(flags);
+}
+
+static inline int check_save_status(struct spu_state *csa, struct spu *spu)
+{
+ struct spu_problem __iomem *prob = spu->problem;
+ u32 complete;
+
+ /* Save, Step 54:
+ * If SPU_Status[P]=1 and SPU_Status[SC] = "success",
+ * context save succeeded, otherwise context save
+ * failed.
+ *
+ * Returns 0 when SPU_Status is exactly SPU_SAVE_COMPLETE in
+ * the stop-code field plus STOPPED_BY_STOP (no other bits),
+ * and 1 otherwise. 'csa' is not used.
+ * NOTE(review): send_save_code() is also labelled Step 54 and
+ * wait_spu_stopped() is Step 57 - confirm this step number.
+ */
+ complete = ((SPU_SAVE_COMPLETE << SPU_STOP_STATUS_SHIFT) |
+ SPU_STATUS_STOPPED_BY_STOP);
+ return (in_be32(&prob->spu_status_R) != complete) ? 1 : 0;
+}
+
+static inline void terminate_spu_app(struct spu_state *csa, struct spu *spu)
+{
+ /* Restore, Step 4:
+ * If required, notify the "using application" that
+ * the SPU task has been terminated. TBD.
+ *
+ * Placeholder: the body is empty; no notification is sent.
+ */
+}
+
+static inline void suspend_mfc_and_halt_decr(struct spu_state *csa,
+ struct spu *spu)
+{
+ struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+ /* Restore, Step 7:
+ * Write MFC_Cntl[Dh,Sc,Sm]='1','1','0' to suspend
+ * the queue and halt the decrementer.
+ *
+ * Only issues the request (followed by eieio());
+ * wait_suspend_mfc_complete() polls for completion.
+ * 'csa' is not used.
+ */
+ out_be64(&priv2->mfc_control_RW, MFC_CNTL_SUSPEND_DMA_QUEUE |
+ MFC_CNTL_DECREMENTER_HALTED);
+ eieio();
+}
+
+static inline void wait_suspend_mfc_complete(struct spu_state *csa,
+ struct spu *spu)
+{
+ struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+ /* Restore, Step 8:
+ * Restore, Step 47.
+ * Poll MFC_CNTL[Ss] until 11 is returned.
+ *
+ * The wait has no timeout: it loops until the suspend-status
+ * field equals MFC_CNTL_SUSPEND_COMPLETE. 'csa' is not used.
+ */
+ POLL_WHILE_FALSE((in_be64(&priv2->mfc_control_RW) &
+ MFC_CNTL_SUSPEND_DMA_STATUS_MASK) ==
+ MFC_CNTL_SUSPEND_COMPLETE);
+}
+
+static inline int suspend_spe(struct spu_state *csa, struct spu *spu)
+{
+ struct spu_problem __iomem *prob = spu->problem;
+
+ /* Restore, Step 9:
+ * If SPU_Status[R]=1, stop SPU execution
+ * and wait for stop to complete.
+ *
+ * Returns 1 if SPU_Status[R]=1 on entry.
+ * 0 otherwise
+ *
+ * When running, three cases are checked in order, each
+ * re-reading the status register:
+ * - isolated-exit status set: just wait for R to clear;
+ * - isolated-load status or isolated state set: request a
+ *   stop, wait, then write the raw value 0x2 to SPU_RunCntl
+ *   (no named constant is used here) and wait again;
+ * - waiting for channel: request a stop and wait.
+ * 'csa' is not used.
+ */
+ if (in_be32(&prob->spu_status_R) & SPU_STATUS_RUNNING) {
+ if (in_be32(&prob->spu_status_R) &
+ SPU_STATUS_ISOLATED_EXIT_STATUS) {
+ POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) &
+ SPU_STATUS_RUNNING);
+ }
+ if ((in_be32(&prob->spu_status_R) &
+ SPU_STATUS_ISOLATED_LOAD_STATUS)
+ || (in_be32(&prob->spu_status_R) &
+ SPU_STATUS_ISOLATED_STATE)) {
+ out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_STOP);
+ eieio();
+ POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) &
+ SPU_STATUS_RUNNING);
+ out_be32(&prob->spu_runcntl_RW, 0x2);
+ eieio();
+ POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) &
+ SPU_STATUS_RUNNING);
+ }
+ if (in_be32(&prob->spu_status_R) &
+ SPU_STATUS_WAITING_FOR_CHANNEL) {
+ out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_STOP);
+ eieio();
+ POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) &
+ SPU_STATUS_RUNNING);
+ }
+ return 1;
+ }
+ return 0;
+}
+
+static inline void clear_spu_status(struct spu_state *csa, struct spu *spu)
+{
+ struct spu_problem __iomem *prob = spu->problem;
+
+ /* Restore, Step 10:
+ * If SPU_Status[R]=0 and SPU_Status[E,L,IS]=1,
+ * release SPU from isolate state.
+ *
+ * Does nothing while the SPU is running. Otherwise:
+ * - isolated-exit status set: MFC_SR1 is set to master run
+ *   control only, SPU_RunCntl is written as RUNNABLE and the
+ *   R bit is polled until clear;
+ * - isolated-load status or isolated state set: same MFC_SR1
+ *   write, then the raw value 0x2 is written to SPU_RunCntl
+ *   and the R bit is polled until clear.
+ * The second check re-reads the status register, so both
+ * branches may run in one call. 'csa' is not used.
+ */
+ if (!(in_be32(&prob->spu_status_R) & SPU_STATUS_RUNNING)) {
+ if (in_be32(&prob->spu_status_R) &
+ SPU_STATUS_ISOLATED_EXIT_STATUS) {
+ spu_mfc_sr1_set(spu,
+ MFC_STATE1_MASTER_RUN_CONTROL_MASK);
+ eieio();
+ out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_RUNNABLE);
+ eieio();
+ POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) &
+ SPU_STATUS_RUNNING);
+ }
+ if ((in_be32(&prob->spu_status_R) &
+ SPU_STATUS_ISOLATED_LOAD_STATUS)
+ || (in_be32(&prob->spu_status_R) &
+ SPU_STATUS_ISOLATED_STATE)) {
+ spu_mfc_sr1_set(spu,
+ MFC_STATE1_MASTER_RUN_CONTROL_MASK);
+ eieio();
+ out_be32(&prob->spu_runcntl_RW, 0x2);
+ eieio();
+ POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) &
+ SPU_STATUS_RUNNING);
+ }
+ }
+}
+
+static inline void reset_ch_part1(struct spu_state *csa, struct spu *spu)
+{
+ struct spu_priv2 __iomem *priv2 = spu->priv2;
+ u64 ch_indices[] = { 0UL, 3UL, 4UL, 24UL, 25UL, 27UL };
+ u64 idx;
+ int i;
+
+ /* Restore, Step 20:
+ *
+ * Channel 1 has only its data written as 0; for the listed
+ * channels both data and count are written as 0. This is the
+ * same channel set that save_ch_part1() saves. 'csa' is not
+ * used.
+ */
+
+ /* Reset CH 1 */
+ out_be64(&priv2->spu_chnlcntptr_RW, 1);
+ out_be64(&priv2->spu_chnldata_RW, 0UL);
+
+ /* Reset the following CH: [0,3,4,24,25,27] */
+ for (i = 0; i < ARRAY_SIZE(ch_indices); i++) {
+ idx = ch_indices[i];
+ out_be64(&priv2->spu_chnlcntptr_RW, idx);
+ eieio();
+ out_be64(&priv2->spu_chnldata_RW, 0UL);
+ out_be64(&priv2->spu_chnlcnt_RW, 0UL);
+ eieio();
+ }
+}
+
+static inline void reset_ch_part2(struct spu_state *csa, struct spu *spu)
+{
+ struct spu_priv2 __iomem *priv2 = spu->priv2;
+ u64 ch_indices[5] = { 21UL, 23UL, 28UL, 29UL, 30UL };
+ u64 ch_counts[5] = { 16UL, 1UL, 1UL, 0UL, 1UL };
+ u64 idx;
+ int i;
+
+ /* Restore, Step 21:
+ * Reset the following CH: [21, 23, 28, 29, 30]
+ *
+ * ch_counts[] is parallel to ch_indices[]: channel 21 gets a
+ * count of 16, channel 29 a count of 0 and channels 23, 28
+ * and 30 a count of 1. Only the counts are written. 'csa' is
+ * not used.
+ */
+ for (i = 0; i < 5; i++) {
+ idx = ch_indices[i];
+ out_be64(&priv2->spu_chnlcntptr_RW, idx);
+ eieio();
+ out_be64(&priv2->spu_chnlcnt_RW, ch_counts[i]);
+ eieio();
+ }
+}
+
+static inline void setup_spu_status_part1(struct spu_state *csa,
+					  struct spu *spu)
+{
+	u32 stop_only = SPU_STATUS_STOPPED_BY_STOP;
+	u32 invalid_only = SPU_STATUS_INVALID_INSTR;
+	u32 halt_only = SPU_STATUS_STOPPED_BY_HALT;
+	u32 step_only = SPU_STATUS_SINGLE_STEP;
+	u32 step_invalid = SPU_STATUS_SINGLE_STEP | SPU_STATUS_INVALID_INSTR;
+	u32 step_stop = SPU_STATUS_SINGLE_STEP | SPU_STATUS_STOPPED_BY_STOP;
+	u32 stop_halt = SPU_STATUS_STOPPED_BY_HALT |SPU_STATUS_STOPPED_BY_STOP;
+	u32 stop_invalid = SPU_STATUS_STOPPED_BY_STOP |SPU_STATUS_INVALID_INSTR;
+	u32 stop_code;
+
+	/* Restore, Step 27:
+	 *    Translate the saved CSA.SPU_Status[I,S,H,P] bits into
+	 *    the 'stopped_status' field of the LSCSA, instead of
+	 *    patching instructions onto the end of the SPU restore
+	 *    program.  The SPU-side restore code reads this field
+	 *    and takes the matching action when it exits.
+	 *
+	 *    slot[0] receives the SPU_STOPPED_STATUS_* selector.
+	 *    slot[1] receives the 16-bit stop code taken from the
+	 *    saved status, but only for the first five combinations
+	 *    tested below.
+	 *
+	 *    Combinations are tested from most to least specific;
+	 *    the first match wins.  If nothing matches, the LSCSA
+	 *    is left untouched.
+	 */
+	stop_code =
+	    (csa->prob.spu_status_R >> SPU_STOP_STATUS_SHIFT) & 0xFFFF;
+
+	/* [P,I]: illegal instruction, then stop-and-signal, then 'br -4'. */
+	if ((csa->prob.spu_status_R & stop_invalid) == stop_invalid) {
+		csa->lscsa->stopped_status.slot[0] = SPU_STOPPED_STATUS_P_I;
+		csa->lscsa->stopped_status.slot[1] = stop_code;
+		return;
+	}
+
+	/* [P,H]: halt conditional, then stop-and-signal, then 'br -4'. */
+	if ((csa->prob.spu_status_R & stop_halt) == stop_halt) {
+		csa->lscsa->stopped_status.slot[0] = SPU_STOPPED_STATUS_P_H;
+		csa->lscsa->stopped_status.slot[1] = stop_code;
+		return;
+	}
+
+	/* [S,P]: stop-and-signal followed by 'br -4'. */
+	if ((csa->prob.spu_status_R & step_stop) == step_stop) {
+		csa->lscsa->stopped_status.slot[0] = SPU_STOPPED_STATUS_S_P;
+		csa->lscsa->stopped_status.slot[1] = stop_code;
+		return;
+	}
+
+	/* [S,I]: illegal instruction followed by 'br -4'. */
+	if ((csa->prob.spu_status_R & step_invalid) == step_invalid) {
+		csa->lscsa->stopped_status.slot[0] = SPU_STOPPED_STATUS_S_I;
+		csa->lscsa->stopped_status.slot[1] = stop_code;
+		return;
+	}
+
+	/* [P]: stop-and-signal followed by 'br -4'. */
+	if ((csa->prob.spu_status_R & stop_only) == stop_only) {
+		csa->lscsa->stopped_status.slot[0] = SPU_STOPPED_STATUS_P;
+		csa->lscsa->stopped_status.slot[1] = stop_code;
+		return;
+	}
+
+	/* [H]: halt conditional followed by 'br -4'; no stop code. */
+	if ((csa->prob.spu_status_R & halt_only) == halt_only) {
+		csa->lscsa->stopped_status.slot[0] = SPU_STOPPED_STATUS_H;
+		return;
+	}
+
+	/* [S]: two nop instructions; no stop code. */
+	if ((csa->prob.spu_status_R & step_only) == step_only) {
+		csa->lscsa->stopped_status.slot[0] = SPU_STOPPED_STATUS_S;
+		return;
+	}
+
+	/* [I]: illegal instruction followed by 'br -4'; no stop code. */
+	if ((csa->prob.spu_status_R & invalid_only) == invalid_only)
+		csa->lscsa->stopped_status.slot[0] = SPU_STOPPED_STATUS_I;
+}
+
+static inline void setup_spu_status_part2(struct spu_state *csa,
+					  struct spu *spu)
+{
+	u32 any_state_bit;
+
+	/* Restore, Step 28:
+	 *    When none of CSA.SPU_Status[I,S,H,P,R] is set, the
+	 *    architected sequence appends a 'br *' instruction to
+	 *    the SPU based restore code.
+	 *
+	 *    As in Step 27, this implementation does not modify
+	 *    the SPU executable; it records SPU_STOPPED_STATUS_R
+	 *    in the LSCSA 'stopped_status' field and lets the
+	 *    SPU-side restore act on it when exiting.
+	 */
+	any_state_bit = SPU_STATUS_INVALID_INSTR |
+	    SPU_STATUS_SINGLE_STEP |
+	    SPU_STATUS_STOPPED_BY_HALT |
+	    SPU_STATUS_STOPPED_BY_STOP | SPU_STATUS_RUNNING;
+
+	/* Some status bit was saved: nothing to record here. */
+	if (csa->prob.spu_status_R & any_state_bit)
+		return;
+
+	csa->lscsa->stopped_status.slot[0] = SPU_STOPPED_STATUS_R;
+}
+
+static inline void restore_mfc_rag(struct spu_state *csa, struct spu *spu)
+{
+ /* Restore, Step 29:
+ * Restore RA_GROUP_ID register and the
+ * RA_ENABLE register from the CSA.
+ *
+ * Both values come from csa->priv1 and are written
+ * through the spu_resource_allocation_*_set() helpers.
+ */
+ spu_resource_allocation_groupID_set(spu,
+ csa->priv1.resource_allocation_groupID_RW);
+ spu_resource_allocation_enable_set(spu,
+ csa->priv1.resource_allocation_enable_RW);
+}
+
+static inline void send_restore_code(struct spu_state *csa, struct spu *spu)
+{
+ unsigned long addr = (unsigned long)&spu_restore_code[0];
+ unsigned int ls_offset = 0x0;
+ unsigned int size = sizeof(spu_restore_code);
+ unsigned int tag = 0;
+ unsigned int rclass = 0;
+ unsigned int cmd = MFC_GETFS_CMD;
+
+ /* Restore, Step 37:
+ * Issue MFC DMA command to copy context
+ * restore code to local storage.
+ *
+ * The transfer covers sizeof(spu_restore_code) bytes,
+ * targets local-store offset 0 and is queued with
+ * tag 0, class 0 and the MFC_GETFS_CMD command.
+ * The csa argument is not used by this step.
+ */
+ send_mfc_dma(spu, addr, ls_offset, size, tag, rclass, cmd);
+}
+
+static inline void setup_decr(struct spu_state *csa, struct spu *spu)
+{
+	cycles_t now;
+	cycles_t elapsed;
+
+	/* Restore, Step 34:
+	 *    Prepare the decrementer fields of the LSCSA.
+	 *
+	 *    If the saved MFC_CNTL shows the decrementer was not
+	 *    running, only the status slot is cleared.  Otherwise
+	 *    the status is marked running, the saved decrementer
+	 *    value is reduced by the cycles elapsed since
+	 *    csa->suspend_time, and the "wrapped" flag is set when
+	 *    that elapsed time exceeds the saved value.
+	 */
+	if (!(csa->priv2.mfc_control_RW & MFC_CNTL_DECREMENTER_RUNNING)) {
+		csa->lscsa->decr_status.slot[0] = 0;
+		return;
+	}
+
+	now = get_cycles();
+	elapsed = now - csa->suspend_time;
+
+	csa->lscsa->decr_status.slot[0] = SPU_DECR_STATUS_RUNNING;
+	if (csa->lscsa->decr.slot[0] < elapsed)
+		csa->lscsa->decr_status.slot[0] |= SPU_DECR_STATUS_WRAPPED;
+
+	csa->lscsa->decr.slot[0] -= elapsed;
+}
+
+static inline void setup_ppu_mb(struct spu_state *csa, struct spu *spu)
+{
+ /* Restore, Step 35:
+ * Copy the CSA.PU_MB data into the LSCSA.
+ *
+ * Only memory in the save area is touched (slot 0 of
+ * lscsa->ppu_mb); the spu argument is not used.
+ */
+ csa->lscsa->ppu_mb.slot[0] = csa->prob.pu_mb_R;
+}
+
+static inline void setup_ppuint_mb(struct spu_state *csa, struct spu *spu)
+{
+ /* Restore, Step 36:
+ * Copy the CSA.PUINT_MB data into the LSCSA.
+ *
+ * Only memory in the save area is touched (slot 0 of
+ * lscsa->ppuint_mb); the spu argument is not used.
+ */
+ csa->lscsa->ppuint_mb.slot[0] = csa->priv2.puint_mb_R;
+}
+
+static inline int check_restore_status(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_problem __iomem *prob = spu->problem;
+	u32 expected;
+
+	/* Restore, Step 40:
+	 *    The restore succeeded only if SPU_Status reads back as
+	 *    exactly "stopped by stop" with the stop code
+	 *    SPU_RESTORE_COMPLETE.  Any other value is a failure.
+	 *
+	 *    Returns 0 on success, 1 on failure.
+	 */
+	expected = ((SPU_RESTORE_COMPLETE << SPU_STOP_STATUS_SHIFT) |
+		    SPU_STATUS_STOPPED_BY_STOP);
+
+	return in_be32(&prob->spu_status_R) != expected;
+}
+
+static inline void restore_spu_privcntl(struct spu_state *csa, struct spu *spu)
+{
+ struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+ /* Restore, Step 41:
+ * Restore SPU_PrivCntl from the CSA.
+ *
+ * The saved csa->priv2.spu_privcntl_RW value is written
+ * to the priv2 register, followed by eieio().
+ */
+ out_be64(&priv2->spu_privcntl_RW, csa->priv2.spu_privcntl_RW);
+ eieio();
+}
+
+static inline void restore_status_part1(struct spu_state *csa, struct spu *spu)
+{
+ struct spu_problem __iomem *prob = spu->problem;
+ u32 mask;
+
+ /* Restore, Step 42:
+ * If any CSA.SPU_Status[I,S,H,P]=1, then
+ * restore the error or single step state.
+ *
+ * This is done by writing SPU_RUNCNTL_RUNNABLE to
+ * SPU_RunCntl and then polling SPU_Status until the
+ * running bit reads back as clear.
+ */
+ mask = SPU_STATUS_INVALID_INSTR |
+ SPU_STATUS_SINGLE_STEP |
+ SPU_STATUS_STOPPED_BY_HALT | SPU_STATUS_STOPPED_BY_STOP;
+ if (csa->prob.spu_status_R & mask) {
+ out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_RUNNABLE);
+ eieio();
+ POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) &
+ SPU_STATUS_RUNNING);
+ }
+}
+
+static inline void restore_status_part2(struct spu_state *csa, struct spu *spu)
+{
+ struct spu_problem __iomem *prob = spu->problem;
+ u32 mask;
+
+ /* Restore, Step 43:
+ * If all CSA.SPU_Status[I,S,H,P,R]=0 then write
+ * SPU_RunCntl[R0R1]='01', wait for SPU_Status[R]=1,
+ * then write '00' to SPU_RunCntl[R0R1] and wait
+ * for SPU_Status[R]=0.
+ *
+ * In the code below '01' is SPU_RUNCNTL_RUNNABLE and
+ * '00' is SPU_RUNCNTL_STOP; both waits are busy polls
+ * on the live SPU_Status register.
+ */
+ mask = SPU_STATUS_INVALID_INSTR |
+ SPU_STATUS_SINGLE_STEP |
+ SPU_STATUS_STOPPED_BY_HALT |
+ SPU_STATUS_STOPPED_BY_STOP | SPU_STATUS_RUNNING;
+ if (!(csa->prob.spu_status_R & mask)) {
+ out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_RUNNABLE);
+ eieio();
+ POLL_WHILE_FALSE(in_be32(&prob->spu_status_R) &
+ SPU_STATUS_RUNNING);
+ out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_STOP);
+ eieio();
+ POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) &
+ SPU_STATUS_RUNNING);
+ }
+}
+
+static inline void restore_ls_16kb(struct spu_state *csa, struct spu *spu)
+{
+ unsigned long addr = (unsigned long)&csa->lscsa->ls[0];
+ unsigned int ls_offset = 0x0;
+ unsigned int size = 16384;
+ unsigned int tag = 0;
+ unsigned int rclass = 0;
+ unsigned int cmd = MFC_GET_CMD;
+
+ /* Restore, Step 44:
+ * Issue a DMA command to restore the first
+ * 16kb of local storage from CSA.
+ *
+ * The source is the start of csa->lscsa->ls, the
+ * destination is local-store offset 0, and the
+ * command is MFC_GET_CMD with tag 0 and class 0.
+ */
+ send_mfc_dma(spu, addr, ls_offset, size, tag, rclass, cmd);
+}
+
+static inline void suspend_mfc(struct spu_state *csa, struct spu *spu)
+{
+ struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+ /* Restore, Step 46.
+ * Write MFC_Cntl[Sc,Sm]='1','0' to suspend
+ * the queue.
+ *
+ * restore_csa() runs this as Step 46 and waits for the
+ * suspend to complete in the following Step 47.
+ * The csa argument is not used by this step.
+ */
+ out_be64(&priv2->mfc_control_RW, MFC_CNTL_SUSPEND_DMA_QUEUE);
+ eieio();
+}
+
+static inline void clear_interrupts(struct spu_state *csa, struct spu *spu)
+{
+ /* Restore, Step 49:
+ * Write INT_MASK_class0 with value of 0.
+ * Write INT_MASK_class1 with value of 0.
+ * Write INT_MASK_class2 with value of 0.
+ * Write INT_STAT_class0 with value of -1.
+ * Write INT_STAT_class1 with value of -1.
+ * Write INT_STAT_class2 with value of -1.
+ *
+ * The status registers are cleared with the
+ * CLASSn_INTR_MASK constants rather than a literal -1.
+ * All six writes are done under spu->register_lock,
+ * taken with spin_lock_irq().
+ * The csa argument is not used by this step.
+ */
+ spin_lock_irq(&spu->register_lock);
+ spu_int_mask_set(spu, 0, 0ul);
+ spu_int_mask_set(spu, 1, 0ul);
+ spu_int_mask_set(spu, 2, 0ul);
+ spu_int_stat_clear(spu, 0, CLASS0_INTR_MASK);
+ spu_int_stat_clear(spu, 1, CLASS1_INTR_MASK);
+ spu_int_stat_clear(spu, 2, CLASS2_INTR_MASK);
+ spin_unlock_irq(&spu->register_lock);
+}
+
+static inline void restore_mfc_queues(struct spu_state *csa, struct spu *spu)
+{
+ struct spu_priv2 __iomem *priv2 = spu->priv2;
+ int i;
+
+ /* Restore, Step 50:
+ * If the saved CSA.MFC_Cntl has no bit of
+ * MFC_CNTL_DMA_QUEUES_EMPTY_MASK set, restore the
+ * MFC command queues: four data words for each of
+ * the 8 puq entries and each of the 16 spuq entries.
+ *
+ * NOTE(review): presumably a clear mask means the
+ * queues were not empty at save time - confirm
+ * against the MFC_Cntl[Se] definition.
+ *
+ * The trailing eieio() is executed in either case.
+ */
+ if ((csa->priv2.mfc_control_RW & MFC_CNTL_DMA_QUEUES_EMPTY_MASK) == 0) {
+ for (i = 0; i < 8; i++) {
+ out_be64(&priv2->puq[i].mfc_cq_data0_RW,
+ csa->priv2.puq[i].mfc_cq_data0_RW);
+ out_be64(&priv2->puq[i].mfc_cq_data1_RW,
+ csa->priv2.puq[i].mfc_cq_data1_RW);
+ out_be64(&priv2->puq[i].mfc_cq_data2_RW,
+ csa->priv2.puq[i].mfc_cq_data2_RW);
+ out_be64(&priv2->puq[i].mfc_cq_data3_RW,
+ csa->priv2.puq[i].mfc_cq_data3_RW);
+ }
+ for (i = 0; i < 16; i++) {
+ out_be64(&priv2->spuq[i].mfc_cq_data0_RW,
+ csa->priv2.spuq[i].mfc_cq_data0_RW);
+ out_be64(&priv2->spuq[i].mfc_cq_data1_RW,
+ csa->priv2.spuq[i].mfc_cq_data1_RW);
+ out_be64(&priv2->spuq[i].mfc_cq_data2_RW,
+ csa->priv2.spuq[i].mfc_cq_data2_RW);
+ out_be64(&priv2->spuq[i].mfc_cq_data3_RW,
+ csa->priv2.spuq[i].mfc_cq_data3_RW);
+ }
+ }
+ eieio();
+}
+
+static inline void restore_ppu_querymask(struct spu_state *csa, struct spu *spu)
+{
+ struct spu_problem __iomem *prob = spu->problem;
+
+ /* Restore, Step 51:
+ * Restore the PPU_QueryMask register from CSA.
+ *
+ * In this code the register is the problem-state
+ * dma_querymask_RW field.
+ */
+ out_be32(&prob->dma_querymask_RW, csa->prob.dma_querymask_RW);
+ eieio();
+}
+
+static inline void restore_ppu_querytype(struct spu_state *csa, struct spu *spu)
+{
+ struct spu_problem __iomem *prob = spu->problem;
+
+ /* Restore, Step 52:
+ * Restore the PPU_QueryType register from CSA.
+ *
+ * In this code the register is the problem-state
+ * dma_querytype_RW field.
+ */
+ out_be32(&prob->dma_querytype_RW, csa->prob.dma_querytype_RW);
+ eieio();
+}
+
+static inline void restore_mfc_csr_tsq(struct spu_state *csa, struct spu *spu)
+{
+ struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+ /* Restore, Step 53:
+ * Restore the MFC_CSR_TSQ register from CSA.
+ *
+ * In this code the register is the priv2
+ * spu_tag_status_query_RW field.
+ */
+ out_be64(&priv2->spu_tag_status_query_RW,
+ csa->priv2.spu_tag_status_query_RW);
+ eieio();
+}
+
+static inline void restore_mfc_csr_cmd(struct spu_state *csa, struct spu *spu)
+{
+ struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+ /* Restore, Step 54:
+ * Restore the MFC_CSR_CMD1 and MFC_CSR_CMD2
+ * registers from CSA.
+ *
+ * In this code they are the priv2 spu_cmd_buf1_RW and
+ * spu_cmd_buf2_RW fields; one eieio() follows both
+ * writes.
+ */
+ out_be64(&priv2->spu_cmd_buf1_RW, csa->priv2.spu_cmd_buf1_RW);
+ out_be64(&priv2->spu_cmd_buf2_RW, csa->priv2.spu_cmd_buf2_RW);
+ eieio();
+}
+
+static inline void restore_mfc_csr_ato(struct spu_state *csa, struct spu *spu)
+{
+ struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+ /* Restore, Step 55:
+ * Restore the MFC_CSR_ATO register from CSA.
+ *
+ * In this code the register is the priv2
+ * spu_atomic_status_RW field.
+ *
+ * NOTE(review): unlike the neighbouring restore steps,
+ * no eieio() follows this write - confirm that this is
+ * intentional.
+ */
+ out_be64(&priv2->spu_atomic_status_RW, csa->priv2.spu_atomic_status_RW);
+}
+
+static inline void restore_mfc_tclass_id(struct spu_state *csa, struct spu *spu)
+{
+ /* Restore, Step 56:
+ * Restore the MFC_TCLASS_ID register from CSA.
+ *
+ * The saved csa->priv1.mfc_tclass_id_RW value is written
+ * through spu_mfc_tclass_id_set(), followed by eieio().
+ */
+ spu_mfc_tclass_id_set(spu, csa->priv1.mfc_tclass_id_RW);
+ eieio();
+}
+
+static inline void set_llr_event(struct spu_state *csa, struct spu *spu)
+{
+	/* Restore, Step 57:
+	 *    Flag the Lock Line Reservation Lost event in the saved
+	 *    channel state.  Channel 0 holds the event status and
+	 *    channel 1 the event mask.
+	 *
+	 *    If the event is enabled in the mask, was not already
+	 *    pending, and the saved channel 0 count is 0, the count
+	 *    is raised to 1.  The event bit is then ORed into the
+	 *    saved channel 0 data unconditionally.
+	 *
+	 *    Only the save area is modified; spu is not used.
+	 */
+	if ((csa->spu_chnlcnt_RW[0] == 0) &&
+	    (csa->spu_chnldata_RW[1] & MFC_LLR_LOST_EVENT) &&
+	    !(csa->spu_chnldata_RW[0] & MFC_LLR_LOST_EVENT))
+		csa->spu_chnlcnt_RW[0] = 1;
+
+	csa->spu_chnldata_RW[0] |= MFC_LLR_LOST_EVENT;
+}
+
+static inline void restore_decr_wrapped(struct spu_state *csa, struct spu *spu)
+{
+	/* Restore, Step 58:
+	 *    When the LSCSA decrementer status carries the
+	 *    "wrapped" flag, OR bit 0x20 into the saved channel 0
+	 *    data (the comment in the original names this
+	 *    CSA.SPU_Event_Status[Tm]).
+	 *
+	 *    Before doing so, raise the saved channel 0 count to 1
+	 *    if it is 0, bit 0x20 is set in the saved channel 1
+	 *    data, and bit 0x20 was not already set in channel 0.
+	 *
+	 *    Only the save area is modified; spu is not used.
+	 */
+	if (csa->lscsa->decr_status.slot[0] & SPU_DECR_STATUS_WRAPPED) {
+		if (!(csa->spu_chnldata_RW[0] & 0x20) &&
+		    (csa->spu_chnldata_RW[1] & 0x20) &&
+		    (csa->spu_chnlcnt_RW[0] == 0))
+			csa->spu_chnlcnt_RW[0] = 1;
+
+		csa->spu_chnldata_RW[0] |= 0x20;
+	}
+}
+
+static inline void restore_ch_part1(struct spu_state *csa, struct spu *spu)
+{
+ struct spu_priv2 __iomem *priv2 = spu->priv2;
+ u64 idx, ch_indices[] = { 0UL, 3UL, 4UL, 24UL, 25UL, 27UL };
+ int i;
+
+ /* Restore, Step 59:
+ * Restore the following CH: [0,3,4,24,25,27]
+ *
+ * For each channel the index is written to
+ * spu_chnlcntptr_RW, then the saved data and the saved
+ * count for that index are written back.
+ */
+ for (i = 0; i < ARRAY_SIZE(ch_indices); i++) {
+ idx = ch_indices[i];
+ out_be64(&priv2->spu_chnlcntptr_RW, idx);
+ eieio();
+ out_be64(&priv2->spu_chnldata_RW, csa->spu_chnldata_RW[idx]);
+ out_be64(&priv2->spu_chnlcnt_RW, csa->spu_chnlcnt_RW[idx]);
+ eieio();
+ }
+}
+
+static inline void restore_ch_part2(struct spu_state *csa, struct spu *spu)
+{
+	struct spu_priv2 __iomem *priv2 = spu->priv2;
+	u64 ch_indices[3] = { 9UL, 21UL, 23UL };
+	/*
+	 * Counts for channels 9 and 23 are fixed at 1; the count for
+	 * channel 21 is taken from the save area.
+	 */
+	u64 ch_counts[3] = { 1UL, csa->spu_chnlcnt_RW[21], 1UL };
+	int i;
+
+	/* Restore, Step 60:
+	 *    Restore the following CH: [9,21,23].
+	 *
+	 *    Only the channel count is written.  For each channel
+	 *    the index is written to spu_chnlcntptr_RW first, then
+	 *    the count for that index.
+	 */
+	for (i = 0; i < ARRAY_SIZE(ch_indices); i++) {
+		out_be64(&priv2->spu_chnlcntptr_RW, ch_indices[i]);
+		eieio();
+		out_be64(&priv2->spu_chnlcnt_RW, ch_counts[i]);
+		eieio();
+	}
+}
+
+static inline void restore_spu_lslr(struct spu_state *csa, struct spu *spu)
+{
+ struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+ /* Restore, Step 61:
+ * Restore the SPU_LSLR register from CSA.
+ *
+ * The saved csa->priv2.spu_lslr_RW value is written to
+ * the priv2 register, followed by eieio().
+ */
+ out_be64(&priv2->spu_lslr_RW, csa->priv2.spu_lslr_RW);
+ eieio();
+}
+
+static inline void restore_spu_cfg(struct spu_state *csa, struct spu *spu)
+{
+ struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+ /* Restore, Step 62:
+ * Restore the SPU_Cfg register from CSA.
+ *
+ * The saved csa->priv2.spu_cfg_RW value is written to
+ * the priv2 register, followed by eieio().
+ */
+ out_be64(&priv2->spu_cfg_RW, csa->priv2.spu_cfg_RW);
+ eieio();
+}
+
+static inline void restore_pm_trace(struct spu_state *csa, struct spu *spu)
+{
+ /* Restore, Step 63:
+ * Restore PM_Trace_Tag_Wait_Mask from CSA.
+ * Not performed by this implementation.
+ *
+ * The body is intentionally empty; the function is
+ * still called from restore_csa() as Step 63.
+ */
+}
+
+static inline void restore_spu_npc(struct spu_state *csa, struct spu *spu)
+{
+ struct spu_problem __iomem *prob = spu->problem;
+
+ /* Restore, Step 64:
+ * Restore SPU_NPC from CSA.
+ *
+ * The saved csa->prob.spu_npc_RW value is written to the
+ * problem-state register, followed by eieio().
+ */
+ out_be32(&prob->spu_npc_RW, csa->prob.spu_npc_RW);
+ eieio();
+}
+
+static inline void restore_spu_mb(struct spu_state *csa, struct spu *spu)
+{
+ struct spu_priv2 __iomem *priv2 = spu->priv2;
+ int i;
+
+ /* Restore, Step 65:
+ * Restore MFC_RdSPU_MB from CSA.
+ *
+ * Channel 29 is selected, its saved count is written,
+ * and then the four saved words of
+ * csa->spu_mailbox_data[] are written in order to the
+ * channel data register.
+ */
+ out_be64(&priv2->spu_chnlcntptr_RW, 29UL);
+ eieio();
+ out_be64(&priv2->spu_chnlcnt_RW, csa->spu_chnlcnt_RW[29]);
+ for (i = 0; i < 4; i++) {
+ out_be64(&priv2->spu_chnldata_RW, csa->spu_mailbox_data[i]);
+ }
+ eieio();
+}
+
+static inline void check_ppu_mb_stat(struct spu_state *csa, struct spu *spu)
+{
+ struct spu_problem __iomem *prob = spu->problem;
+
+ /* Restore, Step 66:
+ * If CSA.MB_Stat[P]=0 (mailbox empty) then
+ * read from the PPU_MB register.
+ *
+ * MB_Stat[P] is tested as the low byte (0xFF) of the
+ * saved mb_stat_R.  The value read from pu_mb_R is
+ * discarded.
+ */
+ if ((csa->prob.mb_stat_R & 0xFF) == 0) {
+ in_be32(&prob->pu_mb_R);
+ eieio();
+ }
+}
+
+static inline void check_ppuint_mb_stat(struct spu_state *csa, struct spu *spu)
+{
+ struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+ /* Restore, Step 67:
+ * If CSA.MB_Stat[I]=0 (mailbox empty) then
+ * read from the PPUINT_MB register.
+ *
+ * MB_Stat[I] is tested as bits 0xFF0000 of the saved
+ * mb_stat_R.  The value read from puint_mb_R is
+ * discarded, and the class 2 mailbox interrupt status
+ * (CLASS2_ENABLE_MAILBOX_INTR) is cleared afterwards.
+ */
+ if ((csa->prob.mb_stat_R & 0xFF0000) == 0) {
+ in_be64(&priv2->puint_mb_R);
+ eieio();
+ spu_int_stat_clear(spu, 2, CLASS2_ENABLE_MAILBOX_INTR);
+ eieio();
+ }
+}
+
+static inline void restore_mfc_sr1(struct spu_state *csa, struct spu *spu)
+{
+ /* Restore, Step 69:
+ * Restore the MFC_SR1 register from CSA.
+ *
+ * The saved csa->priv1.mfc_sr1_RW value is written
+ * through spu_mfc_sr1_set(), followed by eieio().
+ */
+ spu_mfc_sr1_set(spu, csa->priv1.mfc_sr1_RW);
+ eieio();
+}
+
+static inline void set_int_route(struct spu_state *csa, struct spu *spu)
+{
+	/*
+	 * Extra step (marked "NEW" in restore_csa()): pass the
+	 * context's last_ran value to spu_cpu_affinity_set().
+	 * The csa argument is not used.
+	 */
+	spu_cpu_affinity_set(spu, spu->ctx->last_ran);
+}
+
+static inline void restore_other_spu_access(struct spu_state *csa,
+ struct spu *spu)
+{
+ /* Restore, Step 70:
+ * Restore other SPU mappings to this SPU. TBD.
+ *
+ * The body is intentionally empty; the function is
+ * still called from restore_csa() as Step 70.
+ */
+}
+
+static inline void restore_spu_runcntl(struct spu_state *csa, struct spu *spu)
+{
+ struct spu_problem __iomem *prob = spu->problem;
+
+ /* Restore, Step 71:
+ * If CSA.SPU_Status[R]=1 then write
+ * SPU_RunCntl[R0R1]='01'.
+ *
+ * '01' is written as SPU_RUNCNTL_RUNNABLE.  Nothing is
+ * written when the saved status does not have the
+ * running bit set.
+ */
+ if (csa->prob.spu_status_R & SPU_STATUS_RUNNING) {
+ out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_RUNNABLE);
+ eieio();
+ }
+}
+
+static inline void restore_mfc_cntl(struct spu_state *csa, struct spu *spu)
+{
+ struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+ /* Restore, Step 72:
+ * Restore the MFC_CNTL register for the CSA.
+ *
+ * The saved csa->priv2.mfc_control_RW value is written
+ * back unmodified, followed by eieio().
+ */
+ out_be64(&priv2->mfc_control_RW, csa->priv2.mfc_control_RW);
+ eieio();
+
+ /*
+ * The queue is put back into the same state that was evident prior to
+ * the context switch. The suspend flag is added to the saved state in
+ * the csa, if the operational state was suspending or suspended. In
+ * this case, the code that suspended the mfc is responsible for
+ * continuing it. Note that SPE faults do not change the operational
+ * state of the spu.
+ */
+}
+
+static inline void enable_user_access(struct spu_state *csa, struct spu *spu)
+{
+ /* Restore, Step 73:
+ * Enable user-space access (if provided) to this
+ * SPU by mapping the virtual pages assigned to
+ * the SPU memory-mapped I/O (MMIO) for problem
+ * state. TBD.
+ *
+ * The body is intentionally empty; the function is
+ * still called from restore_csa() as Step 73.
+ */
+}
+
+static inline void reset_switch_active(struct spu_state *csa, struct spu *spu)
+{
+ /* Restore, Step 74:
+ * Reset the "context switch active" flag.
+ * Not performed by this implementation.
+ *
+ * The body is intentionally empty; the function is
+ * still called from restore_csa() as Step 74.
+ */
+}
+
+static inline void reenable_interrupts(struct spu_state *csa, struct spu *spu)
+{
+ /* Restore, Step 75:
+ * Re-enable SPU interrupts.
+ *
+ * The class 0, 1 and 2 interrupt masks are reloaded
+ * from csa->priv1 while holding spu->register_lock
+ * (taken with spin_lock_irq()).
+ */
+ spin_lock_irq(&spu->register_lock);
+ spu_int_mask_set(spu, 0, csa->priv1.int_mask_class0_RW);
+ spu_int_mask_set(spu, 1, csa->priv1.int_mask_class1_RW);
+ spu_int_mask_set(spu, 2, csa->priv1.int_mask_class2_RW);
+ spin_unlock_irq(&spu->register_lock);
+}
+
+static int quiece_spu(struct spu_state *prev, struct spu *spu)
+{
+ /*
+ * Combined steps 2-18 of SPU context save sequence, which
+ * quiesce the SPU state (disable SPU execution, MFC command
+ * queues, decrementer, SPU interrupts, etc.).
+ *
+ * Returns 0 on success.
+ * 2 if failed step 2.
+ * 6 if failed step 6.
+ *
+ * A failure returns immediately: when step 2 fails nothing
+ * else has run; when step 6 fails, steps 3-5 have already
+ * been performed and steps 7-18 are skipped.
+ */
+
+ if (check_spu_isolate(prev, spu)) { /* Step 2. */
+ return 2;
+ }
+ disable_interrupts(prev, spu); /* Step 3. */
+ set_watchdog_timer(prev, spu); /* Step 4. */
+ inhibit_user_access(prev, spu); /* Step 5. */
+ if (check_spu_isolate(prev, spu)) { /* Step 6. */
+ return 6;
+ }
+ set_switch_pending(prev, spu); /* Step 7. */
+ save_mfc_cntl(prev, spu); /* Step 8. */
+ save_spu_runcntl(prev, spu); /* Step 9. */
+ save_mfc_sr1(prev, spu); /* Step 10. */
+ save_spu_status(prev, spu); /* Step 11. */
+ save_mfc_stopped_status(prev, spu); /* Step 12. */
+ halt_mfc_decr(prev, spu); /* Step 13. */
+ save_timebase(prev, spu); /* Step 14. */
+ remove_other_spu_access(prev, spu); /* Step 15. */
+ do_mfc_mssync(prev, spu); /* Step 16. */
+ issue_mfc_tlbie(prev, spu); /* Step 17. */
+ handle_pending_interrupts(prev, spu); /* Step 18. */
+
+ return 0;
+}
+
+static void save_csa(struct spu_state *prev, struct spu *spu)
+{
+ /*
+ * Combine steps 19-44 of SPU context save sequence, which
+ * save regions of the privileged & problem state areas.
+ *
+ * As listed below: there is no call for Step 29, the
+ * original Step 44 (save_mfc_cmd) runs early as Step 26a,
+ * save_ppu_tagstatus() is an extra unnumbered step, and
+ * reset_ch() (Step 45) is also performed here.
+ */
+
+ save_mfc_queues(prev, spu); /* Step 19. */
+ save_ppu_querymask(prev, spu); /* Step 20. */
+ save_ppu_querytype(prev, spu); /* Step 21. */
+ save_ppu_tagstatus(prev, spu); /* NEW. */
+ save_mfc_csr_tsq(prev, spu); /* Step 22. */
+ save_mfc_csr_cmd(prev, spu); /* Step 23. */
+ save_mfc_csr_ato(prev, spu); /* Step 24. */
+ save_mfc_tclass_id(prev, spu); /* Step 25. */
+ set_mfc_tclass_id(prev, spu); /* Step 26. */
+ save_mfc_cmd(prev, spu); /* Step 26a - moved from 44. */
+ purge_mfc_queue(prev, spu); /* Step 27. */
+ wait_purge_complete(prev, spu); /* Step 28. */
+ setup_mfc_sr1(prev, spu); /* Step 30. */
+ save_spu_npc(prev, spu); /* Step 31. */
+ save_spu_privcntl(prev, spu); /* Step 32. */
+ reset_spu_privcntl(prev, spu); /* Step 33. */
+ save_spu_lslr(prev, spu); /* Step 34. */
+ reset_spu_lslr(prev, spu); /* Step 35. */
+ save_spu_cfg(prev, spu); /* Step 36. */
+ save_pm_trace(prev, spu); /* Step 37. */
+ save_mfc_rag(prev, spu); /* Step 38. */
+ save_ppu_mb_stat(prev, spu); /* Step 39. */
+ save_ppu_mb(prev, spu); /* Step 40. */
+ save_ppuint_mb(prev, spu); /* Step 41. */
+ save_ch_part1(prev, spu); /* Step 42. */
+ save_spu_mb(prev, spu); /* Step 43. */
+ reset_ch(prev, spu); /* Step 45. */
+}
+
+static void save_lscsa(struct spu_state *prev, struct spu *spu)
+{
+ /*
+ * Perform steps 46-57 of SPU context save sequence,
+ * which save regions of the local store and register
+ * file.
+ *
+ * Step 47 passes spu_save_code and its size to
+ * setup_mfc_slbs().  The sequence ends by waiting for the
+ * tag group (Step 56) and for the SPU to stop (Step 57);
+ * the outcome is checked afterwards by the caller.
+ */
+
+ resume_mfc_queue(prev, spu); /* Step 46. */
+ /* Step 47. */
+ setup_mfc_slbs(prev, spu, spu_save_code, sizeof(spu_save_code));
+ set_switch_active(prev, spu); /* Step 48. */
+ enable_interrupts(prev, spu); /* Step 49. */
+ save_ls_16kb(prev, spu); /* Step 50. */
+ set_spu_npc(prev, spu); /* Step 51. */
+ set_signot1(prev, spu); /* Step 52. */
+ set_signot2(prev, spu); /* Step 53. */
+ send_save_code(prev, spu); /* Step 54. */
+ set_ppu_querymask(prev, spu); /* Step 55. */
+ wait_tag_complete(prev, spu); /* Step 56. */
+ wait_spu_stopped(prev, spu); /* Step 57. */
+}
+
+static void force_spu_isolate_exit(struct spu *spu)
+{
+ struct spu_problem __iomem *prob = spu->problem;
+ struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+ /*
+ * Force an SPU out of isolated state.  The register writes
+ * below are order-dependent and separated by I/O barriers.
+ * Both polls are unbounded busy-waits on SPU_Status.
+ */
+
+ /* Stop SPE execution and wait for completion. */
+ out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_STOP);
+ iobarrier_rw();
+ POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) & SPU_STATUS_RUNNING);
+
+ /* Restart SPE master runcntl. */
+ spu_mfc_sr1_set(spu, MFC_STATE1_MASTER_RUN_CONTROL_MASK);
+ iobarrier_w();
+
+ /* Initiate isolate exit request and wait for completion. */
+ out_be64(&priv2->spu_privcntl_RW, 4LL);
+ iobarrier_w();
+ out_be32(&prob->spu_runcntl_RW, 2);
+ iobarrier_rw();
+ POLL_WHILE_FALSE((in_be32(&prob->spu_status_R)
+ & SPU_STATUS_STOPPED_BY_STOP));
+
+ /* Reset load request to normal. */
+ out_be64(&priv2->spu_privcntl_RW, SPU_PRIVCNT_LOAD_REQUEST_NORMAL);
+ iobarrier_w();
+}
+
+/**
+ * stop_spu_isolate
+ * Check SPU run-control state and force isolated
+ * exit function as necessary.
+ * @spu: SPU whose live SPU_Status register is examined.
+ *
+ * Does nothing unless SPU_STATUS_ISOLATED_STATE is set.
+ */
+static void stop_spu_isolate(struct spu *spu)
+{
+ struct spu_problem __iomem *prob = spu->problem;
+
+ if (in_be32(&prob->spu_status_R) & SPU_STATUS_ISOLATED_STATE) {
+ /* The SPU is in isolated state; the only way
+ * to get it out is to perform an isolated
+ * exit (clean) operation.
+ */
+ force_spu_isolate_exit(spu);
+ }
+}
+
+static void harvest(struct spu_state *prev, struct spu *spu)
+{
+ /*
+ * Perform steps 2-25 of SPU context restore sequence,
+ * which resets an SPU either after a failed save, or
+ * when using SPU for first time.
+ *
+ * prev may be NULL: spu_restore() calls harvest(NULL, spu).
+ * Step 10 (clear_spu_status) only runs when suspend_spe()
+ * in Step 9 returns 0.  stop_spu_isolate() is an extra,
+ * unnumbered step between Steps 5 and 6.
+ */
+
+ disable_interrupts(prev, spu); /* Step 2. */
+ inhibit_user_access(prev, spu); /* Step 3. */
+ terminate_spu_app(prev, spu); /* Step 4. */
+ set_switch_pending(prev, spu); /* Step 5. */
+ stop_spu_isolate(spu); /* NEW. */
+ remove_other_spu_access(prev, spu); /* Step 6. */
+ suspend_mfc_and_halt_decr(prev, spu); /* Step 7. */
+ wait_suspend_mfc_complete(prev, spu); /* Step 8. */
+ if (!suspend_spe(prev, spu)) /* Step 9. */
+ clear_spu_status(prev, spu); /* Step 10. */
+ do_mfc_mssync(prev, spu); /* Step 11. */
+ issue_mfc_tlbie(prev, spu); /* Step 12. */
+ handle_pending_interrupts(prev, spu); /* Step 13. */
+ purge_mfc_queue(prev, spu); /* Step 14. */
+ wait_purge_complete(prev, spu); /* Step 15. */
+ reset_spu_privcntl(prev, spu); /* Step 16. */
+ reset_spu_lslr(prev, spu); /* Step 17. */
+ setup_mfc_sr1(prev, spu); /* Step 18. */
+ spu_invalidate_slbs(spu); /* Step 19. */
+ reset_ch_part1(prev, spu); /* Step 20. */
+ reset_ch_part2(prev, spu); /* Step 21. */
+ enable_interrupts(prev, spu); /* Step 22. */
+ set_switch_active(prev, spu); /* Step 23. */
+ set_mfc_tclass_id(prev, spu); /* Step 24. */
+ resume_mfc_queue(prev, spu); /* Step 25. */
+}
+
+static void restore_lscsa(struct spu_state *next, struct spu *spu)
+{
+ /*
+ * Perform steps 26-40 of SPU context restore sequence,
+ * which restores regions of the local store and register
+ * file.
+ *
+ * Steps 27-28 and 34-36 only fill in fields of next->lscsa.
+ * Step 30 passes spu_restore_code and its size to
+ * setup_mfc_slbs().  The sequence ends by waiting for the
+ * tag group (Step 39) and for the SPU to stop (Step 40);
+ * the outcome is checked afterwards by the caller.
+ */
+
+ set_watchdog_timer(next, spu); /* Step 26. */
+ setup_spu_status_part1(next, spu); /* Step 27. */
+ setup_spu_status_part2(next, spu); /* Step 28. */
+ restore_mfc_rag(next, spu); /* Step 29. */
+ /* Step 30. */
+ setup_mfc_slbs(next, spu, spu_restore_code, sizeof(spu_restore_code));
+ set_spu_npc(next, spu); /* Step 31. */
+ set_signot1(next, spu); /* Step 32. */
+ set_signot2(next, spu); /* Step 33. */
+ setup_decr(next, spu); /* Step 34. */
+ setup_ppu_mb(next, spu); /* Step 35. */
+ setup_ppuint_mb(next, spu); /* Step 36. */
+ send_restore_code(next, spu); /* Step 37. */
+ set_ppu_querymask(next, spu); /* Step 38. */
+ wait_tag_complete(next, spu); /* Step 39. */
+ wait_spu_stopped(next, spu); /* Step 40. */
+}
+
+static void restore_csa(struct spu_state *next, struct spu *spu)
+{
+ /*
+ * Combine steps 41-76 of SPU context restore sequence, which
+ * restore regions of the privileged & problem state areas.
+ *
+ * The last numbered call below is Step 75.  Step 68 is
+ * replaced by spu_invalidate_slbs(), and set_int_route()
+ * is an extra, unnumbered step.  Steps 63, 70, 73 and 74
+ * call functions whose bodies are empty.
+ */
+
+ restore_spu_privcntl(next, spu); /* Step 41. */
+ restore_status_part1(next, spu); /* Step 42. */
+ restore_status_part2(next, spu); /* Step 43. */
+ restore_ls_16kb(next, spu); /* Step 44. */
+ wait_tag_complete(next, spu); /* Step 45. */
+ suspend_mfc(next, spu); /* Step 46. */
+ wait_suspend_mfc_complete(next, spu); /* Step 47. */
+ issue_mfc_tlbie(next, spu); /* Step 48. */
+ clear_interrupts(next, spu); /* Step 49. */
+ restore_mfc_queues(next, spu); /* Step 50. */
+ restore_ppu_querymask(next, spu); /* Step 51. */
+ restore_ppu_querytype(next, spu); /* Step 52. */
+ restore_mfc_csr_tsq(next, spu); /* Step 53. */
+ restore_mfc_csr_cmd(next, spu); /* Step 54. */
+ restore_mfc_csr_ato(next, spu); /* Step 55. */
+ restore_mfc_tclass_id(next, spu); /* Step 56. */
+ set_llr_event(next, spu); /* Step 57. */
+ restore_decr_wrapped(next, spu); /* Step 58. */
+ restore_ch_part1(next, spu); /* Step 59. */
+ restore_ch_part2(next, spu); /* Step 60. */
+ restore_spu_lslr(next, spu); /* Step 61. */
+ restore_spu_cfg(next, spu); /* Step 62. */
+ restore_pm_trace(next, spu); /* Step 63. */
+ restore_spu_npc(next, spu); /* Step 64. */
+ restore_spu_mb(next, spu); /* Step 65. */
+ check_ppu_mb_stat(next, spu); /* Step 66. */
+ check_ppuint_mb_stat(next, spu); /* Step 67. */
+ spu_invalidate_slbs(spu); /* Modified Step 68. */
+ restore_mfc_sr1(next, spu); /* Step 69. */
+ set_int_route(next, spu); /* NEW */
+ restore_other_spu_access(next, spu); /* Step 70. */
+ restore_spu_runcntl(next, spu); /* Step 71. */
+ restore_mfc_cntl(next, spu); /* Step 72. */
+ enable_user_access(next, spu); /* Step 73. */
+ reset_switch_active(next, spu); /* Step 74. */
+ reenable_interrupts(next, spu); /* Step 75. */
+}
+
+static int __do_spu_save(struct spu_state *prev, struct spu *spu)
+{
+	int rc;
+
+	/*
+	 * SPU context save can be broken into three phases:
+	 *
+	 *     (a) quiesce [steps 2-18], see quiece_spu().
+	 *     (b) save of CSA, performed by PPE [steps 19-45],
+	 *         see save_csa().
+	 *     (c) save of LSCSA, mostly performed by SPU
+	 *         [steps 46-57], see save_lscsa().
+	 *
+	 * Returns      0 on success.
+	 *              2,6 if failed to quiece SPU
+	 *              otherwise the non-zero result of
+	 *              check_save_status() if the SPU-side of
+	 *              the save failed.
+	 */
+
+	rc = quiece_spu(prev, spu);
+	if (rc) {
+		/*
+		 * Quiesce failed: reset the SPU and hand the failing
+		 * step number back to the caller.
+		 */
+		harvest(prev, spu);
+		return rc;
+	}
+
+	save_csa(prev, spu);
+	save_lscsa(prev, spu);
+
+	return check_save_status(prev, spu);
+}
+
+static int __do_spu_restore(struct spu_state *next, struct spu *spu)
+{
+	int rc;
+
+	/*
+	 * SPU context restore can be broken into three phases:
+	 *
+	 *    (a) harvest (or reset) SPU [steps 2-25], see harvest().
+	 *    (b) restore LSCSA [steps 26-40], mostly performed by
+	 *        SPU, see restore_lscsa().
+	 *    (c) restore CSA [steps 41-75], performed by PPE,
+	 *        see restore_csa().
+	 *
+	 * The 'harvest' step is not performed here; the caller is
+	 * expected to have run it.
+	 *
+	 * Returns 0 on success, or the non-zero result of
+	 * check_restore_status() if the SPU-side restore failed,
+	 * in which case the CSA is not restored.
+	 */
+
+	restore_lscsa(next, spu);
+
+	rc = check_restore_status(next, spu);	/* Step 40. */
+	if (rc)
+		return rc;
+
+	restore_csa(next, spu);
+
+	return 0;
+}
+
+/**
+ * spu_save - SPU context save, with locking.
+ * @prev: pointer to SPU context save area, to be saved.
+ * @spu: pointer to SPU iomem structure.
+ *
+ * Acquire locks, perform the save operation then return.
+ *
+ * Always returns 0.  A result of 2 or 6 from the save (failed
+ * quiesce) is not reported to the caller; any other non-zero
+ * result causes a panic.
+ */
+int spu_save(struct spu_state *prev, struct spu *spu)
+{
+ int rc;
+
+ acquire_spu_lock(spu); /* Step 1. */
+ rc = __do_spu_save(prev, spu); /* Steps 2-53. */
+ release_spu_lock(spu);
+ if (rc != 0 && rc != 2 && rc != 6) {
+ panic("%s failed on SPU[%d], rc=%d.\n",
+ __func__, spu->number, rc);
+ }
+ return 0;
+}
+EXPORT_SYMBOL_GPL(spu_save);
+
+/**
+ * spu_restore - SPU context restore, with harvest and locking.
+ * @new: pointer to SPU context save area, to be restored.
+ * @spu: pointer to SPU iomem structure.
+ *
+ * Perform harvest + restore, as we may not be coming
+ * from a previous successful save operation, and the
+ * hardware state is unknown.
+ *
+ * harvest() is called with a NULL save area, and
+ * spu->slb_replace is reset to 0 before the restore.
+ * A non-zero result from the restore causes a panic, so the
+ * value returned to the caller is 0.
+ */
+int spu_restore(struct spu_state *new, struct spu *spu)
+{
+ int rc;
+
+ acquire_spu_lock(spu);
+ harvest(NULL, spu);
+ spu->slb_replace = 0;
+ rc = __do_spu_restore(new, spu);
+ release_spu_lock(spu);
+ if (rc) {
+ panic("%s failed on SPU[%d] rc=%d.\n",
+ __func__, spu->number, rc);
+ }
+ return rc;
+}
+EXPORT_SYMBOL_GPL(spu_restore);
+
+static void init_prob(struct spu_state *csa)
+{
+	/* Initial problem-state values of a fresh save area. */
+	csa->prob.mb_stat_R = 0x000400;
+	csa->prob.spu_runcntl_RW = SPU_RUNCNTL_STOP;
+
+	/* Initial channel counts: 16 for channel 21, 1 for the others. */
+	csa->spu_chnlcnt_RW[21] = 16;
+	csa->spu_chnlcnt_RW[9] = 1;
+	csa->spu_chnlcnt_RW[23] = 1;
+	csa->spu_chnlcnt_RW[28] = 1;
+	csa->spu_chnlcnt_RW[30] = 1;
+}
+
+static void init_priv1(struct spu_state *csa)
+{
+	/* Interrupt sources enabled for each of the three classes. */
+	csa->priv1.int_mask_class2_RW =
+		CLASS2_ENABLE_SPU_DMA_TAG_GROUP_COMPLETE_INTR |
+		CLASS2_ENABLE_SPU_HALT_INTR |
+		CLASS2_ENABLE_SPU_STOP_INTR;
+	csa->priv1.int_mask_class1_RW =
+		CLASS1_ENABLE_STORAGE_FAULT_INTR |
+		CLASS1_ENABLE_SEGMENT_FAULT_INTR;
+	csa->priv1.int_mask_class0_RW =
+		CLASS0_ENABLE_SPU_ERROR_INTR |
+		CLASS0_ENABLE_INVALID_DMA_COMMAND_INTR |
+		CLASS0_ENABLE_DMA_ALIGNMENT_INTR;
+
+	/*
+	 * Initial MFC_SR1: local storage decode, master run control,
+	 * problem state, relocate and bus tlbie bits.
+	 */
+	csa->priv1.mfc_sr1_RW =
+		MFC_STATE1_BUS_TLBIE_MASK |
+		MFC_STATE1_RELOCATE_MASK |
+		MFC_STATE1_PROBLEM_STATE_MASK |
+		MFC_STATE1_MASTER_RUN_CONTROL_MASK |
+		MFC_STATE1_LOCAL_STORAGE_DECODE_MASK;
+}
+
+static void init_priv2(struct spu_state *csa)
+{
+	/*
+	 * Initial MFC_CNTL: resume the DMA queue, normal queue
+	 * operation, queues-empty bits set.
+	 */
+	csa->priv2.mfc_control_RW = MFC_CNTL_DMA_QUEUES_EMPTY_MASK |
+				    MFC_CNTL_NORMAL_DMA_QUEUE_OPERATION |
+				    MFC_CNTL_RESUME_DMA_QUEUE;
+
+	/* Initial local-store limit register value. */
+	csa->priv2.spu_lslr_RW = LS_ADDR_MASK;
+}
+
+/**
+ * spu_init_csa - allocate and initialize an SPU context save area.
+ * @csa: save area to initialize; its previous contents are zeroed.
+ *
+ * Allocate and initialize the contents of an SPU context save area.
+ * This includes enabling address translation, interrupt masks, etc.,
+ * as appropriate for the given OS environment.
+ *
+ * Note that storage for the 'lscsa' is allocated separately,
+ * as it is by far the largest of the context save regions,
+ * and may need to be pinned or otherwise specially aligned.
+ *
+ * Return: 0 on success, -EINVAL if @csa is NULL, or the error
+ * returned by spu_alloc_lscsa().
+ */
+int spu_init_csa(struct spu_state *csa)
+{
+ int rc;
+
+ if (!csa)
+ return -EINVAL;
+ memset(csa, 0, sizeof(struct spu_state));
+
+ rc = spu_alloc_lscsa(csa);
+ if (rc)
+ return rc;
+
+ spin_lock_init(&csa->register_lock);
+
+ init_prob(csa);
+ init_priv1(csa);
+ init_priv2(csa);
+
+ return 0;
+}
+
+/*
+ * spu_fini_csa - release the lscsa of a save area set up by spu_init_csa().
+ * Only spu_free_lscsa() is called; the spu_state itself is not freed here.
+ */
+void spu_fini_csa(struct spu_state *csa)
+{
+ spu_free_lscsa(csa);
+}
diff --git a/arch/powerpc/platforms/cell/spufs/syscalls.c b/arch/powerpc/platforms/cell/spufs/syscalls.c
new file mode 100644
index 000000000..157e046e6
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spufs/syscalls.c
@@ -0,0 +1,89 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/export.h>
+#include <linux/mount.h>
+#include <linux/namei.h>
+#include <linux/slab.h>
+
+#include <linux/uaccess.h>
+
+#include "spufs.h"
+
+/**
+ * do_spu_run - run code loaded into an SPU
+ * @filp: file that must have been created by spu_create
+ * @unpc: next program counter for the SPU
+ * @ustatus: status of the SPU (may be NULL, then no status is stored)
+ *
+ * This system call transfers the control of execution of a
+ * user space thread to an SPU. It will return when the
+ * SPU has finished executing or when it hits an error
+ * condition and it will be interrupted if a signal needs
+ * to be delivered to a handler in user space.
+ *
+ * The next program counter is set to the passed value
+ * before the SPU starts fetching code and the user space
+ * pointer gets updated with the new value when returning
+ * from kernel space.
+ *
+ * The status value returned from spu_run reflects the
+ * value of the spu_status register after the SPU has stopped.
+ * Returns spufs_run_spu()'s result, -EINVAL for a foreign @filp, or -EFAULT.
+ */
+static long do_spu_run(struct file *filp,
+ __u32 __user *unpc,
+ __u32 __user *ustatus)
+{
+ long ret;
+ struct spufs_inode_info *i;
+ u32 npc, status = 0; /* never hand uninitialized stack to user space */
+
+ ret = -EFAULT;
+ if (get_user(npc, unpc))
+ goto out;
+
+ /* check if this file was created by spu_create */
+ ret = -EINVAL;
+ if (filp->f_op != &spufs_context_fops)
+ goto out;
+
+ i = SPUFS_I(file_inode(filp));
+ ret = spufs_run_spu(i->i_ctx, &npc, &status);
+
+ if (put_user(npc, unpc))
+ ret = -EFAULT;
+
+ if (ustatus && put_user(status, ustatus))
+ ret = -EFAULT;
+out:
+ return ret;
+}
+
+static long do_spu_create(const char __user *pathname, unsigned int flags,
+ umode_t mode, struct file *neighbor)
+{
+ struct dentry *new_dentry;
+ struct path parent;
+ int err;
+
+ /* On failure user_path_create() hands back an ERR_PTR; nothing to undo. */
+ new_dentry = user_path_create(AT_FDCWD, pathname, &parent, LOOKUP_DIRECTORY);
+ if (IS_ERR(new_dentry))
+ return PTR_ERR(new_dentry);
+
+ err = spufs_create(&parent, new_dentry, flags, mode, neighbor);
+ done_path_create(&parent, new_dentry);
+ return err;
+}
+
+struct spufs_calls spufs_calls = {
+ .create_thread = do_spu_create,
+ .spu_run = do_spu_run,
+ .notify_spus_active = do_notify_spus_active,
+ .owner = THIS_MODULE,
+#ifdef CONFIG_COREDUMP
+ .coredump_extra_notes_size = spufs_coredump_extra_notes_size,
+ .coredump_extra_notes_write = spufs_coredump_extra_notes_write,
+#endif
+};
diff --git a/arch/powerpc/platforms/chrp/Kconfig b/arch/powerpc/platforms/chrp/Kconfig
new file mode 100644
index 000000000..ff30ed579
--- /dev/null
+++ b/arch/powerpc/platforms/chrp/Kconfig
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0
+config PPC_CHRP
+ bool "Common Hardware Reference Platform (CHRP) based machines"
+ depends on PPC_BOOK3S_32
+ select HAVE_PCSPKR_PLATFORM
+ select MPIC
+ select PPC_I8259
+ select PPC_INDIRECT_PCI
+ select PPC_RTAS
+ select PPC_RTAS_DAEMON
+ select RTAS_ERROR_LOGGING
+ select PPC_MPC106
+ select PPC_UDBG_16550
+ select PPC_HASH_MMU_NATIVE
+ select FORCE_PCI
+ default y
diff --git a/arch/powerpc/platforms/chrp/Makefile b/arch/powerpc/platforms/chrp/Makefile
new file mode 100644
index 000000000..05639db9a
--- /dev/null
+++ b/arch/powerpc/platforms/chrp/Makefile
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
+obj-y += setup.o time.o pegasos_eth.o pci.o
+obj-$(CONFIG_SMP) += smp.o
+obj-$(CONFIG_NVRAM:m=y) += nvram.o
diff --git a/arch/powerpc/platforms/chrp/chrp.h b/arch/powerpc/platforms/chrp/chrp.h
new file mode 100644
index 000000000..6ff4631d9
--- /dev/null
+++ b/arch/powerpc/platforms/chrp/chrp.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Declarations of CHRP platform-specific things.
+ */
+
+extern void chrp_nvram_init(void);
+extern void chrp_get_rtc_time(struct rtc_time *);
+extern int chrp_set_rtc_time(struct rtc_time *);
+extern long chrp_time_init(void);
+
+extern void chrp_find_bridges(void);
diff --git a/arch/powerpc/platforms/chrp/gg2.h b/arch/powerpc/platforms/chrp/gg2.h
new file mode 100644
index 000000000..341ae55b9
--- /dev/null
+++ b/arch/powerpc/platforms/chrp/gg2.h
@@ -0,0 +1,61 @@
+/*
+ * gg2.h -- VLSI VAS96011/12 `Golden Gate 2' register definitions
+ *
+ * Copyright (C) 1997 Geert Uytterhoeven
+ *
+ * This file is based on the following documentation:
+ *
+ * The VAS96011/12 Chipset, Data Book, Edition 1.0
+ * VLSI Technology, Inc.
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file COPYING in the main directory of this archive
+ * for more details.
+ */
+
+#ifndef _ASMPPC_GG2_H
+#define _ASMPPC_GG2_H
+
+ /*
+ * Memory Map (CHRP mode)
+ */
+
+#define GG2_PCI_MEM_BASE 0xc0000000 /* Peripheral memory space */
+#define GG2_ISA_MEM_BASE 0xf7000000 /* Peripheral memory alias */
+#define GG2_ISA_IO_BASE 0xf8000000 /* Peripheral I/O space */
+#define GG2_PCI_CONFIG_BASE 0xfec00000 /* PCI configuration space */
+#define GG2_INT_ACK_SPECIAL 0xfec80000 /* Interrupt acknowledge and */
+ /* special PCI cycles */
+#define GG2_ROM_BASE0 0xff000000 /* ROM bank 0 */
+#define GG2_ROM_BASE1 0xff800000 /* ROM bank 1 */
+
+
+ /*
+ * GG2 specific PCI Registers
+ */
+
+extern void __iomem *gg2_pci_config_base; /* kernel virtual address */
+
+#define GG2_PCI_BUSNO 0x40 /* Bus number */
+#define GG2_PCI_SUBBUSNO 0x41 /* Subordinate bus number */
+#define GG2_PCI_DISCCTR 0x42 /* Disconnect counter */
+#define GG2_PCI_PPC_CTRL 0x50 /* PowerPC interface control register */
+#define GG2_PCI_ADDR_MAP 0x5c /* Address map */
+#define GG2_PCI_PCI_CTRL 0x60 /* PCI interface control register */
+#define GG2_PCI_ROM_CTRL 0x70 /* ROM interface control register */
+#define GG2_PCI_ROM_TIME 0x74 /* ROM timing */
+#define GG2_PCI_CC_CTRL 0x80 /* Cache controller control register */
+#define GG2_PCI_DRAM_BANK0 0x90 /* Control register for DRAM bank #0 */
+#define GG2_PCI_DRAM_BANK1 0x94 /* Control register for DRAM bank #1 */
+#define GG2_PCI_DRAM_BANK2 0x98 /* Control register for DRAM bank #2 */
+#define GG2_PCI_DRAM_BANK3 0x9c /* Control register for DRAM bank #3 */
+#define GG2_PCI_DRAM_BANK4 0xa0 /* Control register for DRAM bank #4 */
+#define GG2_PCI_DRAM_BANK5 0xa4 /* Control register for DRAM bank #5 */
+#define GG2_PCI_DRAM_TIME0 0xb0 /* Timing parameters set #0 */
+#define GG2_PCI_DRAM_TIME1 0xb4 /* Timing parameters set #1 */
+#define GG2_PCI_DRAM_CTRL 0xc0 /* DRAM control */
+#define GG2_PCI_ERR_CTRL 0xd0 /* Error control register */
+#define GG2_PCI_ERR_STATUS 0xd4 /* Error status register */
+ /* Cleared when read */
+
+#endif /* _ASMPPC_GG2_H */
diff --git a/arch/powerpc/platforms/chrp/nvram.c b/arch/powerpc/platforms/chrp/nvram.c
new file mode 100644
index 000000000..0eedae964
--- /dev/null
+++ b/arch/powerpc/platforms/chrp/nvram.c
@@ -0,0 +1,95 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * c 2001 PPC 64 Team, IBM Corp
+ *
+ * /dev/nvram driver for PPC
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/uaccess.h>
+#include <linux/of.h>
+#include <asm/machdep.h>
+#include <asm/rtas.h>
+#include "chrp.h"
+
+static unsigned int nvram_size;
+static unsigned char nvram_buf[4];
+static DEFINE_SPINLOCK(nvram_lock);
+
+static unsigned char chrp_nvram_read_val(int addr)
+{
+ unsigned char byte = 0xff; /* value reported on any failure */
+ unsigned long irqflags;
+ unsigned int nread;
+
+ if (addr >= nvram_size) {
+ printk(KERN_DEBUG "%s: read addr %d > nvram_size %u\n",
+ current->comm, addr, nvram_size);
+ return 0xff;
+ }
+
+ /* nvram_buf is the shared buffer handed to RTAS, so hold nvram_lock. */
+ spin_lock_irqsave(&nvram_lock, irqflags);
+ if (rtas_call(rtas_function_token(RTAS_FN_NVRAM_FETCH), 3, 2, &nread, addr,
+ __pa(nvram_buf), 1) == 0 && nread == 1)
+ byte = nvram_buf[0];
+ spin_unlock_irqrestore(&nvram_lock, irqflags);
+
+ return byte;
+}
+
+static void chrp_nvram_write_val(int addr, unsigned char val)
+{
+ unsigned int done;
+ unsigned long flags;
+
+ if (addr >= nvram_size) { /* out-of-range writes are logged and dropped */
+ printk(KERN_DEBUG "%s: write addr %d > nvram_size %u\n",
+ current->comm, addr, nvram_size);
+ return;
+ }
+ spin_lock_irqsave(&nvram_lock, flags);
+ nvram_buf[0] = val; /* RTAS is given __pa(nvram_buf) as the data source */
+ if ((rtas_call(rtas_function_token(RTAS_FN_NVRAM_STORE), 3, 2, &done, addr,
+ __pa(nvram_buf), 1) != 0) || 1 != done)
+ printk(KERN_DEBUG "rtas IO error storing 0x%02x at %d\n", val, addr);
+ spin_unlock_irqrestore(&nvram_lock, flags);
+}
+
+static ssize_t chrp_nvram_size(void)
+{
+ return nvram_size;
+}
+
+void __init chrp_nvram_init(void)
+{
+ struct device_node *nvram;
+ const __be32 *nbytes_p;
+ int proplen; /* of_get_property() returns the length through an int * */
+
+ nvram = of_find_node_by_type(NULL, "nvram");
+ if (nvram == NULL)
+ return;
+
+ /* "#bytes" must be exactly one 32-bit cell, otherwise leave the hooks unset. */
+ nbytes_p = of_get_property(nvram, "#bytes", &proplen);
+ if (nbytes_p == NULL || proplen != sizeof(unsigned int)) {
+ of_node_put(nvram);
+ return;
+ }
+
+ nvram_size = be32_to_cpup(nbytes_p);
+
+ printk(KERN_INFO "CHRP nvram contains %u bytes\n", nvram_size);
+ of_node_put(nvram);
+
+ /* Point the ppc_md nvram hooks at the RTAS-backed helpers in this file. */
+ ppc_md.nvram_read_val = chrp_nvram_read_val;
+ ppc_md.nvram_write_val = chrp_nvram_write_val;
+ ppc_md.nvram_size = chrp_nvram_size;
+}
+
+MODULE_LICENSE("GPL v2");
diff --git a/arch/powerpc/platforms/chrp/pci.c b/arch/powerpc/platforms/chrp/pci.c
new file mode 100644
index 000000000..428fd2a7b
--- /dev/null
+++ b/arch/powerpc/platforms/chrp/pci.c
@@ -0,0 +1,387 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * CHRP pci routines.
+ */
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/delay.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/pgtable.h>
+#include <linux/of_address.h>
+
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/hydra.h>
+#include <asm/machdep.h>
+#include <asm/sections.h>
+#include <asm/pci-bridge.h>
+#include <asm/grackle.h>
+#include <asm/rtas.h>
+
+#include "chrp.h"
+#include "gg2.h"
+
+/* LongTrail */
+void __iomem *gg2_pci_config_base;
+
+/*
+ * The VLSI Golden Gate II has only 512K of PCI configuration space, so we
+ * limit the bus number to 3 bits
+ */
+
+static int gg2_read_config(struct pci_bus *bus, unsigned int devfn, int off,
+ int len, u32 *val)
+{
+ volatile void __iomem *cfg_data;
+ struct pci_controller *hose = pci_bus_to_host(bus);
+
+ if (bus->number > 7)
+ return PCIBIOS_DEVICE_NOT_FOUND;
+ /*
+ * Note: the caller has already checked that off is
+ * suitably aligned and that len is 1, 2 or 4.
+ */
+ cfg_data = hose->cfg_data + ((bus->number<<16) | (devfn<<8) | off);
+ switch (len) {
+ case 1:
+ *val = in_8(cfg_data);
+ break;
+ case 2:
+ *val = in_le16(cfg_data);
+ break;
+ default:
+ *val = in_le32(cfg_data);
+ break;
+ }
+ return PCIBIOS_SUCCESSFUL;
+}
+
+static int gg2_write_config(struct pci_bus *bus, unsigned int devfn, int off,
+ int len, u32 val)
+{
+ volatile void __iomem *cfg_data;
+ struct pci_controller *hose = pci_bus_to_host(bus);
+
+ if (bus->number > 7)
+ return PCIBIOS_DEVICE_NOT_FOUND;
+ /*
+ * Note: the caller has already checked that off is
+ * suitably aligned and that len is 1, 2 or 4.
+ */
+ cfg_data = hose->cfg_data + ((bus->number<<16) | (devfn<<8) | off);
+ switch (len) {
+ case 1:
+ out_8(cfg_data, val);
+ break;
+ case 2:
+ out_le16(cfg_data, val);
+ break;
+ default:
+ out_le32(cfg_data, val);
+ break;
+ }
+ return PCIBIOS_SUCCESSFUL;
+}
+
+static struct pci_ops gg2_pci_ops =
+{
+ .read = gg2_read_config,
+ .write = gg2_write_config,
+};
+
+/*
+ * Access functions for PCI config space using RTAS calls.
+ */
+static int rtas_read_config(struct pci_bus *bus, unsigned int devfn, int offset,
+ int len, u32 *val)
+{
+ struct pci_controller *hose = pci_bus_to_host(bus);
+ unsigned long addr = (offset & 0xff) | ((devfn & 0xff) << 8)
+ | (((bus->number - hose->first_busno) & 0xff) << 16)
+ | (hose->global_number << 24);
+ int ret = -1;
+ int rval;
+
+ rval = rtas_call(rtas_function_token(RTAS_FN_READ_PCI_CONFIG), 2, 2, &ret, addr, len);
+ *val = ret;
+ return rval? PCIBIOS_DEVICE_NOT_FOUND: PCIBIOS_SUCCESSFUL;
+}
+
+static int rtas_write_config(struct pci_bus *bus, unsigned int devfn, int offset,
+ int len, u32 val)
+{
+ struct pci_controller *hose = pci_bus_to_host(bus);
+ unsigned long addr = (offset & 0xff) | ((devfn & 0xff) << 8)
+ | (((bus->number - hose->first_busno) & 0xff) << 16)
+ | (hose->global_number << 24);
+ int rval;
+
+ rval = rtas_call(rtas_function_token(RTAS_FN_WRITE_PCI_CONFIG), 3, 1, NULL,
+ addr, len, val);
+ return rval? PCIBIOS_DEVICE_NOT_FOUND: PCIBIOS_SUCCESSFUL;
+}
+
+static struct pci_ops rtas_pci_ops =
+{
+ .read = rtas_read_config,
+ .write = rtas_write_config,
+};
+
+volatile struct Hydra __iomem *Hydra = NULL;
+
+static int __init hydra_init(void)
+{
+ struct device_node *np;
+ struct resource r;
+
+ np = of_find_node_by_name(NULL, "mac-io");
+ if (np == NULL || of_address_to_resource(np, 0, &r)) {
+ of_node_put(np);
+ return 0;
+ }
+ of_node_put(np);
+ Hydra = ioremap(r.start, resource_size(&r));
+ if (!Hydra) /* mapping failed: report "no Hydra" instead of oopsing below */
+ return 0;
+ printk("Hydra Mac I/O at %llx\n", (unsigned long long)r.start);
+ printk("Hydra Feature_Control was %x",
+ in_le32(&Hydra->Feature_Control));
+ out_le32(&Hydra->Feature_Control, (HYDRA_FC_SCC_CELL_EN |
+ HYDRA_FC_SCSI_CELL_EN |
+ HYDRA_FC_SCCA_ENABLE | HYDRA_FC_SCCB_ENABLE |
+ HYDRA_FC_ARB_BYPASS | HYDRA_FC_MPIC_ENABLE |
+ HYDRA_FC_SLOW_SCC_PCLK |
+ HYDRA_FC_MPIC_IS_MASTER));
+ printk(", now %x\n", in_le32(&Hydra->Feature_Control));
+ return 1;
+}
+
+#define PRG_CL_RESET_VALID 0x00010000
+
+static void __init
+setup_python(struct pci_controller *hose, struct device_node *dev)
+{
+ u32 __iomem *reg;
+ u32 val;
+ struct resource r;
+
+ if (of_address_to_resource(dev, 0, &r)) {
+ printk(KERN_ERR "No address for Python PCI controller\n");
+ return;
+ }
+
+ /* Clear the magic go-slow bit */
+ reg = ioremap(r.start + 0xf6000, 0x40);
+ BUG_ON(!reg);
+ val = in_be32(&reg[12]);
+ if (val & PRG_CL_RESET_VALID) {
+ out_be32(&reg[12], val & ~PRG_CL_RESET_VALID);
+ in_be32(&reg[12]);
+ }
+ iounmap(reg);
+
+ setup_indirect_pci(hose, r.start + 0xf8000, r.start + 0xf8010, 0);
+}
+
+/* Marvell Discovery II based Pegasos 2 */
+static void __init setup_peg2(struct pci_controller *hose, struct device_node *dev)
+{
+ struct device_node *root = of_find_node_by_path("/");
+ struct device_node *rtas;
+
+ rtas = of_find_node_by_name (root, "rtas");
+ if (rtas) {
+ hose->ops = &rtas_pci_ops;
+ of_node_put(rtas);
+ } else {
+ printk ("RTAS supporting Pegasos OF not found, please upgrade"
+ " your firmware\n");
+ }
+ pci_add_flags(PCI_REASSIGN_ALL_BUS);
+ /* keep the reference to the root node */
+}
+
+void __init
+chrp_find_bridges(void)
+{
+ struct device_node *dev;
+ const int *bus_range;
+ int len, index = -1;
+ struct pci_controller *hose;
+ const unsigned int *dma;
+ const char *model, *machine;
+ int is_longtrail = 0, is_mot = 0, is_pegasos = 0;
+ struct device_node *root = of_find_node_by_path("/");
+ struct resource r;
+ /*
+ * The PCI host bridge nodes on some machines don't have
+ * properties to adequately identify them, so we have to
+ * look at what sort of machine this is as well.
+ */
+ machine = of_get_property(root, "model", NULL);
+ if (machine != NULL) {
+ is_longtrail = strncmp(machine, "IBM,LongTrail", 13) == 0;
+ is_mot = strncmp(machine, "MOT", 3) == 0;
+ if (strncmp(machine, "Pegasos2", 8) == 0)
+ is_pegasos = 2;
+ else if (strncmp(machine, "Pegasos", 7) == 0)
+ is_pegasos = 1;
+ }
+ for_each_child_of_node(root, dev) {
+ if (!of_node_is_type(dev, "pci"))
+ continue;
+ ++index;
+ /* The GG2 bridge on the LongTrail doesn't have an address */
+ if (of_address_to_resource(dev, 0, &r) && !is_longtrail) {
+ printk(KERN_WARNING "Can't use %pOF: no address\n",
+ dev);
+ continue;
+ }
+ bus_range = of_get_property(dev, "bus-range", &len);
+ if (bus_range == NULL || len < 2 * sizeof(int)) {
+ printk(KERN_WARNING "Can't get bus-range for %pOF\n",
+ dev);
+ continue;
+ }
+ if (bus_range[1] == bus_range[0])
+ printk(KERN_INFO "PCI bus %d", bus_range[0]);
+ else
+ printk(KERN_INFO "PCI buses %d..%d",
+ bus_range[0], bus_range[1]);
+ printk(" controlled by %pOF", dev);
+ if (!is_longtrail)
+ printk(" at %llx", (unsigned long long)r.start);
+ printk("\n");
+
+ hose = pcibios_alloc_controller(dev);
+ if (!hose) {
+ printk("Can't allocate PCI controller structure for %pOF\n",
+ dev);
+ continue;
+ }
+ hose->first_busno = hose->self_busno = bus_range[0];
+ hose->last_busno = bus_range[1];
+
+ model = of_get_property(dev, "model", NULL);
+ if (model == NULL)
+ model = "<none>";
+ if (strncmp(model, "IBM, Python", 11) == 0) {
+ setup_python(hose, dev);
+ } else if (is_mot
+ || strncmp(model, "Motorola, Grackle", 17) == 0) {
+ setup_grackle(hose);
+ } else if (is_longtrail) {
+ void __iomem *p = ioremap(GG2_PCI_CONFIG_BASE, 0x80000);
+ hose->ops = &gg2_pci_ops;
+ hose->cfg_data = p;
+ gg2_pci_config_base = p;
+ } else if (is_pegasos == 1) {
+ setup_indirect_pci(hose, 0xfec00cf8, 0xfee00cfc, 0);
+ } else if (is_pegasos == 2) {
+ setup_peg2(hose, dev);
+ } else if (!strncmp(model, "IBM,CPC710", 10)) {
+ setup_indirect_pci(hose,
+ r.start + 0x000f8000,
+ r.start + 0x000f8010,
+ 0);
+ if (index == 0) {
+ dma = of_get_property(dev, "system-dma-base",
+ &len);
+ if (dma && len >= sizeof(*dma)) {
+ dma = (unsigned int *)
+ (((unsigned long)dma) +
+ len - sizeof(*dma));
+ pci_dram_offset = *dma;
+ }
+ }
+ } else {
+ printk("No methods for %pOF (model %s), using RTAS\n",
+ dev, model);
+ hose->ops = &rtas_pci_ops;
+ }
+
+ pci_process_bridge_OF_ranges(hose, dev, index == 0);
+
+ /* check the first bridge for a property that we can
+ use to set pci_dram_offset */
+ dma = of_get_property(dev, "ibm,dma-ranges", &len);
+ if (index == 0 && dma != NULL && len >= 6 * sizeof(*dma)) {
+ pci_dram_offset = dma[2] - dma[3];
+ printk("pci_dram_offset = %lx\n", pci_dram_offset);
+ }
+ }
+ of_node_put(root);
+
+ /*
+ * "Temporary" fixes for PCI devices.
+ * -- Geert
+ */
+ hydra_init(); /* Mac I/O */
+
+ pci_create_OF_bus_map();
+}
+
+/* SL82C105 IDE Control/Status Register */
+#define SL82C105_IDECSR 0x40
+
+/* Fixup for Winbond ATA quirk, required for briq mostly because the
+ * 8259 is configured for level sensitive IRQ 14 and so wants the
+ * ATA controller to be set to fully native mode or bad things
+ * will happen.
+ */
+static void chrp_pci_fixup_winbond_ata(struct pci_dev *sl82c105)
+{
+ u8 progif;
+
+ /* If non-briq machines need that fixup too, please speak up */
+ if (!machine_is(chrp) || _chrp_type != _CHRP_briq)
+ return;
+
+ if ((sl82c105->class & 5) != 5) {
+ printk("W83C553: Switching SL82C105 IDE to PCI native mode\n");
+ /* Enable SL82C105 PCI native IDE mode */
+ pci_read_config_byte(sl82c105, PCI_CLASS_PROG, &progif);
+ pci_write_config_byte(sl82c105, PCI_CLASS_PROG, progif | 0x05);
+ sl82c105->class |= 0x05;
+ /* Disable SL82C105 second port */
+ pci_write_config_word(sl82c105, SL82C105_IDECSR, 0x0003);
+ /* Clear IO BARs, they will be reassigned */
+ pci_write_config_dword(sl82c105, PCI_BASE_ADDRESS_0, 0);
+ pci_write_config_dword(sl82c105, PCI_BASE_ADDRESS_1, 0);
+ pci_write_config_dword(sl82c105, PCI_BASE_ADDRESS_2, 0);
+ pci_write_config_dword(sl82c105, PCI_BASE_ADDRESS_3, 0);
+ }
+}
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_WINBOND, PCI_DEVICE_ID_WINBOND_82C105,
+ chrp_pci_fixup_winbond_ata);
+
+/* Pegasos2 firmware version 20040810 configures the built-in IDE controller
+ * in legacy mode, but sets the PCI registers to PCI native mode.
+ * The chip can only operate in legacy mode, so force the PCI class into legacy
+ * mode as well. The same fixup must be done to the class-code property in
+ * the IDE node /pci@80000000/ide@C,1
+ */
+static void chrp_pci_fixup_vt8231_ata(struct pci_dev *viaide)
+{
+ u8 progif;
+ struct pci_dev *viaisa;
+
+ if (!machine_is(chrp) || _chrp_type != _CHRP_Pegasos)
+ return;
+ if (viaide->irq != 14)
+ return;
+
+ viaisa = pci_get_device(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8231, NULL);
+ if (!viaisa)
+ return;
+ dev_info(&viaide->dev, "Fixing VIA IDE, force legacy mode on\n");
+
+ pci_read_config_byte(viaide, PCI_CLASS_PROG, &progif);
+ pci_write_config_byte(viaide, PCI_CLASS_PROG, progif & ~0x5);
+ viaide->class &= ~0x5;
+
+ pci_dev_put(viaisa);
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C586_1, chrp_pci_fixup_vt8231_ata);
diff --git a/arch/powerpc/platforms/chrp/pegasos_eth.c b/arch/powerpc/platforms/chrp/pegasos_eth.c
new file mode 100644
index 000000000..5c4f1a9ca
--- /dev/null
+++ b/arch/powerpc/platforms/chrp/pegasos_eth.c
@@ -0,0 +1,201 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2005 Sven Luther <sl@bplan-gmbh.de>
+ * Thanks to :
+ * Dale Farnsworth <dale@farnsworth.org>
+ * Mark A. Greer <mgreer@mvista.com>
+ * Nicolas DET <nd@bplan-gmbh.de>
+ * Benjamin Herrenschmidt <benh@kernel.crashing.org>
+ * And anyone else who helped me on this.
+ */
+
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/ioport.h>
+#include <linux/device.h>
+#include <linux/platform_device.h>
+#include <linux/mv643xx.h>
+#include <linux/pci.h>
+
+#define PEGASOS2_MARVELL_REGBASE (0xf1000000)
+#define PEGASOS2_MARVELL_REGSIZE (0x00004000)
+#define PEGASOS2_SRAM_BASE (0xf2000000)
+#define PEGASOS2_SRAM_SIZE (256*1024)
+
+#define PEGASOS2_SRAM_BASE_ETH_PORT0 (PEGASOS2_SRAM_BASE)
+#define PEGASOS2_SRAM_BASE_ETH_PORT1 (PEGASOS2_SRAM_BASE_ETH_PORT0 + (PEGASOS2_SRAM_SIZE / 2) )
+
+
+#define PEGASOS2_SRAM_RXRING_SIZE (PEGASOS2_SRAM_SIZE/4)
+#define PEGASOS2_SRAM_TXRING_SIZE (PEGASOS2_SRAM_SIZE/4)
+
+#undef BE_VERBOSE
+
+static struct resource mv643xx_eth_shared_resources[] = {
+ [0] = { /* shared ethernet registers inside the Marvell register block */
+ .name = "ethernet shared base",
+ .start = PEGASOS2_MARVELL_REGBASE + MV643XX_ETH_SHARED_REGS,
+ .end = PEGASOS2_MARVELL_REGBASE + MV643XX_ETH_SHARED_REGS +
+ MV643XX_ETH_SHARED_REGS_SIZE - 1,
+ .flags = IORESOURCE_MEM,
+ },
+};
+
+static struct platform_device mv643xx_eth_shared_device = {
+ .name = MV643XX_ETH_SHARED_NAME,
+ .id = 0,
+ .num_resources = ARRAY_SIZE(mv643xx_eth_shared_resources),
+ .resource = mv643xx_eth_shared_resources,
+};
+
+/*
+ * The orion mdio driver only covers shared + 0x4 up to shared + 0x84 - 1
+ */
+static struct resource mv643xx_eth_mvmdio_resources[] = {
+ [0] = { /* shared + 0x4 .. shared + 0x83, relative to the Marvell block */
+ .name = "ethernet mdio base",
+ .start = PEGASOS2_MARVELL_REGBASE + MV643XX_ETH_SHARED_REGS + 0x4,
+ .end = PEGASOS2_MARVELL_REGBASE + MV643XX_ETH_SHARED_REGS + 0x83,
+ .flags = IORESOURCE_MEM,
+ },
+};
+
+static struct platform_device mv643xx_eth_mvmdio_device = {
+ .name = "orion-mdio",
+ .id = -1,
+ .num_resources = ARRAY_SIZE(mv643xx_eth_mvmdio_resources),
+ .resource = mv643xx_eth_mvmdio_resources,
+};
+
+static struct resource mv643xx_eth_port1_resources[] = {
+ [0] = {
+ .name = "eth port1 irq",
+ .start = 9,
+ .end = 9,
+ .flags = IORESOURCE_IRQ,
+ },
+};
+
+static struct mv643xx_eth_platform_data eth_port1_pd = {
+ .shared = &mv643xx_eth_shared_device,
+ .port_number = 1,
+ .phy_addr = MV643XX_ETH_PHY_ADDR(7),
+
+ .tx_sram_addr = PEGASOS2_SRAM_BASE_ETH_PORT1,
+ .tx_sram_size = PEGASOS2_SRAM_TXRING_SIZE,
+ .tx_queue_size = PEGASOS2_SRAM_TXRING_SIZE/16,
+
+ .rx_sram_addr = PEGASOS2_SRAM_BASE_ETH_PORT1 + PEGASOS2_SRAM_TXRING_SIZE,
+ .rx_sram_size = PEGASOS2_SRAM_RXRING_SIZE,
+ .rx_queue_size = PEGASOS2_SRAM_RXRING_SIZE/16,
+};
+
+static struct platform_device eth_port1_device = {
+ .name = MV643XX_ETH_NAME,
+ .id = 1,
+ .num_resources = ARRAY_SIZE(mv643xx_eth_port1_resources),
+ .resource = mv643xx_eth_port1_resources,
+ .dev = {
+ .platform_data = &eth_port1_pd,
+ },
+};
+
+static struct platform_device *mv643xx_eth_pd_devs[] __initdata = {
+ &mv643xx_eth_shared_device,
+ &mv643xx_eth_mvmdio_device,
+ &eth_port1_device,
+};
+
+/***********/
+/***********/
+#define MV_READ(offset,val) { val = readl(mv643xx_reg_base + offset); }
+#define MV_WRITE(offset,data) writel(data, mv643xx_reg_base + offset)
+
+static void __iomem *mv643xx_reg_base;
+
+static int __init Enable_SRAM(void)
+{
+ u32 ALong;
+
+ if (mv643xx_reg_base == NULL)
+ mv643xx_reg_base = ioremap(PEGASOS2_MARVELL_REGBASE,
+ PEGASOS2_MARVELL_REGSIZE);
+
+ if (mv643xx_reg_base == NULL)
+ return -ENOMEM;
+
+#ifdef BE_VERBOSE
+ printk("Pegasos II/Marvell MV64361: register remapped from %p to %p\n",
+ (void *)PEGASOS2_MARVELL_REGBASE, (void *)mv643xx_reg_base);
+#endif
+
+ MV_WRITE(MV64340_SRAM_CONFIG, 0);
+
+ MV_WRITE(MV64340_INTEGRATED_SRAM_BASE_ADDR, PEGASOS2_SRAM_BASE >> 16);
+
+ MV_READ(MV64340_BASE_ADDR_ENABLE, ALong);
+ ALong &= ~(1 << 19);
+ MV_WRITE(MV64340_BASE_ADDR_ENABLE, ALong);
+
+ ALong = 0x02;
+ ALong |= PEGASOS2_SRAM_BASE & 0xffff0000;
+ MV_WRITE(MV643XX_ETH_BAR_4, ALong);
+
+ MV_WRITE(MV643XX_ETH_SIZE_REG_4, (PEGASOS2_SRAM_SIZE-1) & 0xffff0000);
+
+ MV_READ(MV643XX_ETH_BASE_ADDR_ENABLE_REG, ALong);
+ ALong &= ~(1 << 4);
+ MV_WRITE(MV643XX_ETH_BASE_ADDR_ENABLE_REG, ALong);
+
+#ifdef BE_VERBOSE
+ printk("Pegasos II/Marvell MV64361: register unmapped\n");
+ printk("Pegasos II/Marvell MV64361: SRAM at %p, size=%x\n", (void*) PEGASOS2_SRAM_BASE, PEGASOS2_SRAM_SIZE);
+#endif
+
+ iounmap(mv643xx_reg_base);
+ mv643xx_reg_base = NULL;
+
+ return 1;
+}
+
+
+/***********/
+/***********/
+static int __init mv643xx_eth_add_pds(void)
+{
+ int ret = 0;
+ static struct pci_device_id pci_marvell_mv64360[] = {
+ { PCI_DEVICE(PCI_VENDOR_ID_MARVELL, PCI_DEVICE_ID_MARVELL_MV64360) },
+ { }
+ };
+
+#ifdef BE_VERBOSE
+ printk("Pegasos II/Marvell MV64361: init\n");
+#endif
+
+ if (pci_dev_present(pci_marvell_mv64360)) {
+ /* Settle eth_port1_pd's SRAM fields before registering the devices, */
+ /* so a driver that binds during registration never sees stale values. */
+ if (Enable_SRAM() < 0) {
+ eth_port1_pd.tx_sram_addr = 0;
+ eth_port1_pd.tx_sram_size = 0;
+ eth_port1_pd.rx_sram_addr = 0;
+ eth_port1_pd.rx_sram_size = 0;
+#ifdef BE_VERBOSE
+ printk("Pegasos II/Marvell MV64361: Can't enable the "
+ "SRAM\n");
+#endif
+ }
+
+ ret = platform_add_devices(mv643xx_eth_pd_devs,
+ ARRAY_SIZE(mv643xx_eth_pd_devs));
+ }
+
+#ifdef BE_VERBOSE
+ printk("Pegasos II/Marvell MV64361: init is over\n");
+#endif
+
+ return ret;
+}
+
+device_initcall(mv643xx_eth_add_pds);
diff --git a/arch/powerpc/platforms/chrp/setup.c b/arch/powerpc/platforms/chrp/setup.c
new file mode 100644
index 000000000..36ee3a505
--- /dev/null
+++ b/arch/powerpc/platforms/chrp/setup.c
@@ -0,0 +1,586 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 1995 Linus Torvalds
+ * Adapted from 'alpha' version by Gary Thomas
+ * Modified by Cort Dougan (cort@cs.nmt.edu)
+ */
+
+/*
+ * bootup setup stuff..
+ */
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+#include <linux/ptrace.h>
+#include <linux/user.h>
+#include <linux/tty.h>
+#include <linux/major.h>
+#include <linux/interrupt.h>
+#include <linux/reboot.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <generated/utsrelease.h>
+#include <linux/adb.h>
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/console.h>
+#include <linux/seq_file.h>
+#include <linux/root_dev.h>
+#include <linux/initrd.h>
+#include <linux/timer.h>
+#include <linux/of_address.h>
+#include <linux/of_fdt.h>
+#include <linux/of_irq.h>
+
+#include <asm/io.h>
+#include <asm/pci-bridge.h>
+#include <asm/dma.h>
+#include <asm/machdep.h>
+#include <asm/irq.h>
+#include <asm/hydra.h>
+#include <asm/sections.h>
+#include <asm/time.h>
+#include <asm/i8259.h>
+#include <asm/mpic.h>
+#include <asm/rtas.h>
+#include <asm/xmon.h>
+
+#include "chrp.h"
+#include "gg2.h"
+
+void rtas_indicator_progress(char *, unsigned short);
+
+int _chrp_type;
+EXPORT_SYMBOL(_chrp_type);
+
+static struct mpic *chrp_mpic;
+
+/* Used for doing CHRP event-scans */
+DEFINE_PER_CPU(struct timer_list, heartbeat_timer);
+unsigned long event_scan_interval;
+
+extern unsigned long loops_per_jiffy;
+
+/* To be replaced by RTAS when available */
+static unsigned int __iomem *briq_SPOR;
+
+#ifdef CONFIG_SMP
+extern struct smp_ops_t chrp_smp_ops;
+#endif
+
+static const char *gg2_memtypes[4] = {
+ "FPM", "SDRAM", "EDO", "BEDO"
+};
+static const char *gg2_cachesizes[4] = {
+ "256 KB", "512 KB", "1 MB", "Reserved"
+};
+static const char *gg2_cachetypes[4] = {
+ "Asynchronous", "Reserved", "Flow-Through Synchronous",
+ "Pipelined Synchronous"
+};
+static const char *gg2_cachemodes[4] = {
+ "Disabled", "Write-Through", "Copy-Back", "Transparent Mode"
+};
+
+static const char *chrp_names[] = {
+ "Unknown",
+ "","","",
+ "Motorola",
+ "IBM or Longtrail",
+ "Genesi Pegasos",
+ "Total Impact Briq"
+};
+
+static void chrp_show_cpuinfo(struct seq_file *m)
+{
+ int i, sdramen;
+ unsigned int t;
+ struct device_node *root;
+ const char *model = "";
+
+ root = of_find_node_by_path("/");
+ if (root)
+ model = of_get_property(root, "model", NULL);
+ seq_printf(m, "machine\t\t: CHRP %s\n", model);
+
+ /* longtrail (goldengate) stuff */
+ if (model && !strncmp(model, "IBM,LongTrail", 13)) {
+ /* VLSI VAS96011/12 `Golden Gate 2' */
+ /* Memory banks */
+ sdramen = (in_le32(gg2_pci_config_base + GG2_PCI_DRAM_CTRL)
+ >>31) & 1;
+ for (i = 0; i < (sdramen ? 4 : 6); i++) {
+ t = in_le32(gg2_pci_config_base+
+ GG2_PCI_DRAM_BANK0+
+ i*4);
+ if (!(t & 1))
+ continue;
+ switch ((t>>8) & 0x1f) {
+ case 0x1f:
+ model = "4 MB";
+ break;
+ case 0x1e:
+ model = "8 MB";
+ break;
+ case 0x1c:
+ model = "16 MB";
+ break;
+ case 0x18:
+ model = "32 MB";
+ break;
+ case 0x10:
+ model = "64 MB";
+ break;
+ case 0x00:
+ model = "128 MB";
+ break;
+ default:
+ model = "Reserved";
+ break;
+ }
+ seq_printf(m, "memory bank %d\t: %s %s\n", i, model,
+ gg2_memtypes[sdramen ? 1 : ((t>>1) & 3)]);
+ }
+ /* L2 cache */
+ t = in_le32(gg2_pci_config_base+GG2_PCI_CC_CTRL);
+ seq_printf(m, "board l2\t: %s %s (%s)\n",
+ gg2_cachesizes[(t>>7) & 3],
+ gg2_cachetypes[(t>>2) & 3],
+ gg2_cachemodes[t & 3]);
+ }
+ of_node_put(root);
+}
+
+/*
+ * Fixes for the National Semiconductor PC78308VUL SuperI/O
+ *
+ * Some versions of Open Firmware incorrectly initialize the IRQ settings
+ * for keyboard and mouse
+ */
+static inline void __init sio_write(u8 val, u8 index)
+{
+ outb(index, 0x15c);
+ outb(val, 0x15d);
+}
+
+static inline u8 __init sio_read(u8 index)
+{
+ outb(index, 0x15c);
+ return inb(0x15d);
+}
+
+static void __init sio_fixup_irq(const char *name, u8 device, u8 level,
+ u8 type)
+{
+ u8 level0, type0, active;
+
+ /* select logical device */
+ sio_write(device, 0x07);
+ active = sio_read(0x30);
+ level0 = sio_read(0x70);
+ type0 = sio_read(0x71);
+ if (level0 != level || type0 != type || !active) {
+ printk(KERN_WARNING "sio: %s irq level %d, type %d, %sactive: "
+ "remapping to level %d, type %d, active\n",
+ name, level0, type0, !active ? "in" : "", level, type);
+ sio_write(0x01, 0x30);
+ sio_write(level, 0x70);
+ sio_write(type, 0x71);
+ }
+}
+
+static void __init sio_init(void)
+{
+ struct device_node *root;
+ const char *model;
+
+ root = of_find_node_by_path("/");
+ if (!root)
+ return;
+
+ model = of_get_property(root, "model", NULL);
+ if (model && !strncmp(model, "IBM,LongTrail", 13)) {
+ /* logical device 0 (KBC/Keyboard) */
+ sio_fixup_irq("keyboard", 0, 1, 2);
+ /* select logical device 1 (KBC/Mouse) */
+ sio_fixup_irq("mouse", 1, 12, 2);
+ }
+
+ of_node_put(root);
+}
+
+
+static void __init pegasos_set_l2cr(void)
+{
+ struct device_node *np;
+
+ /* On Pegasos, enable the l2 cache if needed, as the OF forgets it */
+ if (_chrp_type != _CHRP_Pegasos)
+ return;
+
+ /* Enable L2 cache if needed */
+ np = of_find_node_by_type(NULL, "cpu");
+ if (np != NULL) {
+ const unsigned int *l2cr = of_get_property(np, "l2cr", NULL);
+ if (l2cr == NULL) {
+ printk ("Pegasos l2cr : no cpu l2cr property found\n");
+ goto out;
+ }
+ if (!((*l2cr) & 0x80000000)) {
+ printk ("Pegasos l2cr : L2 cache was not active, "
+ "activating\n");
+ _set_L2CR(0);
+ _set_L2CR((*l2cr) | 0x80000000);
+ }
+ }
+out:
+ of_node_put(np);
+}
+
+static void __noreturn briq_restart(char *cmd)
+{
+ local_irq_disable();
+ if (briq_SPOR)
+ out_be32(briq_SPOR, 0);
+ for(;;);
+}
+
+/*
+ * Per default, input/output-device points to the keyboard/screen
+ * If no card is installed, the built-in serial port is used as a fallback.
+ * But unfortunately, the firmware does not connect /chosen/{stdin,stdout}
+ * to the built-in serial node. Instead, a /failsafe node is created.
+ */
+static __init void chrp_init(void)
+{
+ struct device_node *node;
+ const char *property;
+
+ /* an explicit console= on the command line is left untouched */
+ if (strstr(boot_command_line, "console="))
+ return;
+ /* find the boot console from /chosen/stdout */
+ if (!of_chosen)
+ return;
+ node = of_find_node_by_path("/");
+ if (!node)
+ return;
+ /* only a root "model" of exactly "Pegasos2" is handled here */
+ property = of_get_property(node, "model", NULL);
+ if (!property)
+ goto out_put;
+ if (strcmp(property, "Pegasos2"))
+ goto out_put;
+ /* this is a Pegasos2 */
+ property = of_get_property(of_chosen, "linux,stdout-path", NULL);
+ if (!property)
+ goto out_put;
+ /* done with the root node; 'node' is reused for the stdout device */
+ of_node_put(node);
+ node = of_find_node_by_path(property);
+ if (!node)
+ return;
+ if (!of_node_is_type(node, "serial"))
+ goto out_put;
+ /*
+ * The 9pin connector is either /failsafe
+ * or /pci@80000000/isa@C/serial@i2F8
+ * The optional graphics card has also type 'serial' in VGA mode.
+ */
+ if (of_node_name_eq(node, "failsafe") || of_node_name_eq(node, "serial"))
+ add_preferred_console("ttyS", 0, NULL);
+out_put:
+ /* drops whichever node (root or stdout device) is currently held */
+ of_node_put(node);
+}
+
+/*
+ * Architecture setup for CHRP machines.
+ *
+ * Derives _chrp_type from the prefix of the root "model" property (falling
+ * back to IBM), initialises RTAS and installs the RTAS progress and
+ * time-of-day hooks when the corresponding services exist, then applies the
+ * Pegasos L2 cache and Super I/O fixups.
+ */
+static void __init chrp_setup_arch(void)
+{
+	const char *model = NULL;
+	struct device_node *dt_root;
+
+	dt_root = of_find_node_by_path("/");
+
+	/* init to some ~sane value until calibrate_delay() runs */
+	loops_per_jiffy = 50000000/HZ;
+
+	if (dt_root != NULL)
+		model = of_get_property(dt_root, "model", NULL);
+
+	if (model == NULL) {
+		/* no model string at all: treat it as an IBM chrp */
+		_chrp_type = _CHRP_IBM;
+	} else if (!strncmp(model, "Pegasos", 7)) {
+		_chrp_type = _CHRP_Pegasos;
+	} else if (!strncmp(model, "IBM", 3)) {
+		_chrp_type = _CHRP_IBM;
+	} else if (!strncmp(model, "MOT", 3)) {
+		_chrp_type = _CHRP_Motorola;
+	} else if (!strncmp(model, "TotalImpact,BRIQ-1", 18)) {
+		_chrp_type = _CHRP_briq;
+		/* Map the SPOR register on briq and change the restart hook */
+		briq_SPOR = ioremap(0xff0000e8, 4);
+		ppc_md.restart = briq_restart;
+	} else {
+		/* Let's assume it is an IBM chrp if all else fails */
+		_chrp_type = _CHRP_IBM;
+	}
+	of_node_put(dt_root);
+	printk("chrp type = %x [%s]\n", _chrp_type, chrp_names[_chrp_type]);
+
+	rtas_initialize();
+	if (rtas_function_token(RTAS_FN_DISPLAY_CHARACTER) >= 0)
+		ppc_md.progress = rtas_progress;
+
+	/* use RTAS time-of-day routines if available */
+	if (rtas_function_token(RTAS_FN_GET_TIME_OF_DAY) != RTAS_UNKNOWN_SERVICE) {
+		ppc_md.get_boot_time = rtas_get_boot_time;
+		ppc_md.get_rtc_time = rtas_get_rtc_time;
+		ppc_md.set_rtc_time = rtas_set_rtc_time;
+	}
+
+	/* On pegasos, enable the L2 cache if not already done by OF */
+	pegasos_set_l2cr();
+
+	/* Fix the Super I/O configuration */
+	sio_init();
+
+	/*
+	 * Print the banner, then scroll down so boot progress
+	 * can be printed. -- Cort
+	 */
+	if (ppc_md.progress != NULL)
+		ppc_md.progress("Linux/PPC "UTS_RELEASE"\n", 0x0);
+}
+
+/*
+ * Chained handler for the i8259 cascade: dispatch the interrupt reported
+ * by the i8259 (if any), then EOI the cascade interrupt on its own chip.
+ */
+static void chrp_8259_cascade(struct irq_desc *desc)
+{
+	struct irq_chip *parent_chip = irq_desc_get_chip(desc);
+	unsigned int virq = i8259_irq();
+
+	if (virq != 0)
+		generic_handle_irq(virq);
+
+	parent_chip->irq_eoi(&desc->irq_data);
+}
+
+/*
+ * Finds the open-pic node and sets up the mpic driver.
+ *
+ * The controller address comes from the root "platform-open-pic" property
+ * when present, otherwise from the first address resource of the open-pic
+ * node.  On success chrp_mpic is set and ppc_md.get_irq points at
+ * mpic_get_irq; if no node is found or setup fails, both are left alone.
+ */
+static void __init chrp_find_openpic(void)
+{
+ struct device_node *np, *root;
+ int len, i, j;
+ int isu_size;
+ const unsigned int *iranges, *opprop = NULL;
+ int oplen = 0;
+ unsigned long opaddr;
+ int na = 1;
+
+ np = of_find_node_by_type(NULL, "open-pic");
+ if (np == NULL)
+ return;
+ root = of_find_node_by_path("/");
+ if (root) {
+ opprop = of_get_property(root, "platform-open-pic", &oplen);
+ na = of_n_addr_cells(root);
+ }
+ if (opprop && oplen >= na * sizeof(unsigned int)) {
+ /* use the last cell of the first address; oplen becomes a count of addresses */
+ opaddr = opprop[na-1]; /* assume 32-bit */
+ oplen /= na * sizeof(unsigned int);
+ } else {
+ /* no usable property: fall back to the node's first address resource */
+ struct resource r;
+ if (of_address_to_resource(np, 0, &r)) {
+ goto bail;
+ }
+ opaddr = r.start;
+ oplen = 0;
+ }
+
+ printk(KERN_INFO "OpenPIC at %lx\n", opaddr);
+
+ /* len becomes the number of two-cell entries in interrupt-ranges */
+ iranges = of_get_property(np, "interrupt-ranges", &len);
+ if (iranges == NULL)
+ len = 0; /* non-distributed mpic */
+ else
+ len /= 2 * sizeof(unsigned int);
+
+ /*
+ * The first pair of cells in interrupt-ranges refers to the
+ * IDU; subsequent pairs refer to the ISUs.
+ */
+ if (oplen < len) {
+ printk(KERN_ERR "Insufficient addresses for distributed"
+ " OpenPIC (%d < %d)\n", oplen, len);
+ len = oplen;
+ }
+
+ isu_size = 0;
+ if (len > 0 && iranges[1] != 0) {
+ printk(KERN_INFO "OpenPIC irqs %d..%d in IDU\n",
+ iranges[0], iranges[0] + iranges[1] - 1);
+ }
+ /* the second cell of the first ISU pair is passed to mpic_alloc() as isu_size */
+ if (len > 1)
+ isu_size = iranges[3];
+
+ chrp_mpic = mpic_alloc(np, opaddr, MPIC_NO_RESET,
+ isu_size, 0, " MPIC ");
+ if (chrp_mpic == NULL) {
+ printk(KERN_ERR "Failed to allocate MPIC structure\n");
+ goto bail;
+ }
+ /* j tracks the last cell of each address in platform-open-pic */
+ j = na - 1;
+ for (i = 1; i < len; ++i) {
+ iranges += 2;
+ j += na;
+ printk(KERN_INFO "OpenPIC irqs %d..%d in ISU at %x\n",
+ iranges[0], iranges[0] + iranges[1] - 1,
+ opprop[j]);
+ mpic_assign_isu(chrp_mpic, i - 1, opprop[j]);
+ }
+
+ mpic_init(chrp_mpic);
+ ppc_md.get_irq = mpic_get_irq;
+ bail:
+ /* root may be NULL here; np never is */
+ of_node_put(root);
+ of_node_put(np);
+}
+
+/*
+ * Locate and initialise the legacy i8259 interrupt controller.
+ *
+ * Looks for an "interrupt-controller" node compatible with "chrp,iic" and
+ * for a "pci" node carrying an "8259-interrupt-acknowledge" address, then
+ * calls i8259_init().  If no other get_irq hook is installed the i8259
+ * becomes the default host; if an MPIC exists the i8259 is chained to it.
+ */
+static void __init chrp_find_8259(void)
+{
+	struct device_node *np, *pic = NULL;
+	unsigned long chrp_int_ack = 0;
+	unsigned int cascade_irq;
+
+	/* Look for cascade */
+	for_each_node_by_type(np, "interrupt-controller")
+		if (of_device_is_compatible(np, "chrp,iic")) {
+			/* breaking out keeps the iterator's reference on np */
+			pic = np;
+			break;
+		}
+	/* Ok, 8259 wasn't found. We need to handle the case where
+	 * we have a pegasos that claims to be chrp but doesn't have
+	 * a proper interrupt tree
+	 */
+	if (pic == NULL && chrp_mpic != NULL) {
+		printk(KERN_ERR "i8259: Not found in device-tree"
+		       " assuming no legacy interrupts\n");
+		return;
+	}
+
+	/* Look for intack. In a perfect world, we would look for it on
+	 * the ISA bus that holds the 8259 but heh... Works that way. If
+	 * we ever see a problem, we can try to re-use the pSeries code here.
+	 * Also, Pegasos-type platforms don't have a proper node to start
+	 * from anyway
+	 */
+	for_each_node_by_name(np, "pci") {
+		const unsigned int *addrp = of_get_property(np,
+				"8259-interrupt-acknowledge", NULL);
+
+		if (addrp == NULL)
+			continue;
+		chrp_int_ack = addrp[of_n_addr_cells(np)-1];
+		break;
+	}
+	/* np is NULL here when the loop ran to completion without a match */
+	of_node_put(np);
+	if (np == NULL)
+		printk(KERN_WARNING "Cannot find PCI interrupt acknowledge"
+		       " address, polling\n");
+
+	i8259_init(pic, chrp_int_ack);
+	if (ppc_md.get_irq == NULL) {
+		ppc_md.get_irq = i8259_irq;
+		irq_set_default_host(i8259_get_host());
+	}
+	if (chrp_mpic != NULL) {
+		cascade_irq = irq_of_parse_and_map(pic, 0);
+		if (!cascade_irq)
+			printk(KERN_ERR "i8259: failed to map cascade irq\n");
+		else
+			irq_set_chained_handler(cascade_irq,
+						chrp_8259_cascade);
+	}
+
+	/*
+	 * Drop the reference taken by the lookup loop above.
+	 * NOTE(review): assumes i8259_init() takes its own reference on the
+	 * node if it needs to keep it - confirm against the i8259 driver.
+	 */
+	of_node_put(pic);
+}
+
+/*
+ * Interrupt setup for CHRP: probe the OpenPIC and the i8259, select the
+ * SMP ops when an MPIC is present and, on suitable configurations, hook
+ * the XMON break interrupt.
+ */
+static void __init chrp_init_IRQ(void)
+{
+#if defined(CONFIG_VT) && defined(CONFIG_INPUT_ADBHID) && defined(CONFIG_XMON)
+ struct device_node *kbd;
+#endif
+ chrp_find_openpic();
+ chrp_find_8259();
+
+#ifdef CONFIG_SMP
+ /* Pegasos has no MPIC, those ops would make it crash. It might be an
+ * option to move setting them to after we probe the PIC though
+ */
+ if (chrp_mpic != NULL)
+ smp_ops = &chrp_smp_ops;
+#endif /* CONFIG_SMP */
+
+ /* on Pegasos the i8259 is always used for interrupt lookup */
+ if (_chrp_type == _CHRP_Pegasos)
+ ppc_md.get_irq = i8259_irq;
+
+#if defined(CONFIG_VT) && defined(CONFIG_INPUT_ADBHID) && defined(CONFIG_XMON)
+ /* see if there is a keyboard in the device tree
+ with a parent of type "adb" */
+ for_each_node_by_name(kbd, "keyboard")
+ if (of_node_is_type(kbd->parent, "adb"))
+ break;
+ /* kbd is only tested for NULL below, so the reference can go now */
+ of_node_put(kbd);
+ if (kbd) {
+ if (request_irq(HYDRA_INT_ADB_NMI, xmon_irq, 0, "XMON break",
+ NULL))
+ pr_err("Failed to register XMON break interrupt\n");
+ }
+#endif
+}
+
+/*
+ * Late machine init: set up NVRAM when configured and reserve the legacy
+ * PC I/O port ranges.  The reservations are best-effort; their results are
+ * not checked.
+ */
+static void __init
+chrp_init2(void)
+{
+	static const struct {
+		unsigned long start;
+		unsigned long len;
+		const char *name;
+	} legacy_io[] = {
+		{ 0x20, 0x20, "pic1" },
+		{ 0xa0, 0x20, "pic2" },
+		{ 0x00, 0x20, "dma1" },
+		{ 0x40, 0x20, "timer" },
+		{ 0x80, 0x10, "dma page reg" },
+		{ 0xc0, 0x20, "dma2" },
+	};
+	unsigned int i;
+
+#if IS_ENABLED(CONFIG_NVRAM)
+	chrp_nvram_init();
+#endif
+
+	for (i = 0; i < ARRAY_SIZE(legacy_io); i++)
+		request_region(legacy_io[i].start, legacy_io[i].len,
+			       legacy_io[i].name);
+
+	if (ppc_md.progress != NULL)
+		ppc_md.progress(" Have fun! ", 0x7777);
+}
+
+/*
+ * Machine probe: matches when the flat device-tree root has
+ * device_type == "chrp".
+ *
+ * On a match this also sets the DMA mode values, installs the RTAS
+ * power-off hook and runs chrp_init().  Returns 1 on match, 0 otherwise.
+ */
+static int __init chrp_probe(void)
+{
+	const char *dev_type;
+
+	dev_type = of_get_flat_dt_prop(of_get_flat_dt_root(),
+				       "device_type", NULL);
+	if (dev_type == NULL || strcmp(dev_type, "chrp") != 0)
+		return 0;
+
+	DMA_MODE_READ = 0x44;
+	DMA_MODE_WRITE = 0x48;
+
+	pm_power_off = rtas_power_off;
+
+	chrp_init();
+
+	return 1;
+}
+
+/*
+ * Machine description for CHRP.  Some of these hooks are replaced at run
+ * time by chrp_setup_arch(): .restart on briQ boards, and the RTC hooks
+ * when RTAS provides time-of-day services.
+ */
+define_machine(chrp) {
+ .name = "CHRP",
+ .probe = chrp_probe,
+ .setup_arch = chrp_setup_arch,
+ .discover_phbs = chrp_find_bridges,
+ .init = chrp_init2,
+ .show_cpuinfo = chrp_show_cpuinfo,
+ .init_IRQ = chrp_init_IRQ,
+ .restart = rtas_restart,
+ .halt = rtas_halt,
+ .time_init = chrp_time_init,
+ .set_rtc_time = chrp_set_rtc_time,
+ .get_rtc_time = chrp_get_rtc_time,
+ .phys_mem_access_prot = pci_phys_mem_access_prot,
+};
diff --git a/arch/powerpc/platforms/chrp/smp.c b/arch/powerpc/platforms/chrp/smp.c
new file mode 100644
index 000000000..ab9515564
--- /dev/null
+++ b/arch/powerpc/platforms/chrp/smp.c
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Smp support for CHRP machines.
+ *
+ * Written by Cort Dougan (cort@cs.nmt.edu) borrowing a great
+ * deal of code from the sparc and intel versions.
+ *
+ * Copyright (C) 1999 Cort Dougan <cort@cs.nmt.edu>
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/kernel_stat.h>
+#include <linux/delay.h>
+#include <linux/spinlock.h>
+#include <linux/pgtable.h>
+
+#include <asm/ptrace.h>
+#include <linux/atomic.h>
+#include <asm/irq.h>
+#include <asm/page.h>
+#include <asm/sections.h>
+#include <asm/io.h>
+#include <asm/smp.h>
+#include <asm/machdep.h>
+#include <asm/mpic.h>
+#include <asm/rtas.h>
+
+/*
+ * Kick secondary CPU @nr: store its number in the word at KERNELBASE and
+ * flush that cache block with dcbf.  Always returns 0.
+ * NOTE(review): presumably the waiting CPU polls this word - confirm
+ * against the secondary start-up code.
+ */
+static int smp_chrp_kick_cpu(int nr)
+{
+ *(unsigned long *)KERNELBASE = nr;
+ asm volatile("dcbf 0,%0"::"r"(KERNELBASE):"memory");
+
+ return 0;
+}
+
+/* Per-CPU setup hook: configure the MPIC for the calling CPU (cpu_nr is unused). */
+static void smp_chrp_setup_cpu(int cpu_nr)
+{
+ mpic_setup_this_cpu();
+}
+
+/*
+ * CHRP with openpic
+ *
+ * SMP operations built on the MPIC helpers for IPIs and probing, and on
+ * RTAS for the timebase hand-over.  No NMI IPI is provided.
+ */
+struct smp_ops_t chrp_smp_ops = {
+ .cause_nmi_ipi = NULL,
+ .message_pass = smp_mpic_message_pass,
+ .probe = smp_mpic_probe,
+ .kick_cpu = smp_chrp_kick_cpu,
+ .setup_cpu = smp_chrp_setup_cpu,
+ .give_timebase = rtas_give_timebase,
+ .take_timebase = rtas_take_timebase,
+};
diff --git a/arch/powerpc/platforms/chrp/time.c b/arch/powerpc/platforms/chrp/time.c
new file mode 100644
index 000000000..d46417e3d
--- /dev/null
+++ b/arch/powerpc/platforms/chrp/time.c
@@ -0,0 +1,159 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 1991, 1992, 1995 Linus Torvalds
+ *
+ * Adapted for PowerPC (PReP) by Gary Thomas
+ * Modified by Cort Dougan (cort@cs.nmt.edu).
+ * Copied and modified from arch/i386/kernel/time.c
+ *
+ */
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/param.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/time.h>
+#include <linux/timex.h>
+#include <linux/kernel_stat.h>
+#include <linux/mc146818rtc.h>
+#include <linux/init.h>
+#include <linux/bcd.h>
+#include <linux/ioport.h>
+#include <linux/of_address.h>
+
+#include <asm/io.h>
+#include <asm/nvram.h>
+#include <asm/sections.h>
+#include <asm/time.h>
+
+#include <platforms/chrp/chrp.h>
+
+#define NVRAM_AS0 0x74
+#define NVRAM_AS1 0x75
+#define NVRAM_DATA 0x77
+
+static int nvram_as1 = NVRAM_AS1;
+static int nvram_as0 = NVRAM_AS0;
+static int nvram_data = NVRAM_DATA;
+
+/*
+ * Locate the RTC in the device tree and record its I/O ports.
+ *
+ * Looks for an "rtc" node compatible with "pnpPNP,b00", then with
+ * "ds1385-rtc".  When one is found and has an address resource, the
+ * address port becomes the resource start, the data port the next one,
+ * and the high-address port (nvram_as1) is disabled.  Otherwise the
+ * compile-time defaults stay in effect.  Always returns 0.
+ */
+long __init chrp_time_init(void)
+{
+	struct device_node *rtc_np;
+	struct resource res;
+	int err, io_base;
+
+	rtc_np = of_find_compatible_node(NULL, "rtc", "pnpPNP,b00");
+	if (!rtc_np)
+		rtc_np = of_find_compatible_node(NULL, "rtc", "ds1385-rtc");
+	if (!rtc_np)
+		return 0;
+
+	/* the node is only needed for this one lookup */
+	err = of_address_to_resource(rtc_np, 0, &res);
+	of_node_put(rtc_np);
+	if (err)
+		return 0;
+
+	io_base = res.start;
+	nvram_as1 = 0;
+	nvram_as0 = io_base;
+	nvram_data = io_base + 1;
+
+	return 0;
+}
+
+/*
+ * Read one RTC/NVRAM register.  The high address byte is only written
+ * when a high-address port is configured (nvram_as1 != 0).
+ */
+static int chrp_cmos_clock_read(int addr)
+{
+	if (nvram_as1)
+		outb(addr >> 8, nvram_as1);
+
+	outb(addr, nvram_as0);
+
+	return inb(nvram_data);
+}
+
+/*
+ * Write @val to one RTC/NVRAM register.  The high address byte is only
+ * written when a high-address port is configured (nvram_as1 != 0).
+ */
+static void chrp_cmos_clock_write(unsigned long val, int addr)
+{
+	if (nvram_as1)
+		outb(addr >> 8, nvram_as1);
+
+	outb(addr, nvram_as0);
+	outb(val, nvram_data);
+}
+
+/*
+ * Set the hardware clock. -- Cort
+ *
+ * Works on a private copy of @tmarg, so the caller's structure is not
+ * modified.  Values are converted to BCD unless the chip is in binary
+ * mode.  Always returns 0.
+ * NOTE(review): tm_mon and tm_year are written to the chip unadjusted;
+ * confirm that matches the convention callers use for struct rtc_time.
+ */
+int chrp_set_rtc_time(struct rtc_time *tmarg)
+{
+ unsigned char save_control, save_freq_select;
+ struct rtc_time tm = *tmarg;
+
+ spin_lock(&rtc_lock);
+
+ save_control = chrp_cmos_clock_read(RTC_CONTROL); /* tell the clock it's being set */
+
+ chrp_cmos_clock_write((save_control|RTC_SET), RTC_CONTROL);
+
+ save_freq_select = chrp_cmos_clock_read(RTC_FREQ_SELECT); /* stop and reset prescaler */
+
+ chrp_cmos_clock_write((save_freq_select|RTC_DIV_RESET2), RTC_FREQ_SELECT);
+
+ /* the chip wants BCD unless its control register selects binary mode */
+ if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
+ tm.tm_sec = bin2bcd(tm.tm_sec);
+ tm.tm_min = bin2bcd(tm.tm_min);
+ tm.tm_hour = bin2bcd(tm.tm_hour);
+ tm.tm_mon = bin2bcd(tm.tm_mon);
+ tm.tm_mday = bin2bcd(tm.tm_mday);
+ tm.tm_year = bin2bcd(tm.tm_year);
+ }
+ chrp_cmos_clock_write(tm.tm_sec,RTC_SECONDS);
+ chrp_cmos_clock_write(tm.tm_min,RTC_MINUTES);
+ chrp_cmos_clock_write(tm.tm_hour,RTC_HOURS);
+ chrp_cmos_clock_write(tm.tm_mon,RTC_MONTH);
+ chrp_cmos_clock_write(tm.tm_mday,RTC_DAY_OF_MONTH);
+ chrp_cmos_clock_write(tm.tm_year,RTC_YEAR);
+
+ /* The following flags have to be released exactly in this order,
+ * otherwise the DS12887 (popular MC146818A clone with integrated
+ * battery and quartz) will not reset the oscillator and will not
+ * update precisely 500 ms later. You won't find this mentioned in
+ * the Dallas Semiconductor data sheets, but who believes data
+ * sheets anyway ... -- Markus Kuhn
+ */
+ chrp_cmos_clock_write(save_control, RTC_CONTROL);
+ chrp_cmos_clock_write(save_freq_select, RTC_FREQ_SELECT);
+
+ spin_unlock(&rtc_lock);
+ return 0;
+}
+
+/*
+ * Read the hardware clock into @tm.
+ *
+ * The registers are re-read until the seconds value is unchanged across
+ * the snapshot.  Values are converted from BCD unless the chip is in
+ * binary mode, and two-digit years below 70 are moved up by 100.  Only
+ * the sec/min/hour/mday/mon/year fields of @tm are written.
+ * NOTE(review): the month register is stored in tm_mon unadjusted;
+ * confirm that matches the convention callers use for struct rtc_time.
+ */
+void chrp_get_rtc_time(struct rtc_time *tm)
+{
+	unsigned int ss, mi, hh, dd, mo, yy;
+	int ctrl;
+
+	for (;;) {
+		ss = chrp_cmos_clock_read(RTC_SECONDS);
+		mi = chrp_cmos_clock_read(RTC_MINUTES);
+		hh = chrp_cmos_clock_read(RTC_HOURS);
+		dd = chrp_cmos_clock_read(RTC_DAY_OF_MONTH);
+		mo = chrp_cmos_clock_read(RTC_MONTH);
+		yy = chrp_cmos_clock_read(RTC_YEAR);
+		/* consistent snapshot once the seconds did not tick meanwhile */
+		if (ss == chrp_cmos_clock_read(RTC_SECONDS))
+			break;
+	}
+
+	ctrl = chrp_cmos_clock_read(RTC_CONTROL);
+	if (RTC_ALWAYS_BCD || !(ctrl & RTC_DM_BINARY)) {
+		ss = bcd2bin(ss);
+		mi = bcd2bin(mi);
+		hh = bcd2bin(hh);
+		dd = bcd2bin(dd);
+		mo = bcd2bin(mo);
+		yy = bcd2bin(yy);
+	}
+
+	if (yy < 70)
+		yy += 100;
+
+	tm->tm_sec = ss;
+	tm->tm_min = mi;
+	tm->tm_hour = hh;
+	tm->tm_mday = dd;
+	tm->tm_mon = mo;
+	tm->tm_year = yy;
+}
diff --git a/arch/powerpc/platforms/embedded6xx/Kconfig b/arch/powerpc/platforms/embedded6xx/Kconfig
new file mode 100644
index 000000000..c6adff216
--- /dev/null
+++ b/arch/powerpc/platforms/embedded6xx/Kconfig
@@ -0,0 +1,94 @@
+# SPDX-License-Identifier: GPL-2.0
+config EMBEDDED6xx
+ bool "Embedded 6xx/7xx/7xxx-based boards"
+ depends on PPC_BOOK3S_32 && BROKEN_ON_SMP
+
+config LINKSTATION
+ bool "Linkstation / Kurobox(HG) from Buffalo"
+ depends on EMBEDDED6xx
+ select MPIC
+ select FSL_SOC
+ select PPC_UDBG_16550 if SERIAL_8250
+ select DEFAULT_UIMAGE
+ imply MPC10X_BRIDGE if PCI
+ help
+ Select LINKSTATION if configuring for one of the PPC (MPC8241)
+ based NAS systems from Buffalo Technology. So far only
+ KuroboxHG has been tested. In the future classical Kurobox,
+ Linkstation-I HD-HLAN and HD-HGLAN versions, and PPC-based
+ Terastation systems should be supported too.
+
+config STORCENTER
+ bool "IOMEGA StorCenter"
+ depends on EMBEDDED6xx
+ select MPIC
+ select FSL_SOC
+ select PPC_UDBG_16550 if SERIAL_8250
+ imply MPC10X_BRIDGE if PCI
+ help
+ Select STORCENTER if configuring for the iomega StorCenter
+ with an 8241 CPU in it.
+
+config PPC_HOLLY
+ bool "PPC750GX/CL with TSI10x bridge (Hickory/Holly)"
+ depends on EMBEDDED6xx
+ select TSI108_BRIDGE
+ select PPC_UDBG_16550
+ help
+ Select PPC_HOLLY if configuring for an IBM 750GX/CL Eval
+ Board with TSI108/9 bridge (Hickory/Holly)
+
+config MVME5100
+ bool "Motorola/Emerson MVME5100"
+ depends on EMBEDDED6xx
+ select MPIC
+ select FORCE_PCI
+ select PPC_INDIRECT_PCI
+ select PPC_I8259
+ select PPC_HASH_MMU_NATIVE
+ select PPC_UDBG_16550
+ help
+ This option enables support for the Motorola (now Emerson) MVME5100
+ board.
+
+config TSI108_BRIDGE
+ bool
+ select FORCE_PCI
+ select MPIC
+ select MPIC_WEIRD
+
+config MPC10X_BRIDGE
+ bool
+ select PPC_INDIRECT_PCI
+
+config GAMECUBE_COMMON
+ bool
+
+config USBGECKO_UDBG
+ bool "USB Gecko udbg console for the Nintendo GameCube/Wii"
+ depends on GAMECUBE_COMMON
+ help
+ If you say yes to this option, support will be included for the
+ USB Gecko adapter as a udbg console.
+ The USB Gecko is an EXI to USB Serial converter that can be plugged
+ into a memcard slot in the Nintendo GameCube/Wii.
+
+ This driver bypasses the EXI layer completely.
+
+ If in doubt, say N here.
+
+config GAMECUBE
+ bool "Nintendo-GameCube"
+ depends on EMBEDDED6xx
+ select GAMECUBE_COMMON
+ help
+ Select GAMECUBE if configuring for the Nintendo GameCube.
+ More information at: <http://gc-linux.sourceforge.net/>
+
+config WII
+ bool "Nintendo-Wii"
+ depends on EMBEDDED6xx
+ select GAMECUBE_COMMON
+ help
+ Select WII if configuring for the Nintendo Wii.
+ More information at: <http://gc-linux.sourceforge.net/>
diff --git a/arch/powerpc/platforms/embedded6xx/Makefile b/arch/powerpc/platforms/embedded6xx/Makefile
new file mode 100644
index 000000000..7f2a8154e
--- /dev/null
+++ b/arch/powerpc/platforms/embedded6xx/Makefile
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for the 6xx/7xx/7xxx linux kernel.
+#
+obj-$(CONFIG_LINKSTATION) += linkstation.o ls_uart.o
+obj-$(CONFIG_STORCENTER) += storcenter.o
+obj-$(CONFIG_PPC_HOLLY) += holly.o
+obj-$(CONFIG_USBGECKO_UDBG) += usbgecko_udbg.o
+obj-$(CONFIG_GAMECUBE_COMMON) += flipper-pic.o
+obj-$(CONFIG_GAMECUBE) += gamecube.o
+obj-$(CONFIG_WII) += wii.o hlwd-pic.o
+obj-$(CONFIG_MVME5100) += mvme5100.o
diff --git a/arch/powerpc/platforms/embedded6xx/flipper-pic.c b/arch/powerpc/platforms/embedded6xx/flipper-pic.c
new file mode 100644
index 000000000..4d9200bdb
--- /dev/null
+++ b/arch/powerpc/platforms/embedded6xx/flipper-pic.c
@@ -0,0 +1,244 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * arch/powerpc/platforms/embedded6xx/flipper-pic.c
+ *
+ * Nintendo GameCube/Wii "Flipper" interrupt controller support.
+ * Copyright (C) 2004-2009 The GameCube Linux Team
+ * Copyright (C) 2007,2008,2009 Albert Herranz
+ */
+#define DRV_MODULE_NAME "flipper-pic"
+#define pr_fmt(fmt) DRV_MODULE_NAME ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <asm/io.h>
+
+#include "flipper-pic.h"
+
+#define FLIPPER_NR_IRQS 32
+
+/*
+ * Each interrupt has a corresponding bit in both
+ * the Interrupt Cause (ICR) and Interrupt Mask (IMR) registers.
+ *
+ * Enabling/disabling an interrupt line involves setting/clearing
+ * the corresponding bit in IMR.
+ * Except for the RSW interrupt, all interrupts get deasserted automatically
+ * when the source deasserts the interrupt.
+ */
+#define FLIPPER_ICR 0x00
+#define FLIPPER_ICR_RSS (1<<16) /* reset switch state */
+
+#define FLIPPER_IMR 0x04
+
+#define FLIPPER_RESET 0x24
+
+
+/*
+ * IRQ chip hooks.
+ *
+ */
+
+/*
+ * Mask and acknowledge one interrupt: clear its bit in IMR, then write
+ * the same bit to ICR.
+ */
+static void flipper_pic_mask_and_ack(struct irq_data *d)
+{
+	void __iomem *io_base = irq_data_get_irq_chip_data(d);
+	/* unsigned constant so that hwirq 31 does not shift into a sign bit */
+	u32 mask = 1U << irqd_to_hwirq(d);
+
+	clrbits32(io_base + FLIPPER_IMR, mask);
+	/* this is at least needed for RSW */
+	out_be32(io_base + FLIPPER_ICR, mask);
+}
+
+/* Acknowledge one interrupt by writing its bit to ICR. */
+static void flipper_pic_ack(struct irq_data *d)
+{
+	void __iomem *io_base = irq_data_get_irq_chip_data(d);
+	/* unsigned constant so that hwirq 31 does not shift into a sign bit */
+	u32 mask = 1U << irqd_to_hwirq(d);
+
+	/* this is at least needed for RSW */
+	out_be32(io_base + FLIPPER_ICR, mask);
+}
+
+/* Disable one interrupt by clearing its bit in IMR. */
+static void flipper_pic_mask(struct irq_data *d)
+{
+	void __iomem *io_base = irq_data_get_irq_chip_data(d);
+	/* unsigned constant so that hwirq 31 does not shift into a sign bit */
+	u32 mask = 1U << irqd_to_hwirq(d);
+
+	clrbits32(io_base + FLIPPER_IMR, mask);
+}
+
+/* Enable one interrupt by setting its bit in IMR. */
+static void flipper_pic_unmask(struct irq_data *d)
+{
+	void __iomem *io_base = irq_data_get_irq_chip_data(d);
+	/* unsigned constant so that hwirq 31 does not shift into a sign bit */
+	u32 mask = 1U << irqd_to_hwirq(d);
+
+	setbits32(io_base + FLIPPER_IMR, mask);
+}
+
+
+/* irq_chip wiring the IMR/ICR helpers above into the generic IRQ layer. */
+static struct irq_chip flipper_pic = {
+ .name = "flipper-pic",
+ .irq_ack = flipper_pic_ack,
+ .irq_mask_ack = flipper_pic_mask_and_ack,
+ .irq_mask = flipper_pic_mask,
+ .irq_unmask = flipper_pic_unmask,
+};
+
+/*
+ * IRQ host hooks.
+ *
+ */
+
+static struct irq_domain *flipper_irq_host;
+
+/*
+ * irq_domain .map callback: attach the controller's register base as chip
+ * data and set @virq up as a level interrupt handled by flipper_pic.
+ * Always returns 0.
+ */
+static int flipper_pic_map(struct irq_domain *h, unsigned int virq,
+			   irq_hw_number_t hwirq)
+{
+	void *regs = h->host_data;
+
+	irq_set_chip_data(virq, regs);
+	irq_set_status_flags(virq, IRQ_LEVEL);
+	irq_set_chip_and_handler(virq, &flipper_pic, handle_level_irq);
+
+	return 0;
+}
+
+/* Domain operations: only .map is needed for this linear domain. */
+static const struct irq_domain_ops flipper_irq_domain_ops = {
+ .map = flipper_pic_map,
+};
+
+/*
+ * Platform hooks.
+ *
+ */
+
+/* Mask every source (IMR = 0), then ack everything (ICR = all ones). */
+static void __flipper_quiesce(void __iomem *io_base)
+{
+	void __iomem *imr = io_base + FLIPPER_IMR;
+	void __iomem *icr = io_base + FLIPPER_ICR;
+
+	out_be32(imr, 0x00000000);
+	out_be32(icr, 0xffffffff);
+}
+
+/*
+ * Map the controller registers and create its linear irq_domain.
+ *
+ * @np is the interrupt controller node; the registers are described by its
+ * parent, which must be compatible with "nintendo,flipper-pi".  All
+ * sources are masked and acked before the domain is created.
+ *
+ * Returns the new domain, or NULL on any failure.  On failure nothing
+ * stays mapped, and the parent node reference is always dropped.
+ */
+static struct irq_domain * __init flipper_pic_init(struct device_node *np)
+{
+	struct device_node *pi;
+	struct irq_domain *irq_domain = NULL;
+	struct resource res;
+	void __iomem *io_base;
+	int retval;
+
+	pi = of_get_parent(np);
+	if (!pi) {
+		pr_err("no parent found\n");
+		goto out;
+	}
+	if (!of_device_is_compatible(pi, "nintendo,flipper-pi")) {
+		pr_err("unexpected parent compatible\n");
+		goto out;
+	}
+
+	retval = of_address_to_resource(pi, 0, &res);
+	if (retval) {
+		pr_err("no io memory range found\n");
+		goto out;
+	}
+	io_base = ioremap(res.start, resource_size(&res));
+	if (!io_base) {
+		pr_err("ioremap failed\n");
+		goto out;
+	}
+
+	pr_info("controller at 0x%pa mapped to 0x%p\n", &res.start, io_base);
+
+	__flipper_quiesce(io_base);
+
+	irq_domain = irq_domain_add_linear(np, FLIPPER_NR_IRQS,
+				  &flipper_irq_domain_ops, io_base);
+	if (!irq_domain) {
+		pr_err("failed to allocate irq_domain\n");
+		/* nobody will use the mapping without a domain */
+		iounmap(io_base);
+	}
+
+out:
+	/* of_node_put() accepts NULL, so this covers every exit path */
+	of_node_put(pi);
+	return irq_domain;
+}
+
+/*
+ * Return the Linux irq number of the lowest-numbered interrupt that is
+ * both asserted in ICR and enabled in IMR, or 0 when none is pending.
+ */
+unsigned int flipper_pic_get_irq(void)
+{
+	void __iomem *regs = flipper_irq_host->host_data;
+	u32 cause = in_be32(regs + FLIPPER_ICR);
+	u32 enabled = in_be32(regs + FLIPPER_IMR);
+	u32 pending = cause & enabled;
+
+	if (!pending)
+		return 0;	/* no more IRQs pending */
+
+	return irq_linear_revmap(flipper_irq_host, __ffs(pending));
+}
+
+/*
+ * Probe function.
+ *
+ */
+
+/*
+ * Find the "nintendo,flipper-pic" node, initialise the controller and
+ * make it the default irq host.  A missing node or a failed init is
+ * treated as fatal (BUG_ON).
+ */
+void __init flipper_pic_probe(void)
+{
+	struct device_node *pic_np;
+
+	pic_np = of_find_compatible_node(NULL, NULL, "nintendo,flipper-pic");
+	BUG_ON(pic_np == NULL);
+
+	flipper_irq_host = flipper_pic_init(pic_np);
+	BUG_ON(flipper_irq_host == NULL);
+
+	irq_set_default_host(flipper_irq_host);
+
+	of_node_put(pic_np);
+}
+
+/*
+ * Misc functions related to the flipper chipset.
+ *
+ */
+
+/**
+ * flipper_quiesce() - quiesce flipper irq controller
+ *
+ * Mask and ack all interrupt sources.
+ *
+ * Does nothing when the controller has not been set up, matching the
+ * guards in the other helpers in this file.
+ */
+void flipper_quiesce(void)
+{
+	if (!flipper_irq_host || !flipper_irq_host->host_data)
+		return;
+
+	__flipper_quiesce(flipper_irq_host->host_data);
+}
+
+/*
+ * Resets the platform by writing 0 to the reset register.
+ * Does nothing when the controller has not been set up.
+ */
+void flipper_platform_reset(void)
+{
+	void __iomem *regs;
+
+	if (!flipper_irq_host)
+		return;
+
+	regs = flipper_irq_host->host_data;
+	if (!regs)
+		return;
+
+	out_8(regs + FLIPPER_RESET, 0x00);
+}
+
+/*
+ * Returns non-zero if the reset button is pressed, i.e. when the
+ * reset-switch-state bit in ICR reads as 0.  Returns 0 when the
+ * controller has not been set up.
+ */
+int flipper_is_reset_button_pressed(void)
+{
+	void __iomem *regs;
+
+	if (!flipper_irq_host || !flipper_irq_host->host_data)
+		return 0;
+
+	regs = flipper_irq_host->host_data;
+
+	return (in_be32(regs + FLIPPER_ICR) & FLIPPER_ICR_RSS) == 0;
+}
+
+
diff --git a/arch/powerpc/platforms/embedded6xx/flipper-pic.h b/arch/powerpc/platforms/embedded6xx/flipper-pic.h
new file mode 100644
index 000000000..024ae70ba
--- /dev/null
+++ b/arch/powerpc/platforms/embedded6xx/flipper-pic.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * arch/powerpc/platforms/embedded6xx/flipper-pic.h
+ *
+ * Nintendo GameCube/Wii "Flipper" interrupt controller support.
+ * Copyright (C) 2004-2009 The GameCube Linux Team
+ * Copyright (C) 2007,2008,2009 Albert Herranz
+ */
+
+#ifndef __FLIPPER_PIC_H
+#define __FLIPPER_PIC_H
+
+unsigned int flipper_pic_get_irq(void);
+void __init flipper_pic_probe(void);
+
+void flipper_quiesce(void);
+void flipper_platform_reset(void);
+int flipper_is_reset_button_pressed(void);
+
+#endif
diff --git a/arch/powerpc/platforms/embedded6xx/gamecube.c b/arch/powerpc/platforms/embedded6xx/gamecube.c
new file mode 100644
index 000000000..e3b2c7464
--- /dev/null
+++ b/arch/powerpc/platforms/embedded6xx/gamecube.c
@@ -0,0 +1,89 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * arch/powerpc/platforms/embedded6xx/gamecube.c
+ *
+ * Nintendo GameCube board-specific support
+ * Copyright (C) 2004-2009 The GameCube Linux Team
+ * Copyright (C) 2007,2008,2009 Albert Herranz
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/irq.h>
+#include <linux/kexec.h>
+#include <linux/seq_file.h>
+#include <linux/of_platform.h>
+
+#include <asm/io.h>
+#include <asm/machdep.h>
+#include <asm/time.h>
+#include <asm/udbg.h>
+
+#include "flipper-pic.h"
+#include "usbgecko_udbg.h"
+
+
+/* Busy-wait forever; the only way out is the console's power button. */
+static void __noreturn gamecube_spin(void)
+{
+	/* spin until power button pressed */
+	while (1)
+		cpu_relax();
+}
+
+/*
+ * Restart hook: with interrupts off, request a platform reset through the
+ * flipper helper, then spin in case the reset does not take effect.
+ * @cmd is ignored.
+ */
+static void __noreturn gamecube_restart(char *cmd)
+{
+ local_irq_disable();
+ flipper_platform_reset();
+ gamecube_spin();
+}
+
+/*
+ * Power-off hook: no power-off is performed here; interrupts are disabled
+ * and the CPU spins until the user presses the power button.
+ */
+static void gamecube_power_off(void)
+{
+ local_irq_disable();
+ gamecube_spin();
+}
+
+/* Halt hook: implemented as a restart with no command. */
+static void __noreturn gamecube_halt(void)
+{
+ gamecube_restart(NULL);
+}
+
+/*
+ * Machine probe hook: installs the power-off handler and initialises the
+ * USB Gecko udbg console.  Always returns 1.
+ */
+static int __init gamecube_probe(void)
+{
+ pm_power_off = gamecube_power_off;
+
+ ug_udbg_init();
+
+ return 1;
+}
+
+/* machine_shutdown hook: mask and ack all flipper interrupt sources. */
+static void gamecube_shutdown(void)
+{
+ flipper_quiesce();
+}
+
+/* Machine description, selected by the "nintendo,gamecube" compatible. */
+define_machine(gamecube) {
+ .name = "gamecube",
+ .compatible = "nintendo,gamecube",
+ .probe = gamecube_probe,
+ .restart = gamecube_restart,
+ .halt = gamecube_halt,
+ .init_IRQ = flipper_pic_probe,
+ .get_irq = flipper_pic_get_irq,
+ .progress = udbg_progress,
+ .machine_shutdown = gamecube_shutdown,
+};
+
+
+/* Bus match table passed to of_platform_bus_probe(). */
+static const struct of_device_id gamecube_of_bus[] = {
+ { .compatible = "nintendo,flipper", },
+ { },
+};
+
+/*
+ * Device initcall, run only on the gamecube machine: probe the platform
+ * buses matching gamecube_of_bus.  The probe result is ignored; always
+ * returns 0.
+ */
+static int __init gamecube_device_probe(void)
+{
+ of_platform_bus_probe(NULL, gamecube_of_bus, NULL);
+ return 0;
+}
+machine_device_initcall(gamecube, gamecube_device_probe);
+
diff --git a/arch/powerpc/platforms/embedded6xx/hlwd-pic.c b/arch/powerpc/platforms/embedded6xx/hlwd-pic.c
new file mode 100644
index 000000000..4d2d92de3
--- /dev/null
+++ b/arch/powerpc/platforms/embedded6xx/hlwd-pic.c
@@ -0,0 +1,235 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * arch/powerpc/platforms/embedded6xx/hlwd-pic.c
+ *
+ * Nintendo Wii "Hollywood" interrupt controller support.
+ * Copyright (C) 2009 The GameCube Linux Team
+ * Copyright (C) 2009 Albert Herranz
+ */
+#define DRV_MODULE_NAME "hlwd-pic"
+#define pr_fmt(fmt) DRV_MODULE_NAME ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/irq.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <asm/io.h>
+
+#include "hlwd-pic.h"
+
+#define HLWD_NR_IRQS 32
+
+/*
+ * Each interrupt has a corresponding bit in both
+ * the Interrupt Cause (ICR) and Interrupt Mask (IMR) registers.
+ *
+ * Enabling/disabling an interrupt line involves asserting/clearing
+ * the corresponding bit in IMR. ACK'ing a request simply involves
+ * asserting the corresponding bit in ICR.
+ */
+#define HW_BROADWAY_ICR 0x00
+#define HW_BROADWAY_IMR 0x04
+#define HW_STARLET_ICR 0x08
+#define HW_STARLET_IMR 0x0c
+
+
+/*
+ * IRQ chip hooks.
+ *
+ */
+
+/*
+ * Mask and acknowledge one interrupt: clear its bit in the Broadway IMR,
+ * then write the same bit to the Broadway ICR.
+ */
+static void hlwd_pic_mask_and_ack(struct irq_data *d)
+{
+	void __iomem *io_base = irq_data_get_irq_chip_data(d);
+	/* unsigned constant so that hwirq 31 does not shift into a sign bit */
+	u32 mask = 1U << irqd_to_hwirq(d);
+
+	clrbits32(io_base + HW_BROADWAY_IMR, mask);
+	out_be32(io_base + HW_BROADWAY_ICR, mask);
+}
+
+/* Acknowledge one interrupt by writing its bit to the Broadway ICR. */
+static void hlwd_pic_ack(struct irq_data *d)
+{
+	void __iomem *io_base = irq_data_get_irq_chip_data(d);
+	/* unsigned constant so that hwirq 31 does not shift into a sign bit */
+	u32 mask = 1U << irqd_to_hwirq(d);
+
+	out_be32(io_base + HW_BROADWAY_ICR, mask);
+}
+
+/* Disable one interrupt by clearing its bit in the Broadway IMR. */
+static void hlwd_pic_mask(struct irq_data *d)
+{
+	void __iomem *io_base = irq_data_get_irq_chip_data(d);
+	/* unsigned constant so that hwirq 31 does not shift into a sign bit */
+	u32 mask = 1U << irqd_to_hwirq(d);
+
+	clrbits32(io_base + HW_BROADWAY_IMR, mask);
+}
+
+/*
+ * Enable one interrupt for the PowerPC side by setting its bit in the
+ * Broadway IMR, and clear the same bit in the Starlet IMR.
+ */
+static void hlwd_pic_unmask(struct irq_data *d)
+{
+	void __iomem *io_base = irq_data_get_irq_chip_data(d);
+	/* unsigned constant so that hwirq 31 does not shift into a sign bit */
+	u32 mask = 1U << irqd_to_hwirq(d);
+
+	setbits32(io_base + HW_BROADWAY_IMR, mask);
+
+	/* Make sure the ARM (aka. Starlet) doesn't handle this interrupt. */
+	clrbits32(io_base + HW_STARLET_IMR, mask);
+}
+
+
+/* irq_chip wiring the IMR/ICR helpers above into the generic IRQ layer. */
+static struct irq_chip hlwd_pic = {
+ .name = "hlwd-pic",
+ .irq_ack = hlwd_pic_ack,
+ .irq_mask_ack = hlwd_pic_mask_and_ack,
+ .irq_mask = hlwd_pic_mask,
+ .irq_unmask = hlwd_pic_unmask,
+};
+
+/*
+ * IRQ host hooks.
+ *
+ */
+
+static struct irq_domain *hlwd_irq_host;
+
+/*
+ * irq_domain .map callback: attach the controller's register base as chip
+ * data and set @virq up as a level interrupt handled by hlwd_pic.
+ * Always returns 0.
+ */
+static int hlwd_pic_map(struct irq_domain *h, unsigned int virq,
+			irq_hw_number_t hwirq)
+{
+	void *regs = h->host_data;
+
+	irq_set_chip_data(virq, regs);
+	irq_set_status_flags(virq, IRQ_LEVEL);
+	irq_set_chip_and_handler(virq, &hlwd_pic, handle_level_irq);
+
+	return 0;
+}
+
+/* Domain operations: only .map is needed for this linear domain. */
+static const struct irq_domain_ops hlwd_irq_domain_ops = {
+ .map = hlwd_pic_map,
+};
+
+/*
+ * Return the hardware number of the lowest-numbered interrupt that is
+ * both asserted in the Broadway ICR and enabled in the Broadway IMR, or 0
+ * when none is pending.
+ * NOTE(review): a pending hwirq 0 yields the same return value as
+ * "nothing pending" - confirm that source 0 is never used this way.
+ */
+static unsigned int __hlwd_pic_get_irq(struct irq_domain *h)
+{
+	void __iomem *regs = h->host_data;
+	u32 cause = in_be32(regs + HW_BROADWAY_ICR);
+	u32 enabled = in_be32(regs + HW_BROADWAY_IMR);
+	u32 pending = cause & enabled;
+
+	if (!pending)
+		return 0;	/* no more IRQs pending */
+
+	return __ffs(pending);
+}
+
+/*
+ * Chained handler for the cascade interrupt.
+ *
+ * Masks the cascade on its parent chip, dispatches one pending interrupt
+ * from this controller's domain (or logs a spurious interrupt), then acks
+ * the cascade and unmasks it again unless it has been disabled meanwhile.
+ * The parent chip operations run under desc->lock.
+ */
+static void hlwd_pic_irq_cascade(struct irq_desc *desc)
+{
+ struct irq_chip *chip = irq_desc_get_chip(desc);
+ struct irq_domain *irq_domain = irq_desc_get_handler_data(desc);
+ unsigned int hwirq;
+
+ raw_spin_lock(&desc->lock);
+ chip->irq_mask(&desc->irq_data); /* IRQ_LEVEL */
+ raw_spin_unlock(&desc->lock);
+
+ /* a return of 0 is treated as "nothing pending" */
+ hwirq = __hlwd_pic_get_irq(irq_domain);
+ if (hwirq)
+ generic_handle_domain_irq(irq_domain, hwirq);
+ else
+ pr_err("spurious interrupt!\n");
+
+ raw_spin_lock(&desc->lock);
+ chip->irq_ack(&desc->irq_data); /* IRQ_LEVEL */
+ /* irq_unmask is optional on the parent chip, hence the pointer check */
+ if (!irqd_irq_disabled(&desc->irq_data) && chip->irq_unmask)
+ chip->irq_unmask(&desc->irq_data);
+ raw_spin_unlock(&desc->lock);
+}
+
+/*
+ * Platform hooks.
+ *
+ */
+
+/* Mask every source (IMR = 0), then ack everything (ICR = all ones). */
+static void __hlwd_quiesce(void __iomem *io_base)
+{
+	void __iomem *imr = io_base + HW_BROADWAY_IMR;
+	void __iomem *icr = io_base + HW_BROADWAY_ICR;
+
+	out_be32(imr, 0);
+	out_be32(icr, 0xffffffff);
+}
+
+/*
+ * Map the controller registers described by @np and create its linear
+ * irq_domain.  All sources are masked and acked before the domain is
+ * created.
+ *
+ * Returns the new domain, or NULL on failure; the register mapping is
+ * released again if the domain cannot be allocated.
+ */
+static struct irq_domain *__init hlwd_pic_init(struct device_node *np)
+{
+	struct irq_domain *domain;
+	struct resource res;
+	void __iomem *regs;
+
+	if (of_address_to_resource(np, 0, &res) != 0) {
+		pr_err("no io memory range found\n");
+		return NULL;
+	}
+
+	regs = ioremap(res.start, resource_size(&res));
+	if (regs == NULL) {
+		pr_err("ioremap failed\n");
+		return NULL;
+	}
+
+	pr_info("controller at 0x%pa mapped to 0x%p\n", &res.start, regs);
+
+	__hlwd_quiesce(regs);
+
+	domain = irq_domain_add_linear(np, HLWD_NR_IRQS,
+				       &hlwd_irq_domain_ops, regs);
+	if (domain != NULL)
+		return domain;
+
+	pr_err("failed to allocate irq_domain\n");
+	iounmap(regs);
+	return NULL;
+}
+
+/*
+ * Return the Linux irq number for the pending interrupt reported by
+ * __hlwd_pic_get_irq(), or 0 when that helper returns 0.
+ */
+unsigned int hlwd_pic_get_irq(void)
+{
+	unsigned int hwirq = __hlwd_pic_get_irq(hlwd_irq_host);
+
+	if (hwirq == 0)
+		return 0;
+
+	return irq_linear_revmap(hlwd_irq_host, hwirq);
+}
+
+/*
+ * Probe function.
+ *
+ */
+
+/*
+ * Probe the first "nintendo,hollywood-pic" node that has an "interrupts"
+ * property, initialise it and chain it to its parent interrupt.
+ *
+ * A failed controller init is fatal (BUG_ON).  If the cascade interrupt
+ * cannot be mapped, an error is logged and no chained handler is
+ * installed; the domain is still recorded in hlwd_irq_host.
+ */
+void __init hlwd_pic_probe(void)
+{
+	struct irq_domain *host;
+	struct device_node *np;
+	unsigned int cascade_virq;
+
+	for_each_compatible_node(np, NULL, "nintendo,hollywood-pic") {
+		/* nodes without "interrupts" are skipped */
+		if (!of_get_property(np, "interrupts", NULL))
+			continue;
+
+		host = hlwd_pic_init(np);
+		BUG_ON(!host);
+
+		/* irq_of_parse_and_map() returns 0 when mapping fails */
+		cascade_virq = irq_of_parse_and_map(np, 0);
+		if (!cascade_virq) {
+			pr_err("failed to map cascade irq\n");
+		} else {
+			irq_set_handler_data(cascade_virq, host);
+			irq_set_chained_handler(cascade_virq,
+						hlwd_pic_irq_cascade);
+		}
+		hlwd_irq_host = host;
+
+		/* leaving the iterator early: drop its reference ourselves */
+		of_node_put(np);
+		break;
+	}
+}
+
+/**
+ * hlwd_quiesce() - quiesce hollywood irq controller
+ *
+ * Mask and ack all interrupt sources.
+ *
+ * Does nothing when no controller was set up by hlwd_pic_probe().
+ */
+void hlwd_quiesce(void)
+{
+	if (!hlwd_irq_host || !hlwd_irq_host->host_data)
+		return;
+
+	__hlwd_quiesce(hlwd_irq_host->host_data);
+}
+
diff --git a/arch/powerpc/platforms/embedded6xx/hlwd-pic.h b/arch/powerpc/platforms/embedded6xx/hlwd-pic.h
new file mode 100644
index 000000000..c2fa42e19
--- /dev/null
+++ b/arch/powerpc/platforms/embedded6xx/hlwd-pic.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * arch/powerpc/platforms/embedded6xx/hlwd-pic.h
+ *
+ * Nintendo Wii "Hollywood" interrupt controller support.
+ * Copyright (C) 2009 The GameCube Linux Team
+ * Copyright (C) 2009 Albert Herranz
+ */
+
+#ifndef __HLWD_PIC_H
+#define __HLWD_PIC_H
+
+/* Returns the Linux irq for the hardware irq reported by the controller, or 0. */
+extern unsigned int hlwd_pic_get_irq(void);
+/* Boot-time setup of the first usable "nintendo,hollywood-pic" node. */
+void __init hlwd_pic_probe(void);
+/* Mask and ack all interrupt sources of the probed controller. */
+extern void hlwd_quiesce(void);
+
+#endif
diff --git a/arch/powerpc/platforms/embedded6xx/holly.c b/arch/powerpc/platforms/embedded6xx/holly.c
new file mode 100644
index 000000000..ce9e58ee9
--- /dev/null
+++ b/arch/powerpc/platforms/embedded6xx/holly.c
@@ -0,0 +1,268 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Board setup routines for the IBM 750GX/CL platform w/ TSI10x bridge
+ *
+ * Copyright 2007 IBM Corporation
+ *
+ * Stephen Winiecki <stevewin@us.ibm.com>
+ * Josh Boyer <jwboyer@linux.vnet.ibm.com>
+ *
+ * Based on code from mpc7448_hpc2.c
+ */
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/kdev_t.h>
+#include <linux/console.h>
+#include <linux/delay.h>
+#include <linux/irq.h>
+#include <linux/seq_file.h>
+#include <linux/root_dev.h>
+#include <linux/serial.h>
+#include <linux/tty.h>
+#include <linux/serial_core.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/extable.h>
+
+#include <asm/time.h>
+#include <asm/machdep.h>
+#include <asm/udbg.h>
+#include <asm/tsi108.h>
+#include <asm/pci-bridge.h>
+#include <asm/reg.h>
+#include <mm/mmu_decl.h>
+#include <asm/tsi108_irq.h>
+#include <asm/tsi108_pci.h>
+#include <asm/mpic.h>
+
+#undef DEBUG
+
+#define HOLLY_PCI_CFG_PHYS 0x7c000000
+
+/*
+ * PCI exclusion hook: slot 0 on bus 0 is reported as not found, every
+ * other bus/devfn combination is accepted.
+ */
+static int holly_exclude_device(struct pci_controller *hose, u_char bus,
+				u_char devfn)
+{
+	const int hidden = (bus == 0) && (PCI_SLOT(devfn) == 0);
+
+	return hidden ? PCIBIOS_DEVICE_NOT_FOUND : PCIBIOS_SUCCESSFUL;
+}
+
+/*
+ * Reprogram the TSI108 processor-bus and PCI look-up tables and the PCI
+ * window registers with fixed values, so the mappings no longer depend on
+ * what firmware (PIBS) left behind. All values below are hard-coded.
+ */
+static void __init holly_remap_bridge(void)
+{
+ u32 lut_val, lut_addr;
+ int i;
+
+ printk(KERN_INFO "Remapping PCI bridge\n");
+
+ /* Re-init the PCI bridge and LUT registers to have mappings that don't
+ * rely on PIBS
+ */
+ lut_addr = 0x900;
+ /* Each LUT entry is a pair of 32-bit registers; fill the first 31. */
+ for (i = 0; i < 31; i++) {
+ tsi108_write_reg(TSI108_PB_OFFSET + lut_addr, 0x00000201);
+ lut_addr += 4;
+ tsi108_write_reg(TSI108_PB_OFFSET + lut_addr, 0x0);
+ lut_addr += 4;
+ }
+
+ /* Reserve the last LUT entry for PCI I/O space */
+ tsi108_write_reg(TSI108_PB_OFFSET + lut_addr, 0x00000241);
+ lut_addr += 4;
+ tsi108_write_reg(TSI108_PB_OFFSET + lut_addr, 0x0);
+
+ /* Map PCI I/O space */
+ tsi108_write_reg(TSI108_PCI_PFAB_IO_UPPER, 0x0);
+ tsi108_write_reg(TSI108_PCI_PFAB_IO, 0x1);
+
+ /* Map PCI CFG space */
+ tsi108_write_reg(TSI108_PCI_PFAB_BAR0_UPPER, 0x0);
+ /* 0x7c000000 is the same value as HOLLY_PCI_CFG_PHYS. */
+ tsi108_write_reg(TSI108_PCI_PFAB_BAR0, 0x7c000000 | 0x01);
+
+ /* We don't need MEM32 and PRM remapping so disable them */
+ tsi108_write_reg(TSI108_PCI_PFAB_MEM32, 0x0);
+ tsi108_write_reg(TSI108_PCI_PFAB_PFM3, 0x0);
+ tsi108_write_reg(TSI108_PCI_PFAB_PFM4, 0x0);
+
+ /* Set P2O_BAR0 */
+ tsi108_write_reg(TSI108_PCI_P2O_BAR0_UPPER, 0x0);
+ tsi108_write_reg(TSI108_PCI_P2O_BAR0, 0xc0000000);
+
+ /* Init the PCI LUTs to do no remapping */
+ lut_addr = 0x500;
+ lut_val = 0x00000002;
+
+ /* 32 entries; the first word of each entry advances by 0x02000000. */
+ for (i = 0; i < 32; i++) {
+ tsi108_write_reg(TSI108_PCI_OFFSET + lut_addr, lut_val);
+ lut_addr += 4;
+ tsi108_write_reg(TSI108_PCI_OFFSET + lut_addr, 0x40000000);
+ lut_addr += 4;
+ lut_val += 0x02000000;
+ }
+ tsi108_write_reg(TSI108_PCI_P2O_PAGE_SIZES, 0x00007900);
+
+ /* Set 64-bit PCI bus address for system memory */
+ tsi108_write_reg(TSI108_PCI_P2O_BAR2_UPPER, 0x0);
+ tsi108_write_reg(TSI108_PCI_P2O_BAR2, 0x0);
+}
+
+/*
+ * PHB discovery hook: remap the bridge, hand the first node of type "pci"
+ * (if any) to tsi108_setup_pci() and install the device exclusion hook.
+ * Progress is reported through ppc_md.progress when that hook is set.
+ */
+static void __init holly_init_pci(void)
+{
+	struct device_node *pci_node;
+
+	if (ppc_md.progress)
+		ppc_md.progress("holly_setup_arch():set_bridge", 0);
+
+	/* setup PCI host bridge */
+	holly_remap_bridge();
+
+	pci_node = of_find_node_by_type(NULL, "pci");
+	if (pci_node) {
+		tsi108_setup_pci(pci_node, HOLLY_PCI_CFG_PHYS, 1);
+		of_node_put(pci_node);
+	}
+
+	ppc_md.pci_exclude_device = holly_exclude_device;
+
+	if (ppc_md.progress)
+		ppc_md.progress("tsi108: resources set", 0x100);
+}
+
+/*
+ * Architecture setup hook: record the value returned by get_vir_csrbase()
+ * in tsi108_csr_vir_base and announce the platform in the kernel log.
+ */
+static void __init holly_setup_arch(void)
+{
+ tsi108_csr_vir_base = get_vir_csrbase();
+
+ printk(KERN_INFO "PPC750GX/CL Platform\n");
+}
+
+/*
+ * Interrupt setup and service. Interrupts on the holly come
+ * from the four external INT pins, PCI interrupts are routed via
+ * PCI interrupt control registers, it generates internal IRQ23
+ *
+ * Interrupt routing on the Holly Board:
+ * TSI108:PB_INT[0] -> CPU0:INT#
+ * TSI108:PB_INT[1] -> CPU0:MCP#
+ * TSI108:PB_INT[2] -> N/C
+ * TSI108:PB_INT[3] -> N/C
+ *
+ * Allocates and initialises the Tsi108 MPIC. With CONFIG_PCI it also
+ * chains tsi108_irq_cascade() on the interrupt of the first "pci" node;
+ * when either device tree node is missing the function logs an error and
+ * returns early, releasing any node reference it already holds.
+ */
+static void __init holly_init_IRQ(void)
+{
+	struct mpic *mpic;
+#ifdef CONFIG_PCI
+	unsigned int cascade_pci_irq;
+	struct device_node *tsi_pci;
+	struct device_node *cascade_node = NULL;
+#endif
+
+	mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN |
+			MPIC_SPV_EOI | MPIC_NO_PTHROU_DIS | MPIC_REGSET_TSI108,
+			24, 0,
+			"Tsi108_PIC");
+
+	BUG_ON(mpic == NULL);
+
+	mpic_assign_isu(mpic, 0, mpic->paddr + 0x100);
+
+	mpic_init(mpic);
+
+#ifdef CONFIG_PCI
+	tsi_pci = of_find_node_by_type(NULL, "pci");
+	if (tsi_pci == NULL) {
+		printk(KERN_ERR "%s: No tsi108 pci node found !\n", __func__);
+		return;
+	}
+
+	cascade_node = of_find_node_by_type(NULL, "pic-router");
+	if (cascade_node == NULL) {
+		printk(KERN_ERR "%s: No tsi108 pci cascade node found !\n", __func__);
+		/* Do not leak the reference taken on the pci node above. */
+		of_node_put(tsi_pci);
+		return;
+	}
+
+	cascade_pci_irq = irq_of_parse_and_map(tsi_pci, 0);
+	pr_debug("%s: tsi108 cascade_pci_irq = 0x%x\n", __func__, (u32) cascade_pci_irq);
+	tsi108_pci_int_init(cascade_node);
+	irq_set_handler_data(cascade_pci_irq, mpic);
+	irq_set_chained_handler(cascade_pci_irq, tsi108_irq_cascade);
+
+	of_node_put(tsi_pci);
+	of_node_put(cascade_node);
+#endif
+	/*
+	 * Configure MPIC outputs to CPU0
+	 * NOTE(review): the early returns above skip this write - confirm
+	 * that is intended.
+	 */
+	tsi108_write_reg(TSI108_MPIC_OFFSET + 0x30c, 0);
+}
+
+/* Emit the fixed vendor and machine lines for this board into @m. */
+static void holly_show_cpuinfo(struct seq_file *m)
+{
+	seq_puts(m, "vendor\t\t: IBM\n");
+	seq_puts(m, "machine\t\t: PPC750 GX/CL\n");
+}
+
+/*
+ * Restart hook: set bit 1 (the "BOOT" bit) in the bridge register at
+ * TSI108_PB_OFFSET + 0x414, then "rfi" to the reset vector 0xfff00100
+ * with MSR_IP set so that firmware takes over. Never returns.
+ *
+ * The bridge base comes from the first "tsi-bridge" node; 0xc0000000 is
+ * used when the node or its address cannot be resolved. @cmd is unused.
+ */
+static void __noreturn holly_restart(char *cmd)
+{
+	__be32 __iomem *ocn_bar1 = NULL;
+	unsigned long bar;
+	struct device_node *bridge = NULL;
+	struct resource res;
+	phys_addr_t addr = 0xc0000000;
+
+	local_irq_disable();
+
+	bridge = of_find_node_by_type(NULL, "tsi-bridge");
+	if (bridge) {
+		/* res is only valid when the lookup succeeded. */
+		if (!of_address_to_resource(bridge, 0, &res))
+			addr = res.start;
+		of_node_put(bridge);
+	}
+	addr += (TSI108_PB_OFFSET + 0x414);
+
+	ocn_bar1 = ioremap(addr, 0x4);
+
+	/* Turn on the BOOT bit so the addresses are correctly
+	 * routed to the HLP interface. Skipped if the register could not
+	 * be mapped, rather than dereferencing a NULL mapping. */
+	if (ocn_bar1) {
+		bar = ioread32be(ocn_bar1);
+		bar |= 2;
+		iowrite32be(bar, ocn_bar1);
+		iosync();
+	}
+
+	/* Set SRR0 to the reset vector and turn on MSR_IP */
+	mtspr(SPRN_SRR0, 0xfff00100);
+	mtspr(SPRN_SRR1, MSR_IP);
+
+	/* Do an rfi to jump back to firmware. Somewhat evil,
+	 * but it works
+	 */
+	__asm__ __volatile__("rfi" : : : "memory");
+
+	/* Spin until reset happens. Shouldn't really get here */
+	for (;;) ;
+}
+
+/*
+ * Machine check hook. When the faulting instruction (regs->nip) has an
+ * exception table entry, clear the TSI108 PCI config error, mark the
+ * register state recoverable, resume at the fixup address and return 1.
+ * Returns 0 when there is no such entry.
+ */
+static int ppc750_machine_check_exception(struct pt_regs *regs)
+{
+	const struct exception_table_entry *fixup;
+
+	/* Are we prepared to handle this fault */
+	fixup = search_exception_tables(regs->nip);
+	if (fixup == NULL)
+		return 0;
+
+	tsi108_clear_pci_cfg_error();
+	regs_set_recoverable(regs);
+	regs_set_return_ip(regs, extable_fixup(fixup));
+	return 1;
+}
+
+/* Machine description: binds the holly hooks defined above to this board. */
+define_machine(holly){
+ .name = "PPC750 GX/CL TSI",
+ .compatible = "ibm,holly",
+ .setup_arch = holly_setup_arch,
+ .discover_phbs = holly_init_pci,
+ .init_IRQ = holly_init_IRQ,
+ .show_cpuinfo = holly_show_cpuinfo,
+ .get_irq = mpic_get_irq,
+ .restart = holly_restart,
+ .machine_check_exception = ppc750_machine_check_exception,
+ .progress = udbg_progress,
+};
diff --git a/arch/powerpc/platforms/embedded6xx/linkstation.c b/arch/powerpc/platforms/embedded6xx/linkstation.c
new file mode 100644
index 000000000..9c10aac40
--- /dev/null
+++ b/arch/powerpc/platforms/embedded6xx/linkstation.c
@@ -0,0 +1,162 @@
+/*
+ * Board setup routines for the Buffalo Linkstation / Kurobox Platform.
+ *
+ * Copyright (C) 2006 G. Liakhovetski (g.liakhovetski@gmx.de)
+ *
+ * Based on sandpoint.c by Mark A. Greer
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2. This program is licensed "as is" without any warranty of
+ * any kind, whether express or implied.
+ */
+
+#include <linux/kernel.h>
+#include <linux/initrd.h>
+#include <linux/of_platform.h>
+
+#include <asm/time.h>
+#include <asm/mpic.h>
+#include <asm/pci-bridge.h>
+
+#include "mpc10x.h"
+
+/* Match table passed to of_platform_bus_probe(): "soc" type or "simple-bus". */
+static const struct of_device_id of_bus_ids[] __initconst = {
+ { .type = "soc", },
+ { .compatible = "simple-bus", },
+ {},
+};
+
+/*
+ * Device initcall for the linkstation machine: probe the buses listed in
+ * of_bus_ids. The probe result is not checked; always returns 0.
+ */
+static int __init declare_of_platform_devices(void)
+{
+ of_platform_bus_probe(NULL, of_bus_ids, NULL);
+ return 0;
+}
+machine_device_initcall(linkstation, declare_of_platform_devices);
+
+/*
+ * Register the PCI host bridge described by @dev (CONFIG_PCI only).
+ *
+ * The bus number range comes from the "bus-range" property; when the
+ * property is missing or shorter than two cells, a warning is logged and
+ * the range 0..0xff is used. Indirect config access is set up at
+ * 0xfec00000/0xfee00000 and the "ranges" property is processed.
+ *
+ * Returns 0 on success (and always without CONFIG_PCI), or -ENOMEM when
+ * the controller cannot be allocated.
+ */
+static int __init linkstation_add_bridge(struct device_node *dev)
+{
+#ifdef CONFIG_PCI
+	int len;
+	struct pci_controller *hose;
+	const int *bus_range;
+
+	printk("Adding PCI host bridge %pOF\n", dev);
+
+	bus_range = of_get_property(dev, "bus-range", &len);
+	if (bus_range == NULL || len < 2 * sizeof(int)) {
+		printk(KERN_WARNING "Can't get bus-range for %pOF, assume"
+			" bus 0\n", dev);
+		/* Never index a missing or truncated property below. */
+		bus_range = NULL;
+	}
+
+	hose = pcibios_alloc_controller(dev);
+	if (hose == NULL)
+		return -ENOMEM;
+	hose->first_busno = bus_range ? bus_range[0] : 0;
+	hose->last_busno = bus_range ? bus_range[1] : 0xff;
+	setup_indirect_pci(hose, 0xfec00000, 0xfee00000, 0);
+
+	/* Interpret the "ranges" property */
+	/* This also maps the I/O region and sets isa_io/mem_base */
+	pci_process_bridge_OF_ranges(hose, dev, 1);
+#endif
+	return 0;
+}
+
+/* Architecture setup hook: only prints the product banner. */
+static void __init linkstation_setup_arch(void)
+{
+ printk(KERN_INFO "BUFFALO Network Attached Storage Series\n");
+ printk(KERN_INFO "(C) 2002-2005 BUFFALO INC.\n");
+}
+
+/*
+ * PHB discovery hook: register every "pci" node compatible with
+ * "mpc10x-pci". The result of each registration is ignored.
+ */
+static void __init linkstation_setup_pci(void)
+{
+	struct device_node *bridge;
+
+	/* Lookup PCI host bridges */
+	for_each_compatible_node(bridge, "pci", "mpc10x-pci") {
+		linkstation_add_bridge(bridge);
+	}
+}
+
+/*
+ * Interrupt setup and service. Interrupts on the linkstation come
+ * from the four PCI slots plus onboard 8241 devices: I2C, DUART.
+ */
+static void __init linkstation_init_IRQ(void)
+{
+ struct mpic *mpic;
+
+ /* Allocation failure is fatal: there is no fallback controller here. */
+ mpic = mpic_alloc(NULL, 0, 0, 4, 0, " EPIC ");
+ BUG_ON(mpic == NULL);
+
+ /* The three ISUs below are placed at fixed offsets from mpic->paddr. */
+ /* PCI IRQs */
+ mpic_assign_isu(mpic, 0, mpic->paddr + 0x10200);
+
+ /* I2C */
+ mpic_assign_isu(mpic, 1, mpic->paddr + 0x11000);
+
+ /* ttyS0, ttyS1 */
+ mpic_assign_isu(mpic, 2, mpic->paddr + 0x11100);
+
+ mpic_init(mpic);
+}
+
+extern void avr_uart_configure(void);
+extern void avr_uart_send(const char);
+
+/*
+ * Restart hook: with interrupts off, configure the AVR UART, send the
+ * 'C' command once, then send 'G' forever. Never returns; @cmd is unused.
+ */
+static void __noreturn linkstation_restart(char *cmd)
+{
+	local_irq_disable();
+
+	avr_uart_configure();	/* Reset system via AVR */
+	avr_uart_send('C');	/* Send reboot command */
+
+	/* Spin until reset happens, "kicking" on every pass */
+	while (1)
+		avr_uart_send('G');
+}
+
+/*
+ * Power-off hook: with interrupts off, configure the AVR UART, send the
+ * 'E' command once, then send 'G' forever. Never returns.
+ */
+static void __noreturn linkstation_power_off(void)
+{
+	local_irq_disable();
+
+	avr_uart_configure();	/* Power down system via AVR */
+	avr_uart_send('E');	/* send shutdown command */
+
+	/* Spin until power-off happens, "kicking" on every pass */
+	while (1)
+		avr_uart_send('G');
+	/* NOTREACHED */
+}
+
+/* Halt hook: halting is implemented as a power-off. Never returns. */
+static void __noreturn linkstation_halt(void)
+{
+ linkstation_power_off();
+ /* NOTREACHED */
+}
+
+/* Emit the fixed vendor and machine lines for this board into @m. */
+static void linkstation_show_cpuinfo(struct seq_file *m)
+{
+	seq_puts(m, "vendor\t\t: Buffalo Technology\n");
+	seq_puts(m, "machine\t\t: Linkstation I/Kurobox(HG)\n");
+}
+
+/*
+ * Probe hook: installs linkstation_power_off() as pm_power_off and
+ * unconditionally returns 1.
+ */
+static int __init linkstation_probe(void)
+{
+ pm_power_off = linkstation_power_off;
+
+ return 1;
+}
+
+/* Machine description: binds the linkstation hooks defined above to this board. */
+define_machine(linkstation){
+ .name = "Buffalo Linkstation",
+ .compatible = "linkstation",
+ .probe = linkstation_probe,
+ .setup_arch = linkstation_setup_arch,
+ .discover_phbs = linkstation_setup_pci,
+ .init_IRQ = linkstation_init_IRQ,
+ .show_cpuinfo = linkstation_show_cpuinfo,
+ .get_irq = mpic_get_irq,
+ .restart = linkstation_restart,
+ .halt = linkstation_halt,
+};
diff --git a/arch/powerpc/platforms/embedded6xx/ls_uart.c b/arch/powerpc/platforms/embedded6xx/ls_uart.c
new file mode 100644
index 000000000..6c1dbf8ae
--- /dev/null
+++ b/arch/powerpc/platforms/embedded6xx/ls_uart.c
@@ -0,0 +1,147 @@
+/*
+ * AVR power-management chip interface for the Buffalo Linkstation /
+ * Kurobox Platform.
+ *
+ * Author: 2006 (c) G. Liakhovetski
+ * g.liakhovetski@gmx.de
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2. This program is licensed "as is" without any warranty of
+ * any kind, whether express or implied.
+ */
+#include <linux/workqueue.h>
+#include <linux/string.h>
+#include <linux/delay.h>
+#include <linux/serial_reg.h>
+#include <linux/serial_8250.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <asm/io.h>
+#include <asm/termbits.h>
+
+#include "mpc10x.h"
+
+static void __iomem *avr_addr;
+static unsigned long avr_clock;
+
+static struct work_struct wd_work;
+
+/*
+ * Work handler that sends a fixed command string to the AVR to disarm its
+ * watchdog, then logs whatever bytes the AVR sends back.
+ *
+ * The string is written in bursts of at most 16 bytes whenever the line
+ * status register reports the transmitter ready; at most 8 attempts are
+ * made, with a 17 ms sleep between them.
+ */
+static void wd_stop(struct work_struct *unused)
+{
+ const char string[] = "AAAAFFFFJJJJ>>>>VVVV>>>>ZZZZVVVVKKKK";
+ int i = 0, rescue = 8;
+ int len = strlen(string);
+
+ while (rescue--) {
+ int j;
+ char lsr = in_8(avr_addr + UART_LSR);
+
+ if (lsr & (UART_LSR_THRE | UART_LSR_TEMT)) {
+ /* i persists across passes, so each burst resumes where the last stopped. */
+ for (j = 0; j < 16 && i < len; j++, i++)
+ out_8(avr_addr + UART_TX, string[i]);
+ if (i == len) {
+ /* Read "OK" back: 4ms for the last "KKKK"
+ plus a couple bytes back */
+ msleep(7);
+ printk("linkstation: disarming the AVR watchdog: ");
+ /* Drain and echo the reply while receive data is flagged ready. */
+ while (in_8(avr_addr + UART_LSR) & UART_LSR_DR)
+ printk("%c", in_8(avr_addr + UART_RX));
+ break;
+ }
+ }
+ msleep(17);
+ }
+ printk("\n");
+}
+
+/*
+ * Divisor for 9600 baud from a UART input clock, rounded to nearest. The
+ * whole expansion is parenthesised so that it stays a single operand when
+ * the macro is used inside a larger expression.
+ */
+#define AVR_QUOT(clock) (((clock) + 8 * 9600) / (16 * 9600))
+
+/*
+ * Program the AVR UART: 8 data bits with the STOP, PARITY and EPAR line
+ * control bits set, divisor AVR_QUOT(avr_clock), FIFO enabled, modem
+ * control and interrupt enable cleared. Does nothing until both avr_addr
+ * and avr_clock have been set.
+ */
+void avr_uart_configure(void)
+{
+	unsigned char lcr = UART_LCR_WLEN8;
+	unsigned int divisor = AVR_QUOT(avr_clock);
+
+	if (!(avr_addr && avr_clock))
+		return;
+
+	/* initialise UART */
+	out_8(avr_addr + UART_LCR, lcr);
+	out_8(avr_addr + UART_MCR, 0);
+	out_8(avr_addr + UART_IER, 0);
+
+	/* Set character format */
+	lcr |= UART_LCR_STOP | UART_LCR_PARITY | UART_LCR_EPAR;
+	out_8(avr_addr + UART_LCR, lcr);
+
+	/* The divisor latch is only reachable while DLAB is set. */
+	out_8(avr_addr + UART_LCR, lcr | UART_LCR_DLAB);
+	out_8(avr_addr + UART_DLL, divisor & 0xff);	/* LS of divisor */
+	out_8(avr_addr + UART_DLM, divisor >> 8);	/* MS of divisor */
+	out_8(avr_addr + UART_LCR, lcr);		/* reset DLAB */
+
+	/* enable FIFO */
+	out_8(avr_addr + UART_FCR, UART_FCR_ENABLE_FIFO);
+}
+
+/*
+ * Write the byte @c to the AVR UART transmit register four times in a
+ * row. Does nothing until both avr_addr and avr_clock have been set.
+ */
+void avr_uart_send(const char c)
+{
+	int i;
+
+	if (!(avr_addr && avr_clock))
+		return;
+
+	for (i = 0; i < 4; i++)
+		out_8(avr_addr + UART_TX, c);
+}
+
+/*
+ * Bring the AVR UART into a known state with interrupts disabled. When
+ * CONFIG_SERIAL_8250 is not set, the FIFOs are cleared and pending status
+ * is read out first; avr_uart_configure() then programs the line settings.
+ */
+static void __init ls_uart_init(void)
+{
+ local_irq_disable();
+
+#ifndef CONFIG_SERIAL_8250
+ out_8(avr_addr + UART_FCR, UART_FCR_ENABLE_FIFO); /* enable FIFO */
+ out_8(avr_addr + UART_FCR, UART_FCR_ENABLE_FIFO |
+ UART_FCR_CLEAR_RCVR | UART_FCR_CLEAR_XMIT); /* clear FIFOs */
+ out_8(avr_addr + UART_FCR, 0);
+ out_8(avr_addr + UART_IER, 0);
+
+ /* Clear up interrupts */
+ /* The values read are discarded; only the reads themselves matter. */
+ (void) in_8(avr_addr + UART_LSR);
+ (void) in_8(avr_addr + UART_RX);
+ (void) in_8(avr_addr + UART_IIR);
+ (void) in_8(avr_addr + UART_MSR);
+#endif
+ avr_uart_configure();
+
+ local_irq_enable();
+}
+
+/*
+ * Locate the AVR UART node, record its input clock, map its registers,
+ * program the UART and schedule the work item that disarms the AVR
+ * watchdog.
+ *
+ * Returns 0 on success, -EINVAL when the node or a usable non-zero
+ * "clock-frequency" property is missing, the of_address_to_resource()
+ * error when the register range cannot be resolved, or -EFAULT when the
+ * registers cannot be mapped.
+ */
+static int __init ls_uarts_init(void)
+{
+	struct device_node *avr;
+	struct resource res;
+	const u32 *clock;
+	int len, ret;
+
+	avr = of_find_node_by_path("/soc10x/serial@80004500");
+	if (!avr)
+		return -EINVAL;
+
+	/* Validate the property before dereferencing it. */
+	clock = of_get_property(avr, "clock-frequency", &len);
+	if (!clock || len < (int)sizeof(*clock) || !*clock) {
+		ret = -EINVAL;
+		goto out_put;
+	}
+	avr_clock = *clock;
+
+	ret = of_address_to_resource(avr, 0, &res);
+
+out_put:
+	/* The node is not used past this point, on success or on failure. */
+	of_node_put(avr);
+	if (ret)
+		return ret;
+
+	avr_addr = ioremap(res.start, 32);
+	if (!avr_addr)
+		return -EFAULT;
+
+	ls_uart_init();
+
+	INIT_WORK(&wd_work, wd_stop);
+	schedule_work(&wd_work);
+
+	return 0;
+}
+
+machine_late_initcall(linkstation, ls_uarts_init);
diff --git a/arch/powerpc/platforms/embedded6xx/mpc10x.h b/arch/powerpc/platforms/embedded6xx/mpc10x.h
new file mode 100644
index 000000000..5ad12023e
--- /dev/null
+++ b/arch/powerpc/platforms/embedded6xx/mpc10x.h
@@ -0,0 +1,159 @@
+/*
+ * Common routines for the Motorola SPS MPC106/8240/107 Host bridge/Mem
+ * ctlr/EPIC/etc.
+ *
+ * Author: Mark A. Greer
+ * mgreer@mvista.com
+ *
+ * 2001 (c) MontaVista, Software, Inc. This file is licensed under
+ * the terms of the GNU General Public License version 2. This program
+ * is licensed "as is" without any warranty of any kind, whether express
+ * or implied.
+ */
+#ifndef __PPC_KERNEL_MPC10X_H
+#define __PPC_KERNEL_MPC10X_H
+
+#include <linux/pci_ids.h>
+#include <asm/pci-bridge.h>
+
+/*
+ * The values here don't completely map everything but should work in most
+ * cases.
+ *
+ * MAP A (PReP Map)
+ * Processor: 0x80000000 - 0x807fffff -> PCI I/O: 0x00000000 - 0x007fffff
+ * Processor: 0xc0000000 - 0xdfffffff -> PCI MEM: 0x00000000 - 0x1fffffff
+ * PCI MEM: 0x80000000 -> Processor System Memory: 0x00000000
+ *
+ * MAP B (CHRP Map)
+ * Processor: 0xfe000000 - 0xfebfffff -> PCI I/O: 0x00000000 - 0x00bfffff
+ * Processor: 0x80000000 - 0xbfffffff -> PCI MEM: 0x80000000 - 0xbfffffff
+ * PCI MEM: 0x00000000 -> Processor System Memory: 0x00000000
+ */
+
+/*
+ * Define the vendor/device IDs for the various bridges--should be added to
+ * <linux/pci_ids.h>
+ */
+#define MPC10X_BRIDGE_106 ((PCI_DEVICE_ID_MOTOROLA_MPC106 << 16) | \
+ PCI_VENDOR_ID_MOTOROLA)
+#define MPC10X_BRIDGE_8240 ((0x0003 << 16) | PCI_VENDOR_ID_MOTOROLA)
+#define MPC10X_BRIDGE_107 ((0x0004 << 16) | PCI_VENDOR_ID_MOTOROLA)
+#define MPC10X_BRIDGE_8245 ((0x0006 << 16) | PCI_VENDOR_ID_MOTOROLA)
+
+/* Define the type of map to use */
+#define MPC10X_MEM_MAP_A 1
+#define MPC10X_MEM_MAP_B 2
+
+/* Map A (PReP Map) Defines */
+#define MPC10X_MAPA_CNFG_ADDR 0x80000cf8
+#define MPC10X_MAPA_CNFG_DATA 0x80000cfc
+
+#define MPC10X_MAPA_ISA_IO_BASE 0x80000000
+#define MPC10X_MAPA_ISA_MEM_BASE 0xc0000000
+#define MPC10X_MAPA_DRAM_OFFSET 0x80000000
+
+#define MPC10X_MAPA_PCI_INTACK_ADDR 0xbffffff0
+#define MPC10X_MAPA_PCI_IO_START 0x00000000
+#define MPC10X_MAPA_PCI_IO_END (0x00800000 - 1)
+#define MPC10X_MAPA_PCI_MEM_START 0x00000000
+#define MPC10X_MAPA_PCI_MEM_END (0x20000000 - 1)
+
+#define MPC10X_MAPA_PCI_MEM_OFFSET (MPC10X_MAPA_ISA_MEM_BASE - \
+ MPC10X_MAPA_PCI_MEM_START)
+
+/* Map B (CHRP Map) Defines */
+#define MPC10X_MAPB_CNFG_ADDR 0xfec00000
+#define MPC10X_MAPB_CNFG_DATA 0xfee00000
+
+#define MPC10X_MAPB_ISA_IO_BASE 0xfe000000
+#define MPC10X_MAPB_ISA_MEM_BASE 0x80000000
+#define MPC10X_MAPB_DRAM_OFFSET 0x00000000
+
+#define MPC10X_MAPB_PCI_INTACK_ADDR 0xfef00000
+#define MPC10X_MAPB_PCI_IO_START 0x00000000
+#define MPC10X_MAPB_PCI_IO_END (0x00c00000 - 1)
+#define MPC10X_MAPB_PCI_MEM_START 0x80000000
+#define MPC10X_MAPB_PCI_MEM_END (0xc0000000 - 1)
+
+#define MPC10X_MAPB_PCI_MEM_OFFSET (MPC10X_MAPB_ISA_MEM_BASE - \
+ MPC10X_MAPB_PCI_MEM_START)
+
+/* Miscellaneous Configuration register offsets */
+#define MPC10X_CFG_PIR_REG 0x09
+#define MPC10X_CFG_PIR_HOST_BRIDGE 0x00
+#define MPC10X_CFG_PIR_AGENT 0x01
+
+#define MPC10X_CFG_EUMBBAR 0x78
+
+#define MPC10X_CFG_PICR1_REG 0xa8
+#define MPC10X_CFG_PICR1_ADDR_MAP_MASK 0x00010000
+#define MPC10X_CFG_PICR1_ADDR_MAP_A 0x00010000
+#define MPC10X_CFG_PICR1_ADDR_MAP_B 0x00000000
+#define MPC10X_CFG_PICR1_SPEC_PCI_RD 0x00000004
+#define MPC10X_CFG_PICR1_ST_GATH_EN 0x00000040
+
+#define MPC10X_CFG_PICR2_REG 0xac
+#define MPC10X_CFG_PICR2_COPYBACK_OPT 0x00000001
+
+#define MPC10X_CFG_MAPB_OPTIONS_REG 0xe0
+#define MPC10X_CFG_MAPB_OPTIONS_CFAE 0x80 /* CPU_FD_ALIAS_EN */
+#define MPC10X_CFG_MAPB_OPTIONS_PFAE 0x40 /* PCI_FD_ALIAS_EN */
+#define MPC10X_CFG_MAPB_OPTIONS_DR 0x20 /* DLL_RESET */
+#define MPC10X_CFG_MAPB_OPTIONS_PCICH 0x08 /* PCI_COMPATIBILITY_HOLE */
+#define MPC10X_CFG_MAPB_OPTIONS_PROCCH 0x04 /* PROC_COMPATIBILITY_HOLE */
+
+/* Define offsets for the memory controller registers in the config space */
+#define MPC10X_MCTLR_MEM_START_1 0x80 /* Banks 0-3 */
+#define MPC10X_MCTLR_MEM_START_2 0x84 /* Banks 4-7 */
+#define MPC10X_MCTLR_EXT_MEM_START_1 0x88 /* Banks 0-3 */
+#define MPC10X_MCTLR_EXT_MEM_START_2 0x8c /* Banks 4-7 */
+
+#define MPC10X_MCTLR_MEM_END_1 0x90 /* Banks 0-3 */
+#define MPC10X_MCTLR_MEM_END_2 0x94 /* Banks 4-7 */
+#define MPC10X_MCTLR_EXT_MEM_END_1 0x98 /* Banks 0-3 */
+#define MPC10X_MCTLR_EXT_MEM_END_2 0x9c /* Banks 4-7 */
+
+#define MPC10X_MCTLR_MEM_BANK_ENABLES 0xa0
+
+/* Define some offset in the EUMB */
+#define MPC10X_EUMB_SIZE 0x00100000 /* Total EUMB size (1MB) */
+
+#define MPC10X_EUMB_MU_OFFSET 0x00000000 /* Msg Unit reg offset */
+#define MPC10X_EUMB_MU_SIZE 0x00001000 /* Msg Unit reg size */
+#define MPC10X_EUMB_DMA_OFFSET 0x00001000 /* DMA Unit reg offset */
+#define MPC10X_EUMB_DMA_SIZE 0x00001000 /* DMA Unit reg size */
+#define MPC10X_EUMB_ATU_OFFSET 0x00002000 /* Addr xlate reg offset */
+#define MPC10X_EUMB_ATU_SIZE 0x00001000 /* Addr xlate reg size */
+#define MPC10X_EUMB_I2C_OFFSET 0x00003000 /* I2C Unit reg offset */
+#define MPC10X_EUMB_I2C_SIZE 0x00001000 /* I2C Unit reg size */
+#define MPC10X_EUMB_DUART_OFFSET 0x00004000 /* DUART Unit reg offset (8245) */
+#define MPC10X_EUMB_DUART_SIZE 0x00001000 /* DUART Unit reg size (8245) */
+#define MPC10X_EUMB_EPIC_OFFSET 0x00040000 /* EPIC offset in EUMB */
+#define MPC10X_EUMB_EPIC_SIZE 0x00030000 /* EPIC size */
+#define MPC10X_EUMB_PM_OFFSET 0x000fe000 /* Performance Monitor reg offset (8245) */
+#define MPC10X_EUMB_PM_SIZE 0x00001000 /* Performance Monitor reg size (8245) */
+#define MPC10X_EUMB_WP_OFFSET 0x000ff000 /* Data path diagnostic, watchpoint reg offset */
+#define MPC10X_EUMB_WP_SIZE 0x00001000 /* Data path diagnostic, watchpoint reg size */
+
+/*
+ * Indices of the on-chip devices; NUM_PPC_SYS_DEVS is last and therefore
+ * equals the number of entries.
+ */
+enum ppc_sys_devices {
+ MPC10X_IIC1,
+ MPC10X_DMA0,
+ MPC10X_DMA1,
+ MPC10X_UART0,
+ MPC10X_UART1,
+ NUM_PPC_SYS_DEVS,
+};
+
+/*
+ * Bridge helpers; only declared here, the definitions are not in this
+ * header. The map arguments presumably take MPC10X_MEM_MAP_A or
+ * MPC10X_MEM_MAP_B - TODO confirm against the implementation.
+ */
+int mpc10x_bridge_init(struct pci_controller *hose,
+ uint current_map,
+ uint new_map,
+ uint phys_eumb_base);
+unsigned long mpc10x_get_mem_size(uint mem_map);
+int mpc10x_enable_store_gathering(struct pci_controller *hose);
+int mpc10x_disable_store_gathering(struct pci_controller *hose);
+
+/* For MPC107 boards that use the built-in openpic */
+void mpc10x_set_openpic(void);
+
+#endif /* __PPC_KERNEL_MPC10X_H */
diff --git a/arch/powerpc/platforms/embedded6xx/mvme5100.c b/arch/powerpc/platforms/embedded6xx/mvme5100.c
new file mode 100644
index 000000000..00bec0f05
--- /dev/null
+++ b/arch/powerpc/platforms/embedded6xx/mvme5100.c
@@ -0,0 +1,208 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Board setup routines for the Motorola/Emerson MVME5100.
+ *
+ * Copyright 2013 CSC Australia Pty. Ltd.
+ *
+ * Based on earlier code by:
+ *
+ * Matt Porter, MontaVista Software Inc.
+ * Copyright 2001 MontaVista Software Inc.
+ *
+ * Author: Stephen Chivers <schivers@csc.com>
+ */
+
+#include <linux/of_irq.h>
+#include <linux/of_platform.h>
+
+#include <asm/i8259.h>
+#include <asm/pci-bridge.h>
+#include <asm/mpic.h>
+#include <mm/mmu_decl.h>
+#include <asm/udbg.h>
+
+#define HAWK_MPIC_SIZE 0x00040000U
+#define MVME5100_PCI_MEM_OFFSET 0x00000000
+
+/* Board register addresses. */
+#define BOARD_STATUS_REG 0xfef88080
+#define BOARD_MODFAIL_REG 0xfef88090
+#define BOARD_MODRST_REG 0xfef880a0
+#define BOARD_TBEN_REG 0xfef880c0
+#define BOARD_SW_READ_REG 0xfef880e0
+#define BOARD_GEO_ADDR_REG 0xfef880e8
+#define BOARD_EXT_FEATURE1_REG 0xfef880f0
+#define BOARD_EXT_FEATURE2_REG 0xfef88100
+
+static phys_addr_t pci_membase;
+static u_char *restart;
+
+/*
+ * Chained handler for the i8259 cascade: dispatch the irq reported by
+ * i8259_irq() (if non-zero), then signal end-of-interrupt on the chip
+ * that owns @desc.
+ */
+static void mvme5100_8259_cascade(struct irq_desc *desc)
+{
+	struct irq_chip *parent_chip;
+	unsigned int pending;
+
+	parent_chip = irq_desc_get_chip(desc);
+	pending = i8259_irq();
+	if (pending != 0)
+		generic_handle_irq(pending);
+
+	parent_chip->irq_eoi(&desc->irq_data);
+}
+
+/*
+ * Interrupt setup: allocate and initialise the OpenPIC found through the
+ * "open-pic" node (based at pci_membase), then initialise the i8259
+ * described by the "chrp,iic" node and chain it behind its cascade
+ * interrupt.
+ *
+ * Returns early, after logging, when the open-pic node, the i8259 node or
+ * the cascade interrupt is missing. Every node reference taken here is
+ * dropped on all paths.
+ */
+static void __init mvme5100_pic_init(void)
+{
+	struct mpic *mpic;
+	struct device_node *np;
+	struct device_node *cp = NULL;
+	unsigned int cirq;
+	unsigned long intack = 0;
+	const u32 *prop = NULL;
+
+	np = of_find_node_by_type(NULL, "open-pic");
+	if (!np) {
+		pr_err("Could not find open-pic node\n");
+		return;
+	}
+
+	mpic = mpic_alloc(np, pci_membase, 0, 16, 256, " OpenPIC ");
+
+	BUG_ON(mpic == NULL);
+	of_node_put(np);
+
+	mpic_assign_isu(mpic, 0, pci_membase + 0x10000);
+
+	mpic_init(mpic);
+
+	cp = of_find_compatible_node(NULL, NULL, "chrp,iic");
+	if (cp == NULL) {
+		pr_warn("mvme5100_pic_init: couldn't find i8259\n");
+		return;
+	}
+
+	cirq = irq_of_parse_and_map(cp, 0);
+	if (!cirq) {
+		pr_warn("mvme5100_pic_init: no cascade interrupt?\n");
+		/* Do not leak the reference taken on the i8259 node. */
+		of_node_put(cp);
+		return;
+	}
+
+	/* The interrupt-acknowledge address is optional; 0 means none. */
+	np = of_find_compatible_node(NULL, "pci", "mpc10x-pci");
+	if (np) {
+		prop = of_get_property(np, "8259-interrupt-acknowledge", NULL);
+
+		if (prop)
+			intack = prop[0];
+
+		of_node_put(np);
+	}
+
+	if (intack)
+		pr_debug("mvme5100_pic_init: PCI 8259 intack at 0x%016lx\n",
+			 intack);
+
+	i8259_init(cp, intack);
+	of_node_put(cp);
+	irq_set_chained_handler(cirq, mvme5100_8259_cascade);
+}
+
+/*
+ * Register the PCI host bridge described by @dev and record the value of
+ * its PCI_BASE_ADDRESS_1 register in pci_membase.
+ *
+ * The bus number range comes from "bus-range"; 0..0xff is used when the
+ * property is missing or shorter than two cells.
+ *
+ * Returns -ENOMEM when the controller cannot be allocated and 0
+ * otherwise, including when the device ID is not the Hawk's or the base
+ * address reads as 0 (both are only logged).
+ */
+static int __init mvme5100_add_bridge(struct device_node *dev)
+{
+	const int *bus_range;
+	int len;
+	struct pci_controller *hose;
+	unsigned short devid;
+	u32 membase = 0;
+
+	pr_info("Adding PCI host bridge %pOF\n", dev);
+
+	bus_range = of_get_property(dev, "bus-range", &len);
+	/* Ignore a truncated property instead of reading past its end. */
+	if (bus_range && len < 2 * sizeof(int))
+		bus_range = NULL;
+
+	hose = pcibios_alloc_controller(dev);
+	if (hose == NULL)
+		return -ENOMEM;
+
+	hose->first_busno = bus_range ? bus_range[0] : 0;
+	hose->last_busno = bus_range ? bus_range[1] : 0xff;
+
+	setup_indirect_pci(hose, 0xfe000cf8, 0xfe000cfc, 0);
+
+	pci_process_bridge_OF_ranges(hose, dev, 1);
+
+	early_read_config_word(hose, 0, 0, PCI_DEVICE_ID, &devid);
+
+	if (devid != PCI_DEVICE_ID_MOTOROLA_HAWK) {
+		pr_err("HAWK PHB not present?\n");
+		return 0;
+	}
+
+	/*
+	 * The config accessor stores exactly 32 bits, so read into a u32 and
+	 * widen afterwards: phys_addr_t is not guaranteed to be 32 bits wide.
+	 */
+	early_read_config_dword(hose, 0, 0, PCI_BASE_ADDRESS_1, &membase);
+	pci_membase = membase;
+
+	if (pci_membase == 0) {
+		pr_err("HAWK PHB mibar not correctly set?\n");
+		return 0;
+	}
+
+	pr_info("mvme5100_pic_init: pci_membase: %x\n", membase);
+
+	return 0;
+}
+
+/* Match table passed to of_platform_bus_probe(): "hawk-bridge" only. */
+static const struct of_device_id mvme5100_of_bus_ids[] __initconst = {
+ { .compatible = "hawk-bridge", },
+ {},
+};
+
+/*
+ * Setup the architecture
+ */
+static void __init mvme5100_setup_arch(void)
+{
+ if (ppc_md.progress)
+ ppc_md.progress("mvme5100_setup_arch()", 0);
+
+ /*
+ * Map the module reset register for mvme5100_restart().
+ * NOTE(review): the mapping result is not checked here - confirm a
+ * NULL value cannot reach the restart path.
+ */
+ restart = ioremap(BOARD_MODRST_REG, 4);
+}
+
+/*
+ * PHB discovery hook: register every "pci" node compatible with
+ * "hawk-pci". The result of each registration is ignored.
+ */
+static void __init mvme5100_setup_pci(void)
+{
+	struct device_node *bridge;
+
+	for_each_compatible_node(bridge, "pci", "hawk-pci") {
+		mvme5100_add_bridge(bridge);
+	}
+}
+
+/* Emit the fixed vendor and machine lines for this board into @m. */
+static void mvme5100_show_cpuinfo(struct seq_file *m)
+{
+ seq_puts(m, "Vendor\t\t: Motorola/Emerson\n");
+ seq_puts(m, "Machine\t\t: MVME5100\n");
+}
+
+/*
+ * Restart hook: with interrupts off and MSR_IP set, write 0x01 to the
+ * register mapped at 'restart' and spin forever. @cmd is unused.
+ */
+static void __noreturn mvme5100_restart(char *cmd)
+{
+	u_char *reset_reg = (u_char *) restart;
+
+	local_irq_disable();
+	mtmsr(mfmsr() | MSR_IP);
+
+	out_8(reset_reg, 0x01);
+
+	for (;;)
+		;
+}
+
+/*
+ * Device initcall for the mvme5100 machine: probe the buses listed in
+ * mvme5100_of_bus_ids. The probe result is not checked; always returns 0.
+ */
+static int __init probe_of_platform_devices(void)
+{
+
+ of_platform_bus_probe(NULL, mvme5100_of_bus_ids, NULL);
+ return 0;
+}
+
+machine_device_initcall(mvme5100, probe_of_platform_devices);
+
+/* Machine description: binds the mvme5100 hooks defined above to this board. */
+define_machine(mvme5100) {
+ .name = "MVME5100",
+ .compatible = "MVME5100",
+ .setup_arch = mvme5100_setup_arch,
+ .discover_phbs = mvme5100_setup_pci,
+ .init_IRQ = mvme5100_pic_init,
+ .show_cpuinfo = mvme5100_show_cpuinfo,
+ .get_irq = mpic_get_irq,
+ .restart = mvme5100_restart,
+ .progress = udbg_progress,
+};
diff --git a/arch/powerpc/platforms/embedded6xx/storcenter.c b/arch/powerpc/platforms/embedded6xx/storcenter.c
new file mode 100644
index 000000000..e49880e8d
--- /dev/null
+++ b/arch/powerpc/platforms/embedded6xx/storcenter.c
@@ -0,0 +1,121 @@
+/*
+ * Board setup routines for the storcenter
+ *
+ * Copyright 2007 (C) Oyvind Repvik (nail@nslu2-linux.org)
+ * Copyright 2007 Andy Wilcox, Jon Loeliger
+ *
+ * Based on linkstation.c by G. Liakhovetski
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2. This program is licensed "as is" without any warranty of
+ * any kind, whether express or implied.
+ */
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/initrd.h>
+#include <linux/of_platform.h>
+
+#include <asm/time.h>
+#include <asm/mpic.h>
+#include <asm/pci-bridge.h>
+
+#include "mpc10x.h"
+
+
+/* Match table passed to of_platform_bus_probe(): nodes named "soc". */
+static const struct of_device_id storcenter_of_bus[] __initconst = {
+ { .name = "soc", },
+ {},
+};
+
+/*
+ * Device initcall for the storcenter machine: probe the buses listed in
+ * storcenter_of_bus. The probe result is not checked; always returns 0.
+ */
+static int __init storcenter_device_probe(void)
+{
+ of_platform_bus_probe(NULL, storcenter_of_bus, NULL);
+ return 0;
+}
+machine_device_initcall(storcenter, storcenter_device_probe);
+
+
+/*
+ * Register the PCI host bridge described by @dev (CONFIG_PCI only).
+ *
+ * The bus number range comes from "bus-range"; 0..0xff is used when the
+ * property is missing or shorter than two cells. Indirect config access
+ * uses the Map B addresses and the "ranges" property is processed.
+ *
+ * Returns 0 on success (and always without CONFIG_PCI), or -ENOMEM when
+ * the controller cannot be allocated.
+ */
+static int __init storcenter_add_bridge(struct device_node *dev)
+{
+#ifdef CONFIG_PCI
+	int len;
+	struct pci_controller *hose;
+	const int *bus_range;
+
+	printk("Adding PCI host bridge %pOF\n", dev);
+
+	hose = pcibios_alloc_controller(dev);
+	if (hose == NULL)
+		return -ENOMEM;
+
+	bus_range = of_get_property(dev, "bus-range", &len);
+	/* Ignore a truncated property instead of reading past its end. */
+	if (bus_range && len < 2 * sizeof(int))
+		bus_range = NULL;
+	hose->first_busno = bus_range ? bus_range[0] : 0;
+	hose->last_busno = bus_range ? bus_range[1] : 0xff;
+
+	setup_indirect_pci(hose, MPC10X_MAPB_CNFG_ADDR, MPC10X_MAPB_CNFG_DATA, 0);
+
+	/* Interpret the "ranges" property */
+	/* This also maps the I/O region and sets isa_io/mem_base */
+	pci_process_bridge_OF_ranges(hose, dev, 1);
+#endif
+
+	return 0;
+}
+
+/* Architecture setup hook: only prints the product banner. */
+static void __init storcenter_setup_arch(void)
+{
+ printk(KERN_INFO "IOMEGA StorCenter\n");
+}
+
+/*
+ * PHB discovery hook: register every "pci" node compatible with
+ * "mpc10x-pci". The result of each registration is ignored.
+ */
+static void __init storcenter_setup_pci(void)
+{
+	struct device_node *bridge;
+
+	/* Lookup PCI host bridges */
+	for_each_compatible_node(bridge, "pci", "mpc10x-pci") {
+		storcenter_add_bridge(bridge);
+	}
+}
+
+/*
+ * Interrupt setup and service. Interrupts on the turbostation come
+ * from the four PCI slots plus onboard 8241 devices: I2C, DUART.
+ */
+static void __init storcenter_init_IRQ(void)
+{
+ struct mpic *mpic;
+
+ /* Allocation failure is fatal: there is no fallback controller here. */
+ mpic = mpic_alloc(NULL, 0, 0, 16, 0, " OpenPIC ");
+ BUG_ON(mpic == NULL);
+
+ /*
+ * 16 Serial Interrupts followed by 16 Internal Interrupts.
+ * I2C is the second internal, so it is at 17, 0x11020.
+ */
+ mpic_assign_isu(mpic, 0, mpic->paddr + 0x10200);
+ mpic_assign_isu(mpic, 1, mpic->paddr + 0x11000);
+
+ mpic_init(mpic);
+}
+
+/*
+ * Restart hook: with interrupts off, set MSR_IP so the exception prefix
+ * points at the firmware, then spin forever. @cmd is unused.
+ */
+static void __noreturn storcenter_restart(char *cmd)
+{
+	local_irq_disable();
+
+	/* Set exception prefix high - to the firmware */
+	mtmsr(mfmsr() | MSR_IP);
+	isync();
+
+	/* Wait for reset to happen */
+	while (1)
+		;
+}
+
+/* Machine description: binds the storcenter hooks defined above to this board. */
+define_machine(storcenter){
+ .name = "IOMEGA StorCenter",
+ .compatible = "iomega,storcenter",
+ .setup_arch = storcenter_setup_arch,
+ .discover_phbs = storcenter_setup_pci,
+ .init_IRQ = storcenter_init_IRQ,
+ .get_irq = mpic_get_irq,
+ .restart = storcenter_restart,
+};
diff --git a/arch/powerpc/platforms/embedded6xx/usbgecko_udbg.c b/arch/powerpc/platforms/embedded6xx/usbgecko_udbg.c
new file mode 100644
index 000000000..221577f32
--- /dev/null
+++ b/arch/powerpc/platforms/embedded6xx/usbgecko_udbg.c
@@ -0,0 +1,306 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * arch/powerpc/platforms/embedded6xx/usbgecko_udbg.c
+ *
+ * udbg serial input/output routines for the USB Gecko adapter.
+ * Copyright (C) 2008-2009 The GameCube Linux Team
+ * Copyright (C) 2008,2009 Albert Herranz
+ */
+
+#include <linux/of_address.h>
+
+#include <mm/mmu_decl.h>
+
+#include <asm/io.h>
+#include <asm/udbg.h>
+#include <asm/fixmap.h>
+
+#include "usbgecko_udbg.h"
+
+
+/* clock selector value; shifted into the CSR clock field below */
+#define EXI_CLK_32MHZ 5
+
+/* register offsets are relative to ug_io_base */
+#define EXI_CSR 0x00
+#define EXI_CSR_CLKMASK (0x7<<4)
+#define EXI_CSR_CLK_32MHZ (EXI_CLK_32MHZ<<4)
+#define EXI_CSR_CSMASK (0x7<<7)
+#define EXI_CSR_CS_0 (0x1<<7) /* Chip Select 001 */
+
+#define EXI_CR 0x0c
+#define EXI_CR_TSTART (1<<0)
+#define EXI_CR_WRITE (1<<2)
+#define EXI_CR_READ_WRITE (2<<2)
+#define EXI_CR_TLEN(len) (((len)-1)<<4)
+
+#define EXI_DATA 0x10
+
+/* retry budgets used while polling the RX/TX fifo status */
+#define UG_READ_ATTEMPTS 100
+#define UG_WRITE_ATTEMPTS 100
+
+
+/* register base of the EXI channel in use; NULL when no adapter was found */
+static void __iomem *ug_io_base;
+
+/*
+ * Performs one input/output transaction between the exi host and the usbgecko.
+ *
+ * Selects chip select 0 with the 32MHz clock setting, loads @in into the
+ * data register, starts a read/write transfer with EXI_CR_TLEN(2) and
+ * busy-waits until TSTART reads back as clear. The device is deselected
+ * before the data register is read; that value is returned.
+ *
+ * ug_io_base is used unconditionally, so callers must make sure it is set.
+ */
+static u32 ug_io_transaction(u32 in)
+{
+ u32 __iomem *csr_reg = ug_io_base + EXI_CSR;
+ u32 __iomem *data_reg = ug_io_base + EXI_DATA;
+ u32 __iomem *cr_reg = ug_io_base + EXI_CR;
+ u32 csr, data, cr;
+
+ /* select */
+ csr = EXI_CSR_CLK_32MHZ | EXI_CSR_CS_0;
+ out_be32(csr_reg, csr);
+
+ /* read/write */
+ data = in;
+ out_be32(data_reg, data);
+ cr = EXI_CR_TLEN(2) | EXI_CR_READ_WRITE | EXI_CR_TSTART;
+ out_be32(cr_reg, cr);
+
+ /* no timeout: spins for as long as TSTART stays set */
+ while (in_be32(cr_reg) & EXI_CR_TSTART)
+ barrier();
+
+ /* deselect */
+ out_be32(csr_reg, 0);
+
+ /* result */
+ data = in_be32(data_reg);
+
+ return data;
+}
+
+/*
+ * Probes the currently selected EXI channel for a usbgecko.
+ * Returns 1 when the reply to command 0x90000000 is 0x04700000,
+ * 0 otherwise or when no channel base is set.
+ */
+static int ug_is_adapter_present(void)
+{
+	u32 id;
+
+	if (ug_io_base == NULL)
+		return 0;
+
+	id = ug_io_transaction(0x90000000);
+	return id == 0x04700000;
+}
+
+/*
+ * Queries the TX fifo status with command 0xc0000000.
+ * Returns non-zero when bit 0x04000000 is set in the reply.
+ */
+static int ug_is_txfifo_ready(void)
+{
+	u32 status = ug_io_transaction(0xc0000000);
+
+	return status & 0x04000000;
+}
+
+/*
+ * Tries to transmit a character.
+ * If the TX fifo is not ready the result is undefined.
+ *
+ * The character is placed at bit 20 of the 0xb0000000 command word; the
+ * reply of the transaction is ignored.
+ */
+static void ug_raw_putc(char ch)
+{
+ ug_io_transaction(0xb0000000 | (ch << 20));
+}
+
+/*
+ * Transmits a character.
+ * It silently fails if the TX fifo is not ready after a number of retries.
+ *
+ * Does nothing when no adapter is mapped. A '\n' is preceded by a '\r'.
+ */
+static void ug_putc(char ch)
+{
+ int count = UG_WRITE_ATTEMPTS;
+
+ if (!ug_io_base)
+ return;
+
+ /* expand LF to CR LF; the recursion is one level deep */
+ if (ch == '\n')
+ ug_putc('\r');
+
+ while (!ug_is_txfifo_ready() && count--)
+ barrier();
+ /* count is -1 only if every attempt found the fifo not ready */
+ if (count >= 0)
+ ug_raw_putc(ch);
+}
+
+/*
+ * Returns true if the RX fifo is ready for reception.
+ *
+ * Issues command 0xd0000000 and tests bit 0x04000000 of the reply.
+ */
+static int ug_is_rxfifo_ready(void)
+{
+ return ug_io_transaction(0xd0000000) & 0x04000000;
+}
+
+/*
+ * Issues the receive command (0xa0000000) once.
+ * Returns bits 16-23 of the reply when bit 0x08000000 is set in it,
+ * otherwise -1 to signal that no character was available.
+ */
+static int ug_raw_getc(void)
+{
+	u32 reply = ug_io_transaction(0xa0000000);
+
+	if (!(reply & 0x08000000))
+		return -1;
+
+	return (reply >> 16) & 0xff;
+}
+
+/*
+ * Receives a character.
+ *
+ * Polls the RX fifo status until it reports ready or the retry budget
+ * (UG_READ_ATTEMPTS) is used up, then performs one raw read either way.
+ * Returns the character, or -1 if none was available or no adapter is
+ * mapped.
+ */
+static int ug_getc(void)
+{
+	int retries;
+
+	if (ug_io_base == NULL)
+		return -1;
+
+	for (retries = UG_READ_ATTEMPTS;
+	     !ug_is_rxfifo_ready() && retries > 0;
+	     retries--)
+		barrier();
+
+	return ug_raw_getc();
+}
+
+/*
+ * udbg functions.
+ *
+ */
+
+/*
+ * Transmits a character.
+ *
+ * udbg_putc hook; forwards to ug_putc().
+ */
+static void ug_udbg_putc(char ch)
+{
+ ug_putc(ch);
+}
+
+/*
+ * udbg_getc hook: blocks until ug_getc() delivers a character and
+ * returns it. There is no timeout.
+ */
+static int ug_udbg_getc(void)
+{
+	for (;;) {
+		int ch = ug_getc();
+
+		if (ch != -1)
+			return ch;
+		barrier();
+	}
+}
+
+/*
+ * Receives a character. If a character is not available, returns -1.
+ *
+ * Also returns -1 when no adapter is mapped. ug_udbg_init() can reset
+ * ug_io_base to NULL while udbg hooks installed earlier are still in
+ * place, and ug_is_rxfifo_ready() accesses the EXI registers without
+ * checking the base itself.
+ */
+static int ug_udbg_getc_poll(void)
+{
+	if (!ug_io_base || !ug_is_rxfifo_ready())
+		return -1;
+	return ug_getc();
+}
+
+/*
+ * Looks for a USB Gecko adapter on memory card slots A and B.
+ *
+ * The two EXI channels probed are 0x14 bytes apart starting at
+ * @exi_io_base. ug_io_base is left pointing at the channel that answered,
+ * or set to NULL when neither did. Returns the new ug_io_base.
+ */
+static void __iomem *__init ug_udbg_probe(void __iomem *exi_io_base)
+{
+	int slot;
+
+	for (slot = 0; slot < 2; slot++) {
+		ug_io_base = exi_io_base + 0x14 * slot;
+		if (ug_is_adapter_present())
+			return ug_io_base;
+	}
+
+	ug_io_base = NULL;
+	return ug_io_base;
+}
+
+/*
+ * USB Gecko udbg support initialization.
+ *
+ * Maps the first reg of the "nintendo,flipper-exi" node and probes it
+ * for an adapter. On success the udbg hooks are pointed at the usbgecko
+ * routines; on probe failure the mapping is released again. The node
+ * reference is dropped on every path that obtained it.
+ */
+void __init ug_udbg_init(void)
+{
+	struct device_node *exi_node;
+	void __iomem *exi_io_base;
+
+	if (ug_io_base)
+		udbg_printf("%s: early -> final\n", __func__);
+
+	exi_node = of_find_compatible_node(NULL, NULL, "nintendo,flipper-exi");
+	if (exi_node == NULL) {
+		udbg_printf("%s: EXI node not found\n", __func__);
+		return;
+	}
+
+	exi_io_base = of_iomap(exi_node, 0);
+	if (exi_io_base == NULL) {
+		udbg_printf("%s: failed to setup EXI io base\n", __func__);
+	} else if (ug_udbg_probe(exi_io_base)) {
+		udbg_putc = ug_udbg_putc;
+		udbg_getc = ug_udbg_getc;
+		udbg_getc_poll = ug_udbg_getc_poll;
+		udbg_printf("usbgecko_udbg: ready\n");
+	} else {
+		udbg_printf("usbgecko_udbg: not found\n");
+		iounmap(exi_io_base);
+	}
+
+	of_node_put(exi_node);
+}
+
+#ifdef CONFIG_PPC_EARLY_DEBUG_USBGECKO
+
+/*
+ * Returns the physical address passed to setbat() for early debugging:
+ * 0x0c000000 on GameCube, 0x0d000000 on Wii. Any other configuration is
+ * rejected at build time.
+ */
+static phys_addr_t __init ug_early_grab_io_addr(void)
+{
+#if defined(CONFIG_GAMECUBE)
+ return 0x0c000000;
+#elif defined(CONFIG_WII)
+ return 0x0d000000;
+#else
+#error Invalid platform for USB Gecko based early debugging.
+#endif
+}
+
+/*
+ * USB Gecko early debug support initialization for udbg.
+ *
+ * Probes for an adapter through the early fixmap mapping. When one is
+ * found the udbg hooks are installed and the BAT is set up again;
+ * otherwise the function returns without changing the hooks.
+ */
+void __init udbg_init_usbgecko(void)
+{
+ void __iomem *early_debug_area;
+ void __iomem *exi_io_base;
+
+ /*
+ * At this point we have a BAT already setup that enables I/O
+ * to the EXI hardware.
+ *
+ * The BAT uses a virtual address range reserved at the fixmap.
+ * This must match the virtual address configured in
+ * head_32.S:setup_usbgecko_bat().
+ */
+ early_debug_area = (void __iomem *)__fix_to_virt(FIX_EARLY_DEBUG_BASE);
+ /* the EXI registers sit 0x6800 bytes into the mapped area */
+ exi_io_base = early_debug_area + 0x00006800;
+
+ /* try to detect a USB Gecko */
+ if (!ug_udbg_probe(exi_io_base))
+ return;
+
+ /* we found a USB Gecko, load udbg hooks */
+ udbg_putc = ug_udbg_putc;
+ udbg_getc = ug_udbg_getc;
+ udbg_getc_poll = ug_udbg_getc_poll;
+
+ /*
+ * Prepare again the same BAT for MMU_init.
+ * This allows udbg I/O to continue working after the MMU is
+ * turned on for real.
+ * It is safe to continue using the same virtual address as it is
+ * a reserved fixmap area.
+ */
+ setbat(1, (unsigned long)early_debug_area,
+ ug_early_grab_io_addr(), 128*1024, PAGE_KERNEL_NCG);
+}
+
+#endif /* CONFIG_PPC_EARLY_DEBUG_USBGECKO */
+
diff --git a/arch/powerpc/platforms/embedded6xx/usbgecko_udbg.h b/arch/powerpc/platforms/embedded6xx/usbgecko_udbg.h
new file mode 100644
index 000000000..bceb11911
--- /dev/null
+++ b/arch/powerpc/platforms/embedded6xx/usbgecko_udbg.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * arch/powerpc/platforms/embedded6xx/usbgecko_udbg.h
+ *
+ * udbg serial input/output routines for the USB Gecko adapter.
+ * Copyright (C) 2008-2009 The GameCube Linux Team
+ * Copyright (C) 2008,2009 Albert Herranz
+ */
+
+#ifndef __USBGECKO_UDBG_H
+#define __USBGECKO_UDBG_H
+
+#ifdef CONFIG_USBGECKO_UDBG
+
+/* final udbg setup for the USB Gecko adapter */
+extern void __init ug_udbg_init(void);
+
+#else
+
+/* no-op stub used when USB Gecko udbg support is not configured */
+static inline void __init ug_udbg_init(void)
+{
+}
+
+#endif /* CONFIG_USBGECKO_UDBG */
+
+/* early udbg setup; declared regardless of CONFIG_USBGECKO_UDBG */
+void __init udbg_init_usbgecko(void);
+
+#endif /* __USBGECKO_UDBG_H */
diff --git a/arch/powerpc/platforms/embedded6xx/wii.c b/arch/powerpc/platforms/embedded6xx/wii.c
new file mode 100644
index 000000000..cb3be6d6e
--- /dev/null
+++ b/arch/powerpc/platforms/embedded6xx/wii.c
@@ -0,0 +1,180 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * arch/powerpc/platforms/embedded6xx/wii.c
+ *
+ * Nintendo Wii board-specific support
+ * Copyright (C) 2008-2009 The GameCube Linux Team
+ * Copyright (C) 2008,2009 Albert Herranz
+ */
+#define DRV_MODULE_NAME "wii"
+#define pr_fmt(fmt) DRV_MODULE_NAME ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/irq.h>
+#include <linux/seq_file.h>
+#include <linux/of_address.h>
+#include <linux/of_platform.h>
+
+#include <asm/io.h>
+#include <asm/machdep.h>
+#include <asm/time.h>
+#include <asm/udbg.h>
+
+#include "flipper-pic.h"
+#include "hlwd-pic.h"
+#include "usbgecko_udbg.h"
+
+/* control block */
+#define HW_CTRL_COMPATIBLE "nintendo,hollywood-control"
+
+#define HW_CTRL_RESETS 0x94
+#define HW_CTRL_RESETS_SYS (1<<0)
+
+/* gpio */
+#define HW_GPIO_COMPATIBLE "nintendo,hollywood-gpio"
+
+/*
+ * GPIO register banks are 0x20 bytes apart; idx selects the bank.
+ * The argument is parenthesized so that expressions such as
+ * HW_GPIO_BASE(a + 1) expand correctly.
+ */
+#define HW_GPIO_BASE(idx) ((idx) * 0x20)
+#define HW_GPIO_OUT(idx) (HW_GPIO_BASE(idx) + 0)
+#define HW_GPIO_DIR(idx) (HW_GPIO_BASE(idx) + 4)
+#define HW_GPIO_OWNER (HW_GPIO_BASE(1) + 0x1c)
+
+#define HW_GPIO_SHUTDOWN (1<<1)
+#define HW_GPIO_SLOT_LED (1<<5)
+#define HW_GPIO_SENSOR_BAR (1<<8)
+
+
+/* mappings set up by wii_setup_arch(); NULL when the lookup or map failed */
+static void __iomem *hw_ctrl;
+static void __iomem *hw_gpio;
+
+/* Disables interrupts and spins forever; never returns. */
+static void __noreturn wii_spin(void)
+{
+ local_irq_disable();
+ for (;;)
+ cpu_relax();
+}
+
+/*
+ * Looks up the first device-tree node matching @compatible and ioremaps
+ * its first reg resource.
+ *
+ * @name is only used to label the log messages.
+ *
+ * Returns the mapped address, or NULL when the node is missing, has no
+ * valid reg, or cannot be mapped. Every failure is logged.
+ */
+static void __iomem *__init wii_ioremap_hw_regs(char *name, char *compatible)
+{
+	void __iomem *hw_regs = NULL;
+	struct device_node *np;
+	struct resource res;
+	int error;
+
+	np = of_find_compatible_node(NULL, NULL, compatible);
+	if (!np) {
+		pr_err("no compatible node found for %s\n", compatible);
+		goto out;
+	}
+	error = of_address_to_resource(np, 0, &res);
+	if (error) {
+		pr_err("no valid reg found for %pOFn\n", np);
+		goto out_put;
+	}
+
+	hw_regs = ioremap(res.start, resource_size(&res));
+	if (!hw_regs) {
+		pr_err("failed to map %s at %pa\n", name, &res.start);
+		goto out_put;
+	}
+
+	/* %pa already prints a "0x" prefix, so none is added in the format */
+	pr_info("%s at %pa mapped to 0x%p\n", name, &res.start, hw_regs);
+
+out_put:
+	of_node_put(np);
+out:
+	return hw_regs;
+}
+
+/*
+ * setup_arch hook: maps the control and GPIO register blocks. A failed
+ * mapping leaves the corresponding pointer NULL.
+ */
+static void __init wii_setup_arch(void)
+{
+ hw_ctrl = wii_ioremap_hw_regs("hw_ctrl", HW_CTRL_COMPATIBLE);
+ hw_gpio = wii_ioremap_hw_regs("hw_gpio", HW_GPIO_COMPATIBLE);
+ if (hw_gpio) {
+ /* turn off the front blue led and IR light */
+ clrbits32(hw_gpio + HW_GPIO_OUT(0),
+ HW_GPIO_SLOT_LED | HW_GPIO_SENSOR_BAR);
+ }
+}
+
+/*
+ * Restart hook. Clears the system reset bit if the control block is
+ * mapped, then spins. @cmd is not used.
+ */
+static void __noreturn wii_restart(char *cmd)
+{
+ local_irq_disable();
+
+ if (hw_ctrl) {
+ /* clear the system reset pin to cause a reset */
+ clrbits32(hw_ctrl + HW_CTRL_RESETS, HW_CTRL_RESETS_SYS);
+ }
+ wii_spin();
+}
+
+/*
+ * Power-off handler installed as pm_power_off by wii_probe().
+ * Drives the shutdown GPIO if the GPIO block is mapped, then spins in
+ * wii_spin() and does not return.
+ */
+static void wii_power_off(void)
+{
+ local_irq_disable();
+
+ if (hw_gpio) {
+ /*
+ * set the owner of the shutdown pin to ARM, because it is
+ * accessed through the registers for the ARM, below
+ */
+ clrbits32(hw_gpio + HW_GPIO_OWNER, HW_GPIO_SHUTDOWN);
+
+ /* make sure that the poweroff GPIO is configured as output */
+ setbits32(hw_gpio + HW_GPIO_DIR(1), HW_GPIO_SHUTDOWN);
+
+ /* drive the poweroff GPIO high */
+ setbits32(hw_gpio + HW_GPIO_OUT(1), HW_GPIO_SHUTDOWN);
+ }
+ wii_spin();
+}
+
+/*
+ * Halt hook: invokes the machine restart hook when one is set, and
+ * spins if that is absent or returns.
+ */
+static void __noreturn wii_halt(void)
+{
+ if (ppc_md.restart)
+ ppc_md.restart(NULL);
+ wii_spin();
+}
+
+/* init_IRQ hook: probes the flipper PIC first, then the hlwd PIC. */
+static void __init wii_pic_probe(void)
+{
+ flipper_pic_probe();
+ hlwd_pic_probe();
+}
+
+/*
+ * Machine probe hook: installs the power-off handler, runs the USB Gecko
+ * udbg setup and always returns 1.
+ */
+static int __init wii_probe(void)
+{
+ pm_power_off = wii_power_off;
+
+ ug_udbg_init();
+
+ return 1;
+}
+
+/* machine_shutdown hook: quiesces the hlwd PIC, then the flipper PIC. */
+static void wii_shutdown(void)
+{
+ hlwd_quiesce();
+ flipper_quiesce();
+}
+
+/* bus match table handed to of_platform_populate() */
+static const struct of_device_id wii_of_bus[] = {
+ { .compatible = "nintendo,hollywood", },
+ { },
+};
+
+/*
+ * Device initcall for the wii machine: populates platform devices using
+ * wii_of_bus. The result of of_platform_populate() is ignored and 0 is
+ * always returned.
+ */
+static int __init wii_device_probe(void)
+{
+ of_platform_populate(NULL, wii_of_bus, NULL, NULL);
+ return 0;
+}
+machine_device_initcall(wii, wii_device_probe);
+
+/* Machine description: binds "nintendo,wii" to the hooks in this file. */
+define_machine(wii) {
+ .name = "wii",
+ .compatible = "nintendo,wii",
+ .probe = wii_probe,
+ .setup_arch = wii_setup_arch,
+ .restart = wii_restart,
+ .halt = wii_halt,
+ .init_IRQ = wii_pic_probe,
+ .get_irq = flipper_pic_get_irq,
+ .progress = udbg_progress,
+ .machine_shutdown = wii_shutdown,
+};
diff --git a/arch/powerpc/platforms/fsl_uli1575.c b/arch/powerpc/platforms/fsl_uli1575.c
new file mode 100644
index 000000000..b8d37a993
--- /dev/null
+++ b/arch/powerpc/platforms/fsl_uli1575.c
@@ -0,0 +1,379 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * ULI M1575 setup code - specific to Freescale boards
+ *
+ * Copyright 2007 Freescale Semiconductor Inc.
+ */
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+#include <linux/mc146818rtc.h>
+#include <linux/of_irq.h>
+
+#include <asm/pci-bridge.h>
+#include <asm/ppc-pci.h>
+
+#include <sysdev/fsl_pci.h>
+
+/* PIRQ selector values written to the per-device routing registers */
+#define ULI_PIRQA 0x08
+#define ULI_PIRQB 0x09
+#define ULI_PIRQC 0x0a
+#define ULI_PIRQD 0x0b
+#define ULI_PIRQE 0x0c
+#define ULI_PIRQF 0x0d
+#define ULI_PIRQG 0x0e
+
+/* 4-bit codes for 8259 IRQ lines; the codes are not the IRQ numbers */
+#define ULI_8259_NONE 0x00
+#define ULI_8259_IRQ1 0x08
+#define ULI_8259_IRQ3 0x02
+#define ULI_8259_IRQ4 0x04
+#define ULI_8259_IRQ5 0x05
+#define ULI_8259_IRQ6 0x07
+#define ULI_8259_IRQ7 0x06
+#define ULI_8259_IRQ9 0x01
+#define ULI_8259_IRQ10 0x03
+#define ULI_8259_IRQ11 0x09
+#define ULI_8259_IRQ12 0x0b
+#define ULI_8259_IRQ14 0x0d
+#define ULI_8259_IRQ15 0x0f
+
+/*
+ * 8259 IRQ code assigned to each PIRQ line, indexed PIRQA..PIRQH.
+ * The table is only read, so it is const.
+ */
+static const u8 uli_pirq_to_irq[8] = {
+	ULI_8259_IRQ9,		/* PIRQA */
+	ULI_8259_IRQ10,		/* PIRQB */
+	ULI_8259_IRQ11,		/* PIRQC */
+	ULI_8259_IRQ12,		/* PIRQD */
+	ULI_8259_IRQ5,		/* PIRQE */
+	ULI_8259_IRQ6,		/* PIRQF */
+	ULI_8259_IRQ7,		/* PIRQG */
+	ULI_8259_NONE,		/* PIRQH */
+};
+
+/*
+ * Tells whether the running machine is one of the boards the ULI quirks
+ * guarded by this helper apply to.
+ */
+static inline bool is_quirk_valid(void)
+{
+	if (machine_is(mpc86xx_hpcn))
+		return true;
+	if (machine_is(mpc8544_ds))
+		return true;
+	if (machine_is(p2020_ds))
+		return true;
+	if (machine_is(mpc8572_ds))
+		return true;
+	return false;
+}
+
+/* Bridge */
+/*
+ * Early fixup for device 0x5249: enables I/O, memory and bus mastering,
+ * then sets the programming interface byte to 0x01 while register 0x7c
+ * holds 0x80, restoring 0x7c afterwards. dev->class is updated to match.
+ * Applied only on boards accepted by is_quirk_valid().
+ */
+static void early_uli5249(struct pci_dev *dev)
+{
+ unsigned char temp;
+
+ if (!is_quirk_valid())
+ return;
+
+ pci_write_config_word(dev, PCI_COMMAND, PCI_COMMAND_IO |
+ PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER);
+
+ /* read/write lock */
+ pci_read_config_byte(dev, 0x7c, &temp);
+ pci_write_config_byte(dev, 0x7c, 0x80);
+
+ /* set as P2P bridge */
+ pci_write_config_byte(dev, PCI_CLASS_PROG, 0x01);
+ dev->class |= 0x1;
+
+ /* restore lock */
+ pci_write_config_byte(dev, 0x7c, temp);
+}
+
+
+/*
+ * Header fixup for device 0x1575: programs the PIRQ-to-8259 mapping from
+ * uli_pirq_to_irq and the per-function interrupt routing registers.
+ * Applied only on boards accepted by is_quirk_valid().
+ */
+static void quirk_uli1575(struct pci_dev *dev)
+{
+ int i;
+
+ if (!is_quirk_valid())
+ return;
+
+ /*
+ * ULI1575 interrupts route setup
+ */
+
+ /* ULI1575 IRQ mapping conf register maps PIRQx to IRQn */
+ /* two table entries per byte: even PIRQ in the low nibble, odd in the high */
+ for (i = 0; i < 4; i++) {
+ u8 val = uli_pirq_to_irq[i*2] | (uli_pirq_to_irq[i*2+1] << 4);
+ pci_write_config_byte(dev, 0x48 + i, val);
+ }
+
+ /* USB 1.1 OHCI controller 1: dev 28, func 0 - IRQ12 */
+ pci_write_config_byte(dev, 0x86, ULI_PIRQD);
+
+ /* USB 1.1 OHCI controller 2: dev 28, func 1 - IRQ9 */
+ pci_write_config_byte(dev, 0x87, ULI_PIRQA);
+
+ /* USB 1.1 OHCI controller 3: dev 28, func 2 - IRQ10 */
+ pci_write_config_byte(dev, 0x88, ULI_PIRQB);
+
+ /* Lan controller: dev 27, func 0 - IRQ6 */
+ pci_write_config_byte(dev, 0x89, ULI_PIRQF);
+
+ /* AC97 Audio controller: dev 29, func 0 - IRQ6 */
+ pci_write_config_byte(dev, 0x8a, ULI_PIRQF);
+
+ /* Modem controller: dev 29, func 1 - IRQ6 */
+ pci_write_config_byte(dev, 0x8b, ULI_PIRQF);
+
+ /* HD Audio controller: dev 29, func 2 - IRQ6 */
+ pci_write_config_byte(dev, 0x8c, ULI_PIRQF);
+
+ /* SATA controller: dev 31, func 1 - IRQ5 */
+ pci_write_config_byte(dev, 0x8d, ULI_PIRQE);
+
+ /* SMB interrupt: dev 30, func 1 - IRQ7 */
+ pci_write_config_byte(dev, 0x8e, ULI_PIRQG);
+
+ /* PMU ACPI SCI interrupt: dev 30, func 2 - IRQ7 */
+ pci_write_config_byte(dev, 0x8f, ULI_PIRQG);
+
+ /* USB 2.0 controller: dev 28, func 3 */
+ pci_write_config_byte(dev, 0x74, ULI_8259_IRQ11);
+
+ /* Primary PATA IDE IRQ: 14
+ * Secondary PATA IDE IRQ: 15
+ */
+ pci_write_config_byte(dev, 0x44, 0x30 | ULI_8259_IRQ14);
+ pci_write_config_byte(dev, 0x75, ULI_8259_IRQ15);
+}
+
+/*
+ * Final fixup for device 0x1575: programs the i8259 trigger mode
+ * registers and initializes the RTC. Applied only on boards accepted by
+ * is_quirk_valid().
+ */
+static void quirk_final_uli1575(struct pci_dev *dev)
+{
+ /* Set i8259 interrupt trigger
+ * IRQ 3: Level
+ * IRQ 4: Level
+ * IRQ 5: Level
+ * IRQ 6: Level
+ * IRQ 7: Level
+ * IRQ 9: Level
+ * IRQ 10: Level
+ * IRQ 11: Level
+ * IRQ 12: Level
+ * IRQ 14: Edge
+ * IRQ 15: Edge
+ */
+ if (!is_quirk_valid())
+ return;
+
+ outb(0xfa, 0x4d0);
+ outb(0x1e, 0x4d1);
+
+ /* setup RTC */
+ CMOS_WRITE(RTC_SET, RTC_CONTROL);
+ CMOS_WRITE(RTC_24H, RTC_CONTROL);
+
+ /* ensure month, date, and week alarm fields are ignored */
+ CMOS_WRITE(0, RTC_VALID);
+
+ /* index/data pair at ports 0x72/0x73: registers 0x7c and 0x7d */
+ outb_p(0x7c, 0x72);
+ outb_p(RTC_ALARM_DONT_CARE, 0x73);
+
+ outb_p(0x7d, 0x72);
+ outb_p(RTC_ALARM_DONT_CARE, 0x73);
+}
+
+/* SATA */
+/*
+ * Header fixup for device 0x5288: rewrites the class code to
+ * PCI_CLASS_STORAGE_SATA_AHCI (keeping the revision byte) while bit 7 of
+ * register 0x83 is set, then clears bit 0 of register 0x84.
+ * Applied only on boards accepted by is_quirk_valid().
+ */
+static void quirk_uli5288(struct pci_dev *dev)
+{
+ unsigned char c;
+ unsigned int d;
+
+ if (!is_quirk_valid())
+ return;
+
+ /* read/write lock */
+ pci_read_config_byte(dev, 0x83, &c);
+ pci_write_config_byte(dev, 0x83, c|0x80);
+
+ pci_read_config_dword(dev, PCI_CLASS_REVISION, &d);
+ d = (d & 0xff) | (PCI_CLASS_STORAGE_SATA_AHCI << 8);
+ pci_write_config_dword(dev, PCI_CLASS_REVISION, d);
+
+ /* restore lock */
+ pci_write_config_byte(dev, 0x83, c);
+
+ /* disable emulated PATA mode */
+ pci_read_config_byte(dev, 0x84, &c);
+ pci_write_config_byte(dev, 0x84, c & ~0x01);
+}
+
+/* PATA */
+/*
+ * Header and resume fixup for device 0x5229: sets the command register
+ * to I/O + bus master with INTx disabled and sets bit 12 of the word at
+ * 0x4a. Applied only on boards accepted by is_quirk_valid().
+ */
+static void quirk_uli5229(struct pci_dev *dev)
+{
+ unsigned short temp;
+
+ if (!is_quirk_valid())
+ return;
+
+ pci_write_config_word(dev, PCI_COMMAND, PCI_COMMAND_INTX_DISABLE |
+ PCI_COMMAND_MASTER | PCI_COMMAND_IO);
+
+ /* Enable Native IRQ 14/15 */
+ pci_read_config_word(dev, 0x4a, &temp);
+ pci_write_config_word(dev, 0x4a, temp | 0x1000);
+}
+
+/*
+ * We have to do a dummy read on the P2P for the RTC to work.
+ *
+ * Records the end of the last non-prefetchable memory window among the
+ * three bridge resources, then takes the first memory resource of the
+ * device's bus and reads one byte from it: at its start when it ends
+ * where that window ends, otherwise from its last four bytes.
+ *
+ * NOTE(review): unlike the other quirks here this one is not guarded by
+ * is_quirk_valid(); confirm that is intended.
+ */
+static void quirk_final_uli5249(struct pci_dev *dev)
+{
+	int i;
+	/* ioremap() returns an __iomem pointer; keep the annotation for sparse */
+	u8 __iomem *dummy;
+	struct pci_bus *bus = dev->bus;
+	struct resource *res;
+	resource_size_t end = 0;
+
+	for (i = PCI_BRIDGE_RESOURCES; i < PCI_BRIDGE_RESOURCES+3; i++) {
+		unsigned long flags = pci_resource_flags(dev, i);
+		if ((flags & (IORESOURCE_MEM|IORESOURCE_PREFETCH)) == IORESOURCE_MEM)
+			end = pci_resource_end(dev, i);
+	}
+
+	pci_bus_for_each_resource(bus, res, i) {
+		if (res && res->flags & IORESOURCE_MEM) {
+			if (res->end == end)
+				dummy = ioremap(res->start, 0x4);
+			else
+				dummy = ioremap(res->end - 3, 0x4);
+			if (dummy) {
+				in_8(dummy);
+				iounmap(dummy);
+			}
+			/* only the first memory resource is touched */
+			break;
+		}
+	}
+}
+
+/* fixup registrations for the quirks guarded by is_quirk_valid() */
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AL, 0x5249, early_uli5249);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AL, 0x1575, quirk_uli1575);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AL, 0x5288, quirk_uli5288);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AL, 0x5229, quirk_uli5229);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AL, 0x5249, quirk_final_uli5249);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AL, 0x1575, quirk_final_uli1575);
+/* the PATA setup is applied again on resume */
+DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AL, 0x5229, quirk_uli5229);
+
+/*
+ * Header fixup for device 0x1575 on mpc86xx_hpcd only: sets bit 26 of
+ * the dword at 0x48 and bit 22 of the dword at 0x90.
+ */
+static void hpcd_quirk_uli1575(struct pci_dev *dev)
+{
+ u32 temp32;
+
+ if (!machine_is(mpc86xx_hpcd))
+ return;
+
+ /* Disable INTx */
+ pci_read_config_dword(dev, 0x48, &temp32);
+ pci_write_config_dword(dev, 0x48, (temp32 | 1<<26));
+
+ /* Enable sideband interrupt */
+ pci_read_config_dword(dev, 0x90, &temp32);
+ pci_write_config_dword(dev, 0x90, (temp32 | 1<<22));
+}
+
+/*
+ * Header fixup for device 0x5288 on mpc86xx_hpcd only: with bit 7 of
+ * register 0x83 set, writes 0x01 to PCI_CLASS_PROG and 0x06 to
+ * PCI_CLASS_DEVICE, then clears bit 7 of 0x83 again.
+ */
+static void hpcd_quirk_uli5288(struct pci_dev *dev)
+{
+ unsigned char c;
+
+ if (!machine_is(mpc86xx_hpcd))
+ return;
+
+ /* set bit 7 of 0x83 before rewriting the class bytes */
+ pci_read_config_byte(dev, 0x83, &c);
+ c |= 0x80;
+ pci_write_config_byte(dev, 0x83, c);
+
+ pci_write_config_byte(dev, PCI_CLASS_PROG, 0x01);
+ pci_write_config_byte(dev, PCI_CLASS_DEVICE, 0x06);
+
+ /* clear bit 7 of 0x83 again */
+ pci_read_config_byte(dev, 0x83, &c);
+ c &= 0x7f;
+ pci_write_config_byte(dev, 0x83, c);
+}
+
+/*
+ * Since 8259PIC was disabled on the board, the IDE device can not
+ * use the legacy IRQ, we need to let the IDE device work under
+ * native mode and use the interrupt line like other PCI devices.
+ * IRQ14 is a sideband interrupt from IDE device to CPU and we use this
+ * as the interrupt for IDE device.
+ *
+ * Implemented by setting bit 4 of config register 0x4b; applied on
+ * mpc86xx_hpcd only.
+ */
+static void hpcd_quirk_uli5229(struct pci_dev *dev)
+{
+ unsigned char c;
+
+ if (!machine_is(mpc86xx_hpcd))
+ return;
+
+ pci_read_config_byte(dev, 0x4b, &c);
+ c |= 0x10;
+ pci_write_config_byte(dev, 0x4b, c);
+}
+
+/*
+ * SATA interrupt pin bug fix
+ * There is a chip bug in the 5288: the interrupt pin should be 2, not
+ * the read-only value 1, so it uses INTB# rather than the INTA# that is
+ * actually used by the IDE device 5229.
+ * Because of this bug, during PCI initialization the 5288 picks up the
+ * irq of the IDE device from the device tree. This function fixes that
+ * by re-assigning the correct irq to the 5288.
+ *
+ * Applied on mpc86xx_hpcd only, and only when the host bridge has a
+ * device-tree node.
+ */
+static void hpcd_final_uli5288(struct pci_dev *dev)
+{
+ struct pci_controller *hose = pci_bus_to_host(dev->bus);
+ struct device_node *hosenode = hose ? hose->dn : NULL;
+ struct of_phandle_args oirq;
+ u32 laddr[3];
+
+ if (!machine_is(mpc86xx_hpcd))
+ return;
+
+ if (!hosenode)
+ return;
+
+ /* look up interrupt pin 2 for slot 31, function 0 on the first bus */
+ oirq.np = hosenode;
+ oirq.args[0] = 2;
+ oirq.args_count = 1;
+ laddr[0] = (hose->first_busno << 16) | (PCI_DEVFN(31, 0) << 8);
+ laddr[1] = laddr[2] = 0;
+ /* NOTE(review): the result of of_irq_parse_raw() is not checked */
+ of_irq_parse_raw(laddr, &oirq);
+ dev->irq = irq_create_of_mapping(&oirq);
+}
+
+/* fixup registrations for the mpc86xx_hpcd-only quirks */
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AL, 0x1575, hpcd_quirk_uli1575);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AL, 0x5288, hpcd_quirk_uli5288);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AL, 0x5229, hpcd_quirk_uli5229);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AL, 0x5288, hpcd_final_uli5288);
+
+/*
+ * pci_exclude_device hook: hides the modem (slot 29, function 1) and HD
+ * audio (slot 29, function 2) controllers located two buses below the
+ * first bus of the primary host bridge. Every other device is accepted.
+ */
+static int uli_exclude_device(struct pci_controller *hose, u_char bus, u_char devfn)
+{
+	unsigned int slot = PCI_SLOT(devfn);
+	unsigned int func = PCI_FUNC(devfn);
+
+	if (hose->dn != fsl_pci_primary || bus != (hose->first_busno + 2))
+		return PCIBIOS_SUCCESSFUL;
+
+	/* function 1 is the modem controller, function 2 the HD audio one */
+	if (slot == 29 && (func == 1 || func == 2))
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	return PCIBIOS_SUCCESSFUL;
+}
+
+/*
+ * Installs uli_exclude_device() as the pci_exclude_device hook when a
+ * node named "uli1575" has the primary PCI host bridge among its
+ * ancestors.
+ *
+ * Each step up the tree drops the reference on the child and keeps the
+ * one on the parent. The reference left over when the walk stops is
+ * released at the end.
+ */
+void __init uli_init(void)
+{
+	struct device_node *node;
+	struct device_node *pci_with_uli;
+
+	/* See if we have a ULI under the primary */
+
+	node = of_find_node_by_name(NULL, "uli1575");
+	while ((pci_with_uli = of_get_parent(node))) {
+		of_node_put(node);
+		node = pci_with_uli;
+
+		if (pci_with_uli == fsl_pci_primary) {
+			ppc_md.pci_exclude_device = uli_exclude_device;
+			break;
+		}
+	}
+
+	/* drop the reference on the last node visited; NULL is accepted */
+	of_node_put(node);
+}
diff --git a/arch/powerpc/platforms/maple/Kconfig b/arch/powerpc/platforms/maple/Kconfig
new file mode 100644
index 000000000..4c058cc57
--- /dev/null
+++ b/arch/powerpc/platforms/maple/Kconfig
@@ -0,0 +1,19 @@
+# SPDX-License-Identifier: GPL-2.0
+config PPC_MAPLE
+ depends on PPC64 && PPC_BOOK3S && CPU_BIG_ENDIAN
+ bool "Maple 970FX Evaluation Board"
+ select FORCE_PCI
+ select MPIC
+ select U3_DART
+ select MPIC_U3_HT_IRQS
+ select GENERIC_TBSYNC
+ select PPC_UDBG_16550
+ select PPC_970_NAP
+ select PPC_64S_HASH_MMU
+ select PPC_HASH_MMU_NATIVE
+ select PPC_RTAS
+ select MMIO_NVRAM
+ select ATA_NONSTANDARD if ATA
+ help
+ This option enables support for the Maple 970FX Evaluation Board.
+ For more information, refer to <http://www.970eval.com>
diff --git a/arch/powerpc/platforms/maple/Makefile b/arch/powerpc/platforms/maple/Makefile
new file mode 100644
index 000000000..19f35ab82
--- /dev/null
+++ b/arch/powerpc/platforms/maple/Makefile
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+obj-y += setup.o pci.o time.o
diff --git a/arch/powerpc/platforms/maple/maple.h b/arch/powerpc/platforms/maple/maple.h
new file mode 100644
index 000000000..4f358b55c
--- /dev/null
+++ b/arch/powerpc/platforms/maple/maple.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Declarations for maple-specific code.
+ *
+ * Maple is the name of a PPC970 evaluation board.
+ */
+extern int maple_set_rtc_time(struct rtc_time *tm);
+extern void maple_get_rtc_time(struct rtc_time *tm);
+extern time64_t maple_get_boot_time(void);
+extern void maple_calibrate_decr(void);
+extern void maple_pci_init(void);
+extern void maple_pci_irq_fixup(struct pci_dev *dev);
+extern int maple_pci_get_legacy_ide_irq(struct pci_dev *dev, int channel);
+
+extern struct pci_controller_ops maple_pci_controller_ops;
diff --git a/arch/powerpc/platforms/maple/pci.c b/arch/powerpc/platforms/maple/pci.c
new file mode 100644
index 000000000..b911b3171
--- /dev/null
+++ b/arch/powerpc/platforms/maple/pci.c
@@ -0,0 +1,672 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2004 Benjamin Herrenschmidt (benh@kernel.crashing.org),
+ * IBM Corp.
+ */
+
+#undef DEBUG
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/delay.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/irq.h>
+#include <linux/of_irq.h>
+
+#include <asm/sections.h>
+#include <asm/io.h>
+#include <asm/pci-bridge.h>
+#include <asm/machdep.h>
+#include <asm/iommu.h>
+#include <asm/ppc-pci.h>
+#include <asm/isa-bridge.h>
+
+#include "maple.h"
+
+#ifdef DEBUG
+#define DBG(x...) printk(x)
+#else
+#define DBG(x...)
+#endif
+
+static struct pci_controller *u3_agp, *u3_ht, *u4_pcie;
+
+/*
+ * Walks @node and its siblings, descending into PCI-to-PCI and CardBus
+ * bridges, and returns the largest "last bus" value found in their
+ * "bus-range" properties, or @higher if none is larger.
+ *
+ * A bus-range holds two cells, so a property of exactly
+ * 2 * sizeof(int) bytes is valid. The previous '>' test rejected it,
+ * which meant no bridge ever contributed its range.
+ */
+static int __init fixup_one_level_bus_range(struct device_node *node, int higher)
+{
+	for (; node; node = node->sibling) {
+		const int *bus_range;
+		const unsigned int *class_code;
+		int len;
+
+		/* For PCI<->PCI bridges or CardBus bridges, we go down */
+		class_code = of_get_property(node, "class-code", NULL);
+		if (!class_code || ((*class_code >> 8) != PCI_CLASS_BRIDGE_PCI &&
+			(*class_code >> 8) != PCI_CLASS_BRIDGE_CARDBUS))
+			continue;
+		bus_range = of_get_property(node, "bus-range", &len);
+		if (bus_range != NULL && len >= 2 * sizeof(int)) {
+			if (bus_range[1] > higher)
+				higher = bus_range[1];
+		}
+		higher = fixup_one_level_bus_range(node->child, higher);
+	}
+	return higher;
+}
+
+/* This routine fixes the "bus-range" property of all bridges in the
+ * system since they tend to have their "last" member wrong on macs
+ *
+ * Note that the bus numbers manipulated here are OF bus numbers, they
+ * are not Linux bus numbers.
+ *
+ * The second cell of @bridge's "bus-range" is overwritten in place with
+ * the value returned by fixup_one_level_bus_range() for its children.
+ * A missing or too short property is reported and left alone.
+ */
+static void __init fixup_bus_range(struct device_node *bridge)
+{
+ int *bus_range;
+ struct property *prop;
+ int len;
+
+ /* Lookup the "bus-range" property for the hose */
+ prop = of_find_property(bridge, "bus-range", &len);
+ if (prop == NULL || prop->value == NULL || len < 2 * sizeof(int)) {
+ printk(KERN_WARNING "Can't get bus-range for %pOF\n",
+ bridge);
+ return;
+ }
+ bus_range = prop->value;
+ bus_range[1] = fixup_one_level_bus_range(bridge->child, bus_range[1]);
+}
+
+
+/*
+ * Builds the config address used on the hose's first bus: one bit
+ * selected by the slot number, the function number at bit 8 and the
+ * dword-aligned register offset.
+ *
+ * The slot bit is shifted as unsigned long: slot 31 would shift a plain
+ * int 1 into the sign bit, which is undefined.
+ */
+static unsigned long u3_agp_cfa0(u8 devfn, u8 off)
+{
+	return (1UL << PCI_SLOT(devfn)) |
+		((unsigned long)PCI_FUNC(devfn) << 8) |
+		((unsigned long)off & 0xFCUL);
+}
+
+/*
+ * Builds the config address used for buses other than the hose's first
+ * one: bus at bit 16, devfn at bit 8, dword-aligned register offset and
+ * bit 0 set.
+ */
+static unsigned long u3_agp_cfa1(u8 bus, u8 devfn, u8 off)
+{
+	unsigned long addr = 1UL;
+
+	addr |= (unsigned long)bus << 16;
+	addr |= (unsigned long)devfn << 8;
+	addr |= (unsigned long)off & 0xFCUL;
+
+	return addr;
+}
+
+/*
+ * Selects a config register on the U3 AGP bridge and returns the address
+ * to access it through.
+ *
+ * On the hose's first bus, devfn values below 11 << 3 are rejected with
+ * NULL. The config address is written to cfg_addr repeatedly until it
+ * reads back unchanged. The result is cfg_data plus the low three bits
+ * of @offset.
+ */
+static volatile void __iomem *u3_agp_cfg_access(struct pci_controller* hose,
+ u8 bus, u8 dev_fn, u8 offset)
+{
+ unsigned int caddr;
+
+ if (bus == hose->first_busno) {
+ if (dev_fn < (11 << 3))
+ return NULL;
+ caddr = u3_agp_cfa0(dev_fn, offset);
+ } else
+ caddr = u3_agp_cfa1(bus, dev_fn, offset);
+
+ /* Uninorth will return garbage if we don't read back the value ! */
+ do {
+ out_le32(hose->cfg_addr, caddr);
+ } while (in_le32(hose->cfg_addr) != caddr);
+
+ offset &= 0x07;
+ return hose->cfg_data + offset;
+}
+
+/*
+ * pci_ops read accessor for the U3 AGP bridge.
+ *
+ * Stores the @len byte value at config @offset of @devfn on @bus in
+ * *@val, read little-endian.
+ *
+ * Returns PCIBIOS_SUCCESSFUL, PCIBIOS_DEVICE_NOT_FOUND when the bus has
+ * no host controller or the device is not addressable, or
+ * PCIBIOS_BAD_REGISTER_NUMBER for offsets above 0xff. The address helper
+ * takes an 8-bit offset, so larger values would otherwise be truncated
+ * and alias a lower register.
+ */
+static int u3_agp_read_config(struct pci_bus *bus, unsigned int devfn,
+			      int offset, int len, u32 *val)
+{
+	struct pci_controller *hose;
+	volatile void __iomem *addr;
+
+	hose = pci_bus_to_host(bus);
+	if (hose == NULL)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	if (offset > 0xff)
+		return PCIBIOS_BAD_REGISTER_NUMBER;
+
+	addr = u3_agp_cfg_access(hose, bus->number, devfn, offset);
+	if (!addr)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+	/*
+	 * Note: the caller has already checked that offset is
+	 * suitably aligned and that len is 1, 2 or 4.
+	 */
+	switch (len) {
+	case 1:
+		*val = in_8(addr);
+		break;
+	case 2:
+		*val = in_le16(addr);
+		break;
+	default:
+		*val = in_le32(addr);
+		break;
+	}
+	return PCIBIOS_SUCCESSFUL;
+}
+
+/*
+ * pci_ops write accessor for the U3 AGP bridge.
+ *
+ * Writes the low @len bytes of @val, little-endian, to config @offset of
+ * @devfn on @bus.
+ *
+ * Returns PCIBIOS_SUCCESSFUL, PCIBIOS_DEVICE_NOT_FOUND when the bus has
+ * no host controller or the device is not addressable, or
+ * PCIBIOS_BAD_REGISTER_NUMBER for offsets above 0xff. The address helper
+ * takes an 8-bit offset, so larger values would otherwise be truncated
+ * and hit a lower register.
+ */
+static int u3_agp_write_config(struct pci_bus *bus, unsigned int devfn,
+			       int offset, int len, u32 val)
+{
+	struct pci_controller *hose;
+	volatile void __iomem *addr;
+
+	hose = pci_bus_to_host(bus);
+	if (hose == NULL)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	if (offset > 0xff)
+		return PCIBIOS_BAD_REGISTER_NUMBER;
+
+	addr = u3_agp_cfg_access(hose, bus->number, devfn, offset);
+	if (!addr)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+	/*
+	 * Note: the caller has already checked that offset is
+	 * suitably aligned and that len is 1, 2 or 4.
+	 */
+	switch (len) {
+	case 1:
+		out_8(addr, val);
+		break;
+	case 2:
+		out_le16(addr, val);
+		break;
+	default:
+		out_le32(addr, val);
+		break;
+	}
+	return PCIBIOS_SUCCESSFUL;
+}
+
+/* config space accessors for the U3 AGP bridge */
+static struct pci_ops u3_agp_pci_ops =
+{
+ .read = u3_agp_read_config,
+ .write = u3_agp_write_config,
+};
+
+/* Offset into the HT config window for the first bus: devfn at bit 8, register in the low byte. */
+static unsigned long u3_ht_cfa0(u8 devfn, u8 off)
+{
+ return (devfn << 8) | off;
+}
+
+/* Offset into the HT config window for other buses: the cfa0 offset plus bus at bit 16 and 0x01000000. */
+static unsigned long u3_ht_cfa1(u8 bus, u8 devfn, u8 off)
+{
+ return u3_ht_cfa0(devfn, off) + (bus << 16) + 0x01000000UL;
+}
+
+/*
+ * Returns the address inside the HT config window for the given
+ * bus/devfn/offset, or NULL for slot 0 on the hose's first bus.
+ */
+static volatile void __iomem *u3_ht_cfg_access(struct pci_controller* hose,
+					u8 bus, u8 devfn, u8 offset)
+{
+	if (bus != hose->first_busno)
+		return hose->cfg_data + u3_ht_cfa1(bus, devfn, offset);
+
+	/* slot 0 on the first bus is not reachable through this window */
+	if (PCI_SLOT(devfn) == 0)
+		return NULL;
+
+	return hose->cfg_data + u3_ht_cfa0(devfn, offset);
+}
+
+/*
+ * Reads a config register of the HT root device (devfn 0 on the first
+ * bus) through hose->cfg_addr.
+ *
+ * Each dword of config space is located at four times its offset, and
+ * the position inside the dword is mirrored (4 - len - (offset & 3)),
+ * with big-endian accessors used for the read. Always returns
+ * PCIBIOS_SUCCESSFUL.
+ */
+static int u3_ht_root_read_config(struct pci_controller *hose, u8 offset,
+ int len, u32 *val)
+{
+ volatile void __iomem *addr;
+
+ addr = hose->cfg_addr;
+ addr += ((offset & ~3) << 2) + (4 - len - (offset & 3));
+
+ switch (len) {
+ case 1:
+ *val = in_8(addr);
+ break;
+ case 2:
+ *val = in_be16(addr);
+ break;
+ default:
+ *val = in_be32(addr);
+ break;
+ }
+
+ return PCIBIOS_SUCCESSFUL;
+}
+
+/*
+ * Writes a config register of the HT root device through hose->cfg_addr.
+ *
+ * Writes to offsets from PCI_BASE_ADDRESS_0 up to, but not including,
+ * PCI_CAPABILITY_LIST are dropped and reported as successful. Other
+ * registers use the same addressing as the root read path: dword offset
+ * times four, mirrored position inside the dword, big-endian accessors.
+ * Always returns PCIBIOS_SUCCESSFUL.
+ */
+static int u3_ht_root_write_config(struct pci_controller *hose, u8 offset,
+				   int len, u32 val)
+{
+	volatile void __iomem *reg;
+	int word_off;
+	int byte_off;
+
+	if (offset >= PCI_BASE_ADDRESS_0 && offset < PCI_CAPABILITY_LIST)
+		return PCIBIOS_SUCCESSFUL;
+
+	word_off = (offset & ~3) << 2;
+	byte_off = 4 - len - (offset & 3);
+	reg = hose->cfg_addr + word_off + byte_off;
+
+	if (len == 1)
+		out_8(reg, val);
+	else if (len == 2)
+		out_be16(reg, val);
+	else
+		out_be32(reg, val);
+
+	return PCIBIOS_SUCCESSFUL;
+}
+
+/*
+ * pci_ops read accessor for the U3 HT bridge.
+ *
+ * Accesses to devfn 0 on the hose's first bus go to the root-device
+ * helper; everything else is read little-endian through the HT config
+ * window. Offsets above 0xff are rejected for non-root devices.
+ *
+ * NOTE(review): the root path is taken before the offset range check and
+ * its helper takes a u8 offset, so larger offsets are truncated there.
+ */
+static int u3_ht_read_config(struct pci_bus *bus, unsigned int devfn,
+ int offset, int len, u32 *val)
+{
+ struct pci_controller *hose;
+ volatile void __iomem *addr;
+
+ hose = pci_bus_to_host(bus);
+ if (hose == NULL)
+ return PCIBIOS_DEVICE_NOT_FOUND;
+
+ if (bus->number == hose->first_busno && devfn == PCI_DEVFN(0, 0))
+ return u3_ht_root_read_config(hose, offset, len, val);
+
+ if (offset > 0xff)
+ return PCIBIOS_BAD_REGISTER_NUMBER;
+
+ addr = u3_ht_cfg_access(hose, bus->number, devfn, offset);
+ if (!addr)
+ return PCIBIOS_DEVICE_NOT_FOUND;
+
+ /*
+ * Note: the caller has already checked that offset is
+ * suitably aligned and that len is 1, 2 or 4.
+ */
+ switch (len) {
+ case 1:
+ *val = in_8(addr);
+ break;
+ case 2:
+ *val = in_le16(addr);
+ break;
+ default:
+ *val = in_le32(addr);
+ break;
+ }
+ return PCIBIOS_SUCCESSFUL;
+}
+
+/*
+ * pci_ops write accessor for the U3 HT bridge.
+ *
+ * Accesses to devfn 0 on the hose's first bus go to the root-device
+ * helper; everything else is written little-endian through the HT config
+ * window. Offsets above 0xff are rejected for non-root devices.
+ *
+ * NOTE(review): the root path is taken before the offset range check and
+ * its helper takes a u8 offset, so larger offsets are truncated there.
+ */
+static int u3_ht_write_config(struct pci_bus *bus, unsigned int devfn,
+ int offset, int len, u32 val)
+{
+ struct pci_controller *hose;
+ volatile void __iomem *addr;
+
+ hose = pci_bus_to_host(bus);
+ if (hose == NULL)
+ return PCIBIOS_DEVICE_NOT_FOUND;
+
+ if (bus->number == hose->first_busno && devfn == PCI_DEVFN(0, 0))
+ return u3_ht_root_write_config(hose, offset, len, val);
+
+ if (offset > 0xff)
+ return PCIBIOS_BAD_REGISTER_NUMBER;
+
+ addr = u3_ht_cfg_access(hose, bus->number, devfn, offset);
+ if (!addr)
+ return PCIBIOS_DEVICE_NOT_FOUND;
+ /*
+ * Note: the caller has already checked that offset is
+ * suitably aligned and that len is 1, 2 or 4.
+ */
+ switch (len) {
+ case 1:
+ out_8(addr, val);
+ break;
+ case 2:
+ out_le16(addr, val);
+ break;
+ default:
+ out_le32(addr, val);
+ break;
+ }
+ return PCIBIOS_SUCCESSFUL;
+}
+
+/* config space accessors for the U3 HT bridge */
+static struct pci_ops u3_ht_pci_ops =
+{
+ .read = u3_ht_read_config,
+ .write = u3_ht_write_config,
+};
+
+/*
+ * Builds the config address used on the hose's first bus: one bit
+ * selected by the slot number, the function number at bit 8, bits 8 and
+ * up of the register offset at bit 28, and the dword-aligned low offset.
+ *
+ * The slot bit is shifted as unsigned: slot 31 would shift a plain int 1
+ * into the sign bit, which is undefined.
+ */
+static unsigned int u4_pcie_cfa0(unsigned int devfn, unsigned int off)
+{
+	return (1u << PCI_SLOT(devfn)) |
+		(PCI_FUNC(devfn) << 8) |
+		((off >> 8) << 28) |
+		(off & 0xfcu);
+}
+
+/*
+ * Builds the config address used for buses other than the hose's first
+ * one: bus at bit 16, devfn at bit 8, bits 8 and up of the register
+ * offset at bit 28, the dword-aligned low offset, and bit 0 set.
+ */
+static unsigned int u4_pcie_cfa1(unsigned int bus, unsigned int devfn,
+ unsigned int off)
+{
+ return (bus << 16) |
+ (devfn << 8) |
+ ((off >> 8) << 28) |
+ (off & 0xfcu) | 1u;
+}
+
+/*
+ * Selects a config register on the U4 PCIe bridge and returns the
+ * address to access it through.
+ *
+ * The config address is written to cfg_addr repeatedly until it reads
+ * back unchanged. The result is cfg_data plus the low two bits of
+ * @offset. No path here returns NULL.
+ */
+static volatile void __iomem *u4_pcie_cfg_access(struct pci_controller* hose,
+ u8 bus, u8 dev_fn, int offset)
+{
+ unsigned int caddr;
+
+ if (bus == hose->first_busno)
+ caddr = u4_pcie_cfa0(dev_fn, offset);
+ else
+ caddr = u4_pcie_cfa1(bus, dev_fn, offset);
+
+ /* Uninorth will return garbage if we don't read back the value ! */
+ do {
+ out_le32(hose->cfg_addr, caddr);
+ } while (in_le32(hose->cfg_addr) != caddr);
+
+ offset &= 0x03;
+ return hose->cfg_data + offset;
+}
+
+/*
+ * Config-space read for the U4 PCIe host.
+ *
+ * Rejects offsets of 0x1000 and above, selects the target through
+ * u4_pcie_cfg_access() and reads 1, 2 or 4 bytes into *val.
+ * Returns a PCIBIOS_* status code.
+ */
+static int u4_pcie_read_config(struct pci_bus *bus, unsigned int devfn,
+ int offset, int len, u32 *val)
+{
+ struct pci_controller *hose;
+ volatile void __iomem *addr;
+
+ hose = pci_bus_to_host(bus);
+ if (hose == NULL)
+ return PCIBIOS_DEVICE_NOT_FOUND;
+ if (offset >= 0x1000)
+ return PCIBIOS_BAD_REGISTER_NUMBER;
+ addr = u4_pcie_cfg_access(hose, bus->number, devfn, offset);
+ if (!addr)
+ return PCIBIOS_DEVICE_NOT_FOUND;
+ /*
+ * Note: the caller has already checked that offset is
+ * suitably aligned and that len is 1, 2 or 4.
+ */
+ switch (len) {
+ case 1:
+ *val = in_8(addr);
+ break;
+ case 2:
+ *val = in_le16(addr);
+ break;
+ default:
+ *val = in_le32(addr);
+ break;
+ }
+ return PCIBIOS_SUCCESSFUL;
+}
+/*
+ * Config-space write for the U4 PCIe host.
+ *
+ * Rejects offsets of 0x1000 and above, selects the target through
+ * u4_pcie_cfg_access() and writes the low 1, 2 or 4 bytes of val.
+ * Returns a PCIBIOS_* status code.
+ */
+static int u4_pcie_write_config(struct pci_bus *bus, unsigned int devfn,
+ int offset, int len, u32 val)
+{
+ struct pci_controller *hose;
+ volatile void __iomem *addr;
+
+ hose = pci_bus_to_host(bus);
+ if (hose == NULL)
+ return PCIBIOS_DEVICE_NOT_FOUND;
+ if (offset >= 0x1000)
+ return PCIBIOS_BAD_REGISTER_NUMBER;
+ addr = u4_pcie_cfg_access(hose, bus->number, devfn, offset);
+ if (!addr)
+ return PCIBIOS_DEVICE_NOT_FOUND;
+ /*
+ * Note: the caller has already checked that offset is
+ * suitably aligned and that len is 1, 2 or 4.
+ */
+ switch (len) {
+ case 1:
+ out_8(addr, val);
+ break;
+ case 2:
+ out_le16(addr, val);
+ break;
+ default:
+ out_le32(addr, val);
+ break;
+ }
+ return PCIBIOS_SUCCESSFUL;
+}
+
+/* Config-space accessors installed on the U4 PCIe host by setup_u4_pcie(). */
+static struct pci_ops u4_pcie_pci_ops =
+{
+ .read = u4_pcie_read_config,
+ .write = u4_pcie_write_config,
+};
+
+/*
+ * Set up the U3 AGP host: fixed bus range 0xf0-0xff, AGP config accessors,
+ * and hard-coded mappings for the config address and data registers. The
+ * controller is remembered in u3_agp.
+ */
+static void __init setup_u3_agp(struct pci_controller* hose)
+{
+ /* On G5, we move AGP up to high bus number so we don't need
+ * to reassign bus numbers for HT. If we ever have P2P bridges
+ * on AGP, we'll have to move pci_assign_all_buses to the
+ * pci_controller structure so we enable it for AGP and not for
+ * HT children.
+ * We hard code the address because of the different size of
+ * the reg address cell, we shall fix that by killing struct
+ * reg_property and using some accessor functions instead
+ */
+ hose->first_busno = 0xf0;
+ hose->last_busno = 0xff;
+ hose->ops = &u3_agp_pci_ops;
+ /* NOTE(review): the ioremap() results are not checked for NULL. */
+ hose->cfg_addr = ioremap(0xf0000000 + 0x800000, 0x1000);
+ hose->cfg_data = ioremap(0xf0000000 + 0xc00000, 0x1000);
+
+ u3_agp = hose;
+}
+
+/*
+ * Set up the U4 PCIe host: install the PCIe config accessors and map the
+ * config address and data registers at hard-coded physical addresses. The
+ * bus range set by the caller is left untouched. The controller is
+ * remembered in u4_pcie.
+ */
+static void __init setup_u4_pcie(struct pci_controller* hose)
+{
+ /* We currently only implement the "non-atomic" config space, to
+ * be optimised later.
+ */
+ hose->ops = &u4_pcie_pci_ops;
+ /* NOTE(review): the ioremap() results are not checked for NULL. */
+ hose->cfg_addr = ioremap(0xf0000000 + 0x800000, 0x1000);
+ hose->cfg_data = ioremap(0xf0000000 + 0xc00000, 0x1000);
+
+ u4_pcie = hose;
+}
+
+/*
+ * Set up the U3 HyperTransport host: install the HT config accessors, map
+ * the config data window and the address register, and force the bus range
+ * to 0-0xef. The controller is remembered in u3_ht.
+ */
+static void __init setup_u3_ht(struct pci_controller* hose)
+{
+ hose->ops = &u3_ht_pci_ops;
+
+ /* We hard code the address because of the different size of
+ * the reg address cell, we shall fix that by killing struct
+ * reg_property and using some accessor functions instead
+ */
+ /* NOTE(review): the ioremap() results are not checked for NULL. */
+ hose->cfg_data = ioremap(0xf2000000, 0x02000000);
+ hose->cfg_addr = ioremap(0xf8070000, 0x1000);
+
+ hose->first_busno = 0;
+ hose->last_busno = 0xef;
+
+ u3_ht = hose;
+}
+
+/*
+ * Allocate and register a PCI host controller for device-tree node @dev.
+ *
+ * The bus range is taken from the node's "bus-range" property (two cells);
+ * if the property is missing or too short the range defaults to 0-0xff.
+ * The bridge flavour (U3 AGP, U3 HT or U4 PCIe) is chosen from the node's
+ * compatible string and its setup routine may override the bus range.
+ *
+ * Returns 0 on success or -ENOMEM if no controller could be allocated.
+ */
+static int __init maple_add_bridge(struct device_node *dev)
+{
+	int len;
+	struct pci_controller *hose;
+	char* disp_name;
+	const int *bus_range;
+	int primary = 1;
+
+	DBG("Adding PCI host bridge %pOF\n", dev);
+
+	bus_range = of_get_property(dev, "bus-range", &len);
+	if (bus_range == NULL || len < 2 * sizeof(int)) {
+		printk(KERN_WARNING "Can't get bus-range for %pOF, assume bus 0\n",
+		       dev);
+		/*
+		 * A property shorter than two cells must not be indexed
+		 * below: fall back to the defaults as the message promises.
+		 */
+		bus_range = NULL;
+	}
+
+	hose = pcibios_alloc_controller(dev);
+	if (hose == NULL)
+		return -ENOMEM;
+	hose->first_busno = bus_range ? bus_range[0] : 0;
+	hose->last_busno = bus_range ? bus_range[1] : 0xff;
+	hose->controller_ops = maple_pci_controller_ops;
+
+	/* disp_name stays NULL if none of the known compatibles match */
+	disp_name = NULL;
+	if (of_device_is_compatible(dev, "u3-agp")) {
+		setup_u3_agp(hose);
+		disp_name = "U3-AGP";
+		primary = 0;
+	} else if (of_device_is_compatible(dev, "u3-ht")) {
+		setup_u3_ht(hose);
+		disp_name = "U3-HT";
+		primary = 1;
+	} else if (of_device_is_compatible(dev, "u4-pcie")) {
+		setup_u4_pcie(hose);
+		disp_name = "U4-PCIE";
+		primary = 0;
+	}
+	printk(KERN_INFO "Found %s PCI host bridge. Firmware bus number: %d->%d\n",
+	       disp_name, hose->first_busno, hose->last_busno);
+
+	/* Interpret the "ranges" property */
+	/* This also maps the I/O region and sets isa_io/mem_base */
+	pci_process_bridge_OF_ranges(hose, dev, primary);
+
+	/* Fixup "bus-range" OF property */
+	fixup_bus_range(dev);
+
+	/* Check for legacy IOs */
+	isa_bridge_find_early(hose);
+
+	/* create pci_dn's for DT nodes under this PHB */
+	pci_devs_phb_init_dynamic(hose);
+
+	return 0;
+}
+
+
+/*
+ * Per-device interrupt fixup, installed as the machine's pci_irq_fixup hook.
+ *
+ * Devices on bus 0 of the U4 PCIe host get hardware interrupt 1 mapped as
+ * level-low. The AMD8111 IDE function has its irq cleared when the class
+ * test below fails, so the driver falls back to the legacy IDE irq lookup.
+ */
+void maple_pci_irq_fixup(struct pci_dev *dev)
+{
+ DBG(" -> maple_pci_irq_fixup\n");
+
+ /* Fixup IRQ for PCIe host */
+ if (u4_pcie != NULL && dev->bus->number == 0 &&
+ pci_bus_to_host(dev->bus) == u4_pcie) {
+ printk(KERN_DEBUG "Fixup U4 PCIe IRQ\n");
+ dev->irq = irq_create_mapping(NULL, 1);
+ if (dev->irq)
+ irq_set_irq_type(dev->irq, IRQ_TYPE_LEVEL_LOW);
+ }
+
+ /* Hide AMD8111 IDE interrupt when in legacy mode so
+ * the driver calls pci_get_legacy_ide_irq()
+ */
+ /* NOTE(review): bits 0 and 2 of class presumably mean "both channels
+ * in native mode" - confirm against the PCI IDE programming interface.
+ */
+ if (dev->vendor == PCI_VENDOR_ID_AMD &&
+ dev->device == PCI_DEVICE_ID_AMD_8111_IDE &&
+ (dev->class & 5) != 5) {
+ dev->irq = 0;
+ }
+
+ DBG(" <- maple_pci_irq_fixup\n");
+}
+
+/*
+ * Root-bridge prepare hook. Does nothing unless the bridge belongs to the
+ * U3 AGP host, in which case the bus number recorded for the host node and
+ * each of its direct children is forced to 0xf0. Always returns 0.
+ */
+static int maple_pci_root_bridge_prepare(struct pci_host_bridge *bridge)
+{
+ struct pci_controller *hose = pci_bus_to_host(bridge->bus);
+ struct device_node *np, *child;
+
+ if (hose != u3_agp)
+ return 0;
+
+ /* Fixup the PCI<->OF mapping for U3 AGP due to bus renumbering. We
+ * assume there is no P2P bridge on the AGP bus, which should be a
+ * safe assumption, hopefully.
+ */
+ np = hose->dn;
+ /* NOTE(review): PCI_DN() results are used without a NULL check. */
+ PCI_DN(np)->busno = 0xf0;
+ for_each_child_of_node(np, child)
+ PCI_DN(child)->busno = 0xf0;
+
+ return 0;
+}
+
+/*
+ * Discover the PCI host bridges that are direct children of the device-tree
+ * root. U4 PCIe and U3 AGP bridges are added as they are found; a U3 HT
+ * bridge is only remembered during the scan and added after the loop.
+ * Finally the root-bridge prepare hook is installed and PCI_PROBE_ONLY set.
+ */
+void __init maple_pci_init(void)
+{
+ struct device_node *np, *root;
+ struct device_node *ht = NULL;
+
+ /* Probe root PCI hosts, that is on U3 the AGP host and the
+ * HyperTransport host. That one is actually "kept" around
+ * and actually added last as its resource management relies
+ * on the AGP resources to have been setup first
+ */
+ root = of_find_node_by_path("/");
+ if (root == NULL) {
+ printk(KERN_CRIT "maple_find_bridges: can't find root of device tree\n");
+ return;
+ }
+ for_each_child_of_node(root, np) {
+ if (!of_node_is_type(np, "pci") && !of_node_is_type(np, "ht"))
+ continue;
+ /* Take an extra reference on every node whose bridge was added. */
+ if ((of_device_is_compatible(np, "u4-pcie") ||
+ of_device_is_compatible(np, "u3-agp")) &&
+ maple_add_bridge(np) == 0)
+ of_node_get(np);
+
+ /* Hold the HT node past the end of the loop; it is added below. */
+ if (of_device_is_compatible(np, "u3-ht")) {
+ of_node_get(np);
+ ht = np;
+ }
+ }
+ of_node_put(root);
+
+ /* Now setup the HyperTransport host if we found any
+ */
+ /* The reference taken in the loop is dropped only if the add fails. */
+ if (ht && maple_add_bridge(ht) != 0)
+ of_node_put(ht);
+
+ ppc_md.pcibios_root_bridge_prepare = maple_pci_root_bridge_prepare;
+
+ /* Tell pci.c to not change any resource allocations. */
+ pci_add_flags(PCI_PROBE_ONLY);
+}
+
+/*
+ * Return the interrupt to use for legacy IDE channel @channel of @pdev.
+ *
+ * The default is 14 for channel 0 and 15 otherwise. For the AMD8111 IDE
+ * function the interrupt is instead looked up in the device's OF node
+ * (index channel & 1); any failure falls back to the default.
+ */
+int maple_pci_get_legacy_ide_irq(struct pci_dev *pdev, int channel)
+{
+ struct device_node *np;
+ unsigned int defirq = channel ? 15 : 14;
+ unsigned int irq;
+
+ if (pdev->vendor != PCI_VENDOR_ID_AMD ||
+ pdev->device != PCI_DEVICE_ID_AMD_8111_IDE)
+ return defirq;
+
+ np = pci_device_to_OF_node(pdev);
+ if (np == NULL) {
+ printk("Failed to locate OF node for IDE %s\n",
+ pci_name(pdev));
+ return defirq;
+ }
+ irq = irq_of_parse_and_map(np, channel & 0x1);
+ if (!irq) {
+ printk("Failed to map onboard IDE interrupt for channel %d\n",
+ channel);
+ return defirq;
+ }
+ return irq;
+}
+
+/*
+ * Final PCI fixup for the IBM Obsidian device: when running on the maple
+ * machine, mark the device as not supporting MSI and log that it was done.
+ */
+static void quirk_ipr_msi(struct pci_dev *dev)
+{
+ /* Something prevents MSIs from the IPR from working on Bimini,
+ * and the driver has no smarts to recover. So disable MSI
+ * on it for now. */
+
+ if (machine_is(maple)) {
+ dev->no_msi = 1;
+ dev_info(&dev->dev, "Quirk disabled MSI\n");
+ }
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_OBSIDIAN,
+ quirk_ipr_msi);
+
+/*
+ * Controller ops copied into every maple host bridge by maple_add_bridge().
+ * Statically empty here; setup.c passes its address to
+ * iommu_init_early_dart().
+ */
+struct pci_controller_ops maple_pci_controller_ops = {
+};
diff --git a/arch/powerpc/platforms/maple/setup.c b/arch/powerpc/platforms/maple/setup.c
new file mode 100644
index 000000000..f329a03ed
--- /dev/null
+++ b/arch/powerpc/platforms/maple/setup.c
@@ -0,0 +1,363 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Maple (970 eval board) setup code
+ *
+ * (c) Copyright 2004 Benjamin Herrenschmidt (benh@kernel.crashing.org),
+ * IBM Corp.
+ */
+
+#undef DEBUG
+
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/mm.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+#include <linux/ptrace.h>
+#include <linux/user.h>
+#include <linux/tty.h>
+#include <linux/string.h>
+#include <linux/delay.h>
+#include <linux/ioport.h>
+#include <linux/major.h>
+#include <linux/initrd.h>
+#include <linux/vt_kern.h>
+#include <linux/console.h>
+#include <linux/pci.h>
+#include <linux/adb.h>
+#include <linux/cuda.h>
+#include <linux/pmu.h>
+#include <linux/irq.h>
+#include <linux/seq_file.h>
+#include <linux/root_dev.h>
+#include <linux/serial.h>
+#include <linux/smp.h>
+#include <linux/bitops.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/platform_device.h>
+#include <linux/memblock.h>
+
+#include <asm/processor.h>
+#include <asm/sections.h>
+#include <asm/io.h>
+#include <asm/pci-bridge.h>
+#include <asm/iommu.h>
+#include <asm/machdep.h>
+#include <asm/dma.h>
+#include <asm/cputable.h>
+#include <asm/time.h>
+#include <asm/mpic.h>
+#include <asm/rtas.h>
+#include <asm/udbg.h>
+#include <asm/nvram.h>
+
+#include "maple.h"
+
+#ifdef DEBUG
+#define DBG(fmt...) udbg_printf(fmt)
+#else
+#define DBG(fmt...)
+#endif
+
+/*
+ * Look up the I/O port base of the NVRAM device (an "nvram" node compatible
+ * with "AMD8111"). Returns the start of its first address resource, or 0
+ * if the node is missing, cannot be translated, or is not an I/O resource.
+ */
+static unsigned long maple_find_nvram_base(void)
+{
+	struct device_node *nvram;
+	struct resource res;
+	unsigned long base = 0;
+
+	nvram = of_find_compatible_node(NULL, "nvram", "AMD8111");
+	if (!nvram) {
+		printk(KERN_EMERG "Maple: Unable to find NVRAM\n");
+		return 0;
+	}
+
+	if (of_address_to_resource(nvram, 0, &res))
+		printk(KERN_EMERG "Maple: Unable to translate NVRAM"
+		       " address\n");
+	else if (!(res.flags & IORESOURCE_IO))
+		printk(KERN_EMERG "Maple: NVRAM address isn't PIO!\n");
+	else
+		base = res.start;
+
+	of_node_put(nvram);
+	return base;
+}
+
+/*
+ * Restart the board by writing the service processor's "restart-value" to
+ * the NVRAM I/O port at base + "restart-addr". @cmd is unused.
+ *
+ * Never returns: if the NVRAM base, the service-processor node or either
+ * property is missing, a "manual restart" message is printed and the CPU
+ * spins.
+ */
+static void __noreturn maple_restart(char *cmd)
+{
+	unsigned int maple_nvram_base;
+	const unsigned int *maple_nvram_offset, *maple_nvram_command;
+	unsigned int offset = 0, command = 0;
+	int have_props;
+	struct device_node *sp;
+
+	maple_nvram_base = maple_find_nvram_base();
+	if (maple_nvram_base == 0)
+		goto fail;
+
+	/* find service processor device */
+	sp = of_find_node_by_name(NULL, "service-processor");
+	if (!sp) {
+		printk(KERN_EMERG "Maple: Unable to find Service Processor\n");
+		goto fail;
+	}
+	maple_nvram_offset = of_get_property(sp, "restart-addr", NULL);
+	maple_nvram_command = of_get_property(sp, "restart-value", NULL);
+
+	/*
+	 * Either property may be absent, and the pointers refer to data
+	 * owned by the node: copy the values out before dropping it.
+	 */
+	have_props = maple_nvram_offset && maple_nvram_command;
+	if (have_props) {
+		offset = *maple_nvram_offset;
+		command = *maple_nvram_command;
+	}
+	of_node_put(sp);
+	if (!have_props) {
+		printk(KERN_EMERG "Maple: Service Processor has no restart command\n");
+		goto fail;
+	}
+
+	/* send command */
+	outb_p(command, maple_nvram_base + offset);
+	for (;;) ;
+ fail:
+	printk(KERN_EMERG "Maple: Manual Restart Required\n");
+	for (;;) ;
+}
+
+/*
+ * Power the board off by writing the service processor's "power-off-value"
+ * to the NVRAM I/O port at base + "power-off-addr".
+ *
+ * Never returns: if the NVRAM base, the service-processor node or either
+ * property is missing, a "manual power-down" message is printed and the
+ * CPU spins.
+ */
+static void __noreturn maple_power_off(void)
+{
+	unsigned int maple_nvram_base;
+	const unsigned int *maple_nvram_offset, *maple_nvram_command;
+	unsigned int offset = 0, command = 0;
+	int have_props;
+	struct device_node *sp;
+
+	maple_nvram_base = maple_find_nvram_base();
+	if (maple_nvram_base == 0)
+		goto fail;
+
+	/* find service processor device */
+	sp = of_find_node_by_name(NULL, "service-processor");
+	if (!sp) {
+		printk(KERN_EMERG "Maple: Unable to find Service Processor\n");
+		goto fail;
+	}
+	maple_nvram_offset = of_get_property(sp, "power-off-addr", NULL);
+	maple_nvram_command = of_get_property(sp, "power-off-value", NULL);
+
+	/*
+	 * Either property may be absent, and the pointers refer to data
+	 * owned by the node: copy the values out before dropping it.
+	 */
+	have_props = maple_nvram_offset && maple_nvram_command;
+	if (have_props) {
+		offset = *maple_nvram_offset;
+		command = *maple_nvram_command;
+	}
+	of_node_put(sp);
+	if (!have_props) {
+		printk(KERN_EMERG "Maple: Service Processor has no power-off command\n");
+		goto fail;
+	}
+
+	/* send command */
+	outb_p(command, maple_nvram_base + offset);
+	for (;;) ;
+ fail:
+	printk(KERN_EMERG "Maple: Manual Power-Down Required\n");
+	for (;;) ;
+}
+
+/* Halting is implemented as a power-off; never returns. */
+static void __noreturn maple_halt(void)
+{
+ maple_power_off();
+}
+
+#ifdef CONFIG_SMP
+/*
+ * SMP callbacks installed by maple_setup_arch(): the MPIC helpers for
+ * probing, messaging and per-CPU setup, and the generic helpers for
+ * kicking CPUs and handing over the timebase.
+ */
+static struct smp_ops_t maple_smp_ops = {
+ .probe = smp_mpic_probe,
+ .message_pass = smp_mpic_message_pass,
+ .kick_cpu = smp_generic_kick_cpu,
+ .setup_cpu = smp_mpic_setup_cpu,
+ .give_timebase = smp_generic_give_timebase,
+ .take_timebase = smp_generic_take_timebase,
+};
+#endif /* CONFIG_SMP */
+
+/*
+ * Switch restart, power-off and halt to the RTAS implementations, but only
+ * when RTAS implements both the system-reboot and the power-off functions.
+ * Otherwise the handlers already in place are left alone.
+ */
+static void __init maple_use_rtas_reboot_and_halt_if_present(void)
+{
+ if (rtas_function_implemented(RTAS_FN_SYSTEM_REBOOT) &&
+ rtas_function_implemented(RTAS_FN_POWER_OFF)) {
+ ppc_md.restart = rtas_restart;
+ pm_power_off = rtas_power_off;
+ ppc_md.halt = rtas_halt;
+ }
+}
+
+/*
+ * Machine setup_arch hook: seeds loops_per_jiffy, installs the SMP ops
+ * (when CONFIG_SMP is set), prefers RTAS for reboot/halt if available and
+ * initialises the MMIO NVRAM driver.
+ */
+static void __init maple_setup_arch(void)
+{
+ /* init to some ~sane value until calibrate_delay() runs */
+ loops_per_jiffy = 50000000;
+
+ /* Setup SMP callback */
+#ifdef CONFIG_SMP
+ smp_ops = &maple_smp_ops;
+#endif
+ maple_use_rtas_reboot_and_halt_if_present();
+
+ printk(KERN_DEBUG "Using native/NAP idle loop\n");
+
+ mmio_nvram_init();
+}
+
+/*
+ * Locate the MPIC in the device tree, allocate and initialise it, and make
+ * it the machine's interrupt source.
+ *
+ * The controller's address, optionally followed by ISU addresses, comes
+ * from the root node's "platform-open-pic" property; each entry is
+ * #address-cells wide.
+ *
+ * This is almost identical to pSeries and CHRP. We need to make that
+ * code generic at one point, with appropriate bits in the device-tree to
+ * identify the presence of an HT APIC
+ */
+static void __init maple_init_IRQ(void)
+{
+	struct device_node *root, *np, *mpic_node = NULL;
+	const unsigned int *opprop;
+	unsigned long openpic_addr = 0;
+	int naddr, n, i, opplen = 0, has_isus = 0;
+	struct mpic *mpic;
+	unsigned int flags = 0;
+
+	/* Locate MPIC in the device-tree. Note that there is a bug
+	 * in Maple device-tree where the type of the controller is
+	 * open-pic and not interrupt-controller
+	 */
+
+	for_each_node_by_type(np, "interrupt-controller")
+		if (of_device_is_compatible(np, "open-pic")) {
+			mpic_node = np;
+			break;
+		}
+	if (mpic_node == NULL)
+		for_each_node_by_type(np, "open-pic") {
+			mpic_node = np;
+			break;
+		}
+	if (mpic_node == NULL) {
+		printk(KERN_ERR
+		       "Failed to locate the MPIC interrupt controller\n");
+		return;
+	}
+
+	/* Find address list in /platform-open-pic */
+	root = of_find_node_by_path("/");
+	naddr = of_n_addr_cells(root);
+	opprop = of_get_property(root, "platform-open-pic", &opplen);
+	if (opprop) {
+		openpic_addr = of_read_number(opprop, naddr);
+		/*
+		 * opplen is in bytes and naddr in cells: ISUs are present
+		 * only if the property holds more than one address.
+		 */
+		has_isus = (opplen / (int)sizeof(u32)) > naddr;
+		printk(KERN_DEBUG "OpenPIC addr: %lx, has ISUs: %d\n",
+		       openpic_addr, has_isus);
+	}
+
+	BUG_ON(openpic_addr == 0);
+
+	/* Check for a big endian MPIC */
+	if (of_property_read_bool(mpic_node, "big-endian"))
+		flags |= MPIC_BIG_ENDIAN;
+
+	/* XXX Maple specific bits */
+	flags |= MPIC_U3_HT_IRQS;
+	/* All U3/U4 are big-endian, older SLOF firmware doesn't encode this */
+	flags |= MPIC_BIG_ENDIAN;
+
+	/* Setup the openpic driver. More device-tree junks, we hard code no
+	 * ISUs for now. I'll have to revisit some stuffs with the folks doing
+	 * the firmware for those
+	 */
+	mpic = mpic_alloc(mpic_node, openpic_addr, flags,
+			  /*has_isus ? 16 :*/ 0, 0, " MPIC     ");
+	BUG_ON(mpic == NULL);
+
+	/* Add ISUs: every entry after the first address is one ISU */
+	opplen /= sizeof(u32);
+	for (n = 0, i = naddr; i < opplen; i += naddr, n++) {
+		unsigned long isuaddr = of_read_number(opprop + i, naddr);
+		mpic_assign_isu(mpic, n, isuaddr);
+	}
+
+	/* All ISUs are setup, complete initialization */
+	mpic_init(mpic);
+	ppc_md.get_irq = mpic_get_irq;
+	of_node_put(mpic_node);
+	of_node_put(root);
+}
+
+/* Progress hook: print the code as four hex digits followed by the message
+ * (an empty string when @s is NULL).
+ */
+static void __init maple_progress(char *s, unsigned short hex)
+{
+ printk("*** %04x : %s\n", hex, s ? s : "");
+}
+
+
+/*
+ * Called very early, MMU is off, device-tree isn't unflattened
+ */
+static int __init maple_probe(void)
+{
+ /* Only claim boards that identify as Momentum Maple or Apache. */
+ if (!of_machine_is_compatible("Momentum,Maple") &&
+ !of_machine_is_compatible("Momentum,Apache"))
+ return 0;
+
+ /* Default power-off handler; maple_setup_arch() may replace it with RTAS. */
+ pm_power_off = maple_power_off;
+
+ iommu_init_early_dart(&maple_pci_controller_ops);
+
+ /* Non-zero return: this machine description matches. */
+ return 1;
+}
+
+#ifdef CONFIG_EDAC
+/*
+ * Register a platform device for CPC925 memory controller on
+ * all boards with U3H (CPC925) bridge.
+ */
+/*
+ * Returns 0 when the device was registered or when the bridge revision is
+ * outside 0x34..0x3f (nothing to do), -ENODEV if the memory-controller node
+ * or its reg cannot be found, -ENOMEM if it cannot be mapped, or the error
+ * from platform_device_register_simple().
+ */
+static int __init maple_cpc925_edac_setup(void)
+{
+ struct platform_device *pdev;
+ struct device_node *np = NULL;
+ struct resource r;
+ int ret;
+ volatile void __iomem *mem;
+ u32 rev;
+
+ np = of_find_node_by_type(NULL, "memory-controller");
+ if (!np) {
+ printk(KERN_ERR "%s: Unable to find memory-controller node\n",
+ __func__);
+ return -ENODEV;
+ }
+
+ ret = of_address_to_resource(np, 0, &r);
+ of_node_put(np);
+
+ if (ret < 0) {
+ printk(KERN_ERR "%s: Unable to get memory-controller reg\n",
+ __func__);
+ return -ENODEV;
+ }
+
+ /* Map the controller only long enough to read its first register. */
+ mem = ioremap(r.start, resource_size(&r));
+ if (!mem) {
+ printk(KERN_ERR "%s: Unable to map memory-controller memory\n",
+ __func__);
+ return -ENOMEM;
+ }
+
+ rev = __raw_readl(mem);
+ iounmap(mem);
+
+ if (rev < 0x34 || rev > 0x3f) { /* U3H */
+ printk(KERN_ERR "%s: Non-CPC925(U3H) bridge revision: %02x\n",
+ __func__, rev);
+ return 0;
+ }
+
+ pdev = platform_device_register_simple("cpc925_edac", 0, &r, 1);
+ if (IS_ERR(pdev))
+ return PTR_ERR(pdev);
+
+ printk(KERN_INFO "%s: CPC925 platform device created\n", __func__);
+
+ return 0;
+}
+machine_device_initcall(maple, maple_cpc925_edac_setup);
+#endif
+
+/*
+ * Machine description for Maple. Power-off is not listed here: it is set
+ * through pm_power_off in maple_probe().
+ */
+define_machine(maple) {
+ .name = "Maple",
+ .probe = maple_probe,
+ .setup_arch = maple_setup_arch,
+ .discover_phbs = maple_pci_init,
+ .init_IRQ = maple_init_IRQ,
+ .pci_irq_fixup = maple_pci_irq_fixup,
+ .pci_get_legacy_ide_irq = maple_pci_get_legacy_ide_irq,
+ .restart = maple_restart,
+ .halt = maple_halt,
+ .get_boot_time = maple_get_boot_time,
+ .set_rtc_time = maple_set_rtc_time,
+ .get_rtc_time = maple_get_rtc_time,
+ .progress = maple_progress,
+ .power_save = power4_idle,
+};
diff --git a/arch/powerpc/platforms/maple/time.c b/arch/powerpc/platforms/maple/time.c
new file mode 100644
index 000000000..91606411d
--- /dev/null
+++ b/arch/powerpc/platforms/maple/time.c
@@ -0,0 +1,170 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * (c) Copyright 2004 Benjamin Herrenschmidt (benh@kernel.crashing.org),
+ * IBM Corp.
+ */
+
+#undef DEBUG
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/param.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/init.h>
+#include <linux/time.h>
+#include <linux/adb.h>
+#include <linux/pmu.h>
+#include <linux/interrupt.h>
+#include <linux/mc146818rtc.h>
+#include <linux/bcd.h>
+#include <linux/of_address.h>
+
+#include <asm/sections.h>
+#include <asm/io.h>
+#include <asm/machdep.h>
+#include <asm/time.h>
+
+#include "maple.h"
+
+#ifdef DEBUG
+#define DBG(x...) printk(x)
+#else
+#define DBG(x...)
+#endif
+
+static int maple_rtc_addr;
+
+/*
+ * Read RTC register @addr: write the register index to the port at
+ * maple_rtc_addr, then read the value from the following port.
+ */
+static int maple_clock_read(int addr)
+{
+ outb_p(addr, maple_rtc_addr);
+ return inb_p(maple_rtc_addr+1);
+}
+
+/*
+ * Write @val to RTC register @addr: write the register index to the port
+ * at maple_rtc_addr, then the value to the following port. Note the
+ * argument order is (value, register).
+ */
+static void maple_clock_write(unsigned long val, int addr)
+{
+ outb_p(addr, maple_rtc_addr);
+ outb_p(val, maple_rtc_addr+1);
+}
+
+/*
+ * Read the current time from the RTC into @tm.
+ *
+ * The fields are re-read until the seconds register is unchanged across a
+ * whole pass, then converted from BCD if the chip is not in binary mode (or
+ * RTC_ALWAYS_BCD is set). tm_wday is set to -1; other fields of @tm are
+ * not written.
+ */
+void maple_get_rtc_time(struct rtc_time *tm)
+{
+ do {
+ tm->tm_sec = maple_clock_read(RTC_SECONDS);
+ tm->tm_min = maple_clock_read(RTC_MINUTES);
+ tm->tm_hour = maple_clock_read(RTC_HOURS);
+ tm->tm_mday = maple_clock_read(RTC_DAY_OF_MONTH);
+ tm->tm_mon = maple_clock_read(RTC_MONTH);
+ tm->tm_year = maple_clock_read(RTC_YEAR);
+ } while (tm->tm_sec != maple_clock_read(RTC_SECONDS));
+
+ if (!(maple_clock_read(RTC_CONTROL) & RTC_DM_BINARY)
+ || RTC_ALWAYS_BCD) {
+ tm->tm_sec = bcd2bin(tm->tm_sec);
+ tm->tm_min = bcd2bin(tm->tm_min);
+ tm->tm_hour = bcd2bin(tm->tm_hour);
+ tm->tm_mday = bcd2bin(tm->tm_mday);
+ tm->tm_mon = bcd2bin(tm->tm_mon);
+ tm->tm_year = bcd2bin(tm->tm_year);
+ }
+ /* The year register is treated as an offset from 1900; values that
+ * would land before 1970 are moved forward a century.
+ */
+ if ((tm->tm_year + 1900) < 1970)
+ tm->tm_year += 100;
+
+ /* NOTE(review): tm_mon is stored exactly as read from RTC_MONTH, with
+ * no base adjustment - confirm callers expect the hardware's numbering.
+ */
+ tm->tm_wday = -1;
+}
+
+/*
+ * Program the RTC from @tm, holding rtc_lock for the whole sequence.
+ *
+ * The SET bit and the divider reset are applied first, the six time fields
+ * are written (converted to BCD unless the chip is in binary mode), and the
+ * saved control and frequency-select values are then restored in that
+ * order. Always returns 0.
+ */
+int maple_set_rtc_time(struct rtc_time *tm)
+{
+ unsigned char save_control, save_freq_select;
+ int sec, min, hour, mon, mday, year;
+
+ spin_lock(&rtc_lock);
+
+ save_control = maple_clock_read(RTC_CONTROL); /* tell the clock it's being set */
+
+ maple_clock_write((save_control|RTC_SET), RTC_CONTROL);
+
+ save_freq_select = maple_clock_read(RTC_FREQ_SELECT); /* stop and reset prescaler */
+
+ maple_clock_write((save_freq_select|RTC_DIV_RESET2), RTC_FREQ_SELECT);
+
+ /* NOTE(review): fields are written as given, with no range or base
+ * adjustment (tm_mon, tm_year) - confirm against callers.
+ */
+ sec = tm->tm_sec;
+ min = tm->tm_min;
+ hour = tm->tm_hour;
+ mon = tm->tm_mon;
+ mday = tm->tm_mday;
+ year = tm->tm_year;
+
+ if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
+ sec = bin2bcd(sec);
+ min = bin2bcd(min);
+ hour = bin2bcd(hour);
+ mon = bin2bcd(mon);
+ mday = bin2bcd(mday);
+ year = bin2bcd(year);
+ }
+ maple_clock_write(sec, RTC_SECONDS);
+ maple_clock_write(min, RTC_MINUTES);
+ maple_clock_write(hour, RTC_HOURS);
+ maple_clock_write(mon, RTC_MONTH);
+ maple_clock_write(mday, RTC_DAY_OF_MONTH);
+ maple_clock_write(year, RTC_YEAR);
+
+ /* The following flags have to be released exactly in this order,
+ * otherwise the DS12887 (popular MC146818A clone with integrated
+ * battery and quartz) will not reset the oscillator and will not
+ * update precisely 500 ms later. You won't find this mentioned in
+ * the Dallas Semiconductor data sheets, but who believes data
+ * sheets anyway ... -- Markus Kuhn
+ */
+ maple_clock_write(save_control, RTC_CONTROL);
+ maple_clock_write(save_freq_select, RTC_FREQ_SELECT);
+
+ spin_unlock(&rtc_lock);
+
+ return 0;
+}
+
+/* I/O port range claimed for the RTC; start and end are filled in by
+ * maple_get_boot_time().
+ */
+static struct resource rtc_iores = {
+ .name = "rtc",
+ .flags = IORESOURCE_IO | IORESOURCE_BUSY,
+};
+
+/*
+ * Locate the RTC, reserve its I/O ports and return the current RTC time as
+ * a time64_t.
+ *
+ * The port base comes from the first address of an "rtc" node compatible
+ * with "pnpPNP,b00"; if that cannot be found or used, the legacy
+ * RTC_PORT(0) address is assumed.
+ */
+time64_t __init maple_get_boot_time(void)
+{
+ struct rtc_time tm;
+ struct device_node *rtcs;
+
+ rtcs = of_find_compatible_node(NULL, "rtc", "pnpPNP,b00");
+ if (rtcs) {
+ struct resource r;
+ if (of_address_to_resource(rtcs, 0, &r)) {
+ printk(KERN_EMERG "Maple: Unable to translate RTC"
+ " address\n");
+ goto bail;
+ }
+ if (!(r.flags & IORESOURCE_IO)) {
+ printk(KERN_EMERG "Maple: RTC address isn't PIO!\n");
+ goto bail;
+ }
+ maple_rtc_addr = r.start;
+ printk(KERN_INFO "Maple: Found RTC at IO 0x%x\n",
+ maple_rtc_addr);
+ }
+ bail:
+ of_node_put(rtcs);
+ /* Still zero: no node, or its address was unusable. */
+ if (maple_rtc_addr == 0) {
+ maple_rtc_addr = RTC_PORT(0); /* legacy address */
+ printk(KERN_INFO "Maple: No device node for RTC, assuming "
+ "legacy address (0x%x)\n", maple_rtc_addr);
+ }
+
+ /* Claim 8 ports starting at the RTC base; the result is not checked. */
+ rtc_iores.start = maple_rtc_addr;
+ rtc_iores.end = maple_rtc_addr + 7;
+ request_resource(&ioport_resource, &rtc_iores);
+
+ maple_get_rtc_time(&tm);
+ return rtc_tm_to_time64(&tm);
+}
+
diff --git a/arch/powerpc/platforms/microwatt/Kconfig b/arch/powerpc/platforms/microwatt/Kconfig
new file mode 100644
index 000000000..6af443a1d
--- /dev/null
+++ b/arch/powerpc/platforms/microwatt/Kconfig
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0
+config PPC_MICROWATT
+ depends on PPC_BOOK3S_64 && !SMP
+ bool "Microwatt SoC platform"
+ select PPC_XICS
+ select PPC_ICS_NATIVE
+ select PPC_ICP_NATIVE
+ select PPC_UDBG_16550
+ help
+ This option enables support for FPGA-based Microwatt implementations.
+
diff --git a/arch/powerpc/platforms/microwatt/Makefile b/arch/powerpc/platforms/microwatt/Makefile
new file mode 100644
index 000000000..116d6d3ad
--- /dev/null
+++ b/arch/powerpc/platforms/microwatt/Makefile
@@ -0,0 +1 @@
+obj-y += setup.o rng.o
diff --git a/arch/powerpc/platforms/microwatt/microwatt.h b/arch/powerpc/platforms/microwatt/microwatt.h
new file mode 100644
index 000000000..335417e95
--- /dev/null
+++ b/arch/powerpc/platforms/microwatt/microwatt.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _MICROWATT_H
+#define _MICROWATT_H
+
+void microwatt_rng_init(void);
+
+#endif /* _MICROWATT_H */
diff --git a/arch/powerpc/platforms/microwatt/rng.c b/arch/powerpc/platforms/microwatt/rng.c
new file mode 100644
index 000000000..8ece87d00
--- /dev/null
+++ b/arch/powerpc/platforms/microwatt/rng.c
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Derived from arch/powerpc/platforms/powernv/rng.c, which is:
+ * Copyright 2013, Michael Ellerman, IBM Corporation.
+ */
+
+#define pr_fmt(fmt) "microwatt-rng: " fmt
+
+#include <linux/kernel.h>
+#include <linux/smp.h>
+#include <asm/archrandom.h>
+#include <asm/cputable.h>
+#include <asm/machdep.h>
+#include "microwatt.h"
+
+#define DARN_ERR 0xFFFFFFFFFFFFFFFFul
+
+/*
+ * Fetch one random value with the DARN instruction.
+ *
+ * Returns 1 and stores the value in *v on success, or 0 (leaving *v
+ * untouched) when the instruction yields DARN_ERR (all ones).
+ */
+static int microwatt_get_random_darn(unsigned long *v)
+{
+ unsigned long val;
+
+ /* Using DARN with L=1 - 64-bit conditioned random number */
+ asm volatile(PPC_DARN(%0, 1) : "=r"(val));
+
+ if (val == DARN_ERR)
+ return 0;
+
+ *v = val;
+
+ return 1;
+}
+
+/*
+ * Try DARN up to ten times; on the first success register it as the
+ * machine's random-seed source. If every attempt fails, the hook is left
+ * unset.
+ */
+void __init microwatt_rng_init(void)
+{
+	unsigned long scratch;
+	int tries = 10;
+
+	while (tries-- > 0) {
+		if (!microwatt_get_random_darn(&scratch))
+			continue;
+
+		ppc_md.get_random_seed = microwatt_get_random_darn;
+		break;
+	}
+}
diff --git a/arch/powerpc/platforms/microwatt/setup.c b/arch/powerpc/platforms/microwatt/setup.c
new file mode 100644
index 000000000..5e1c09971
--- /dev/null
+++ b/arch/powerpc/platforms/microwatt/setup.c
@@ -0,0 +1,43 @@
+/*
+ * Microwatt FPGA-based SoC platform setup code.
+ *
+ * Copyright 2020 Paul Mackerras (paulus@ozlabs.org), IBM Corp.
+ */
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/stddef.h>
+#include <linux/init.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+
+#include <asm/machdep.h>
+#include <asm/time.h>
+#include <asm/xics.h>
+#include <asm/udbg.h>
+
+#include "microwatt.h"
+
+/* Machine init_IRQ hook: interrupt handling is delegated to XICS. */
+static void __init microwatt_init_IRQ(void)
+{
+ xics_init();
+}
+
+/*
+ * Arch initcall, run only on the microwatt machine: populate platform
+ * devices from the device tree with the default settings and return the
+ * result of that call.
+ */
+static int __init microwatt_populate(void)
+{
+ return of_platform_default_populate(NULL, NULL, NULL);
+}
+machine_arch_initcall(microwatt, microwatt_populate);
+
+/* Machine setup_arch hook: the only work is probing the DARN-based RNG. */
+static void __init microwatt_setup_arch(void)
+{
+ microwatt_rng_init();
+}
+
+/* Machine description for Microwatt, matched through the "microwatt-soc"
+ * compatible string.
+ */
+define_machine(microwatt) {
+ .name = "microwatt",
+ .compatible = "microwatt-soc",
+ .init_IRQ = microwatt_init_IRQ,
+ .setup_arch = microwatt_setup_arch,
+ .progress = udbg_progress,
+};
diff --git a/arch/powerpc/platforms/pasemi/Kconfig b/arch/powerpc/platforms/pasemi/Kconfig
new file mode 100644
index 000000000..85ae18ddd
--- /dev/null
+++ b/arch/powerpc/platforms/pasemi/Kconfig
@@ -0,0 +1,51 @@
+# SPDX-License-Identifier: GPL-2.0
+config PPC_PASEMI
+ depends on PPC64 && PPC_BOOK3S && CPU_BIG_ENDIAN
+ bool "PA Semi SoC-based platforms"
+ select MPIC
+ select FORCE_PCI
+ select PPC_UDBG_16550
+ select PPC_64S_HASH_MMU
+ select PPC_HASH_MMU_NATIVE
+ select MPIC_BROKEN_REGREAD
+ help
+ This option enables support for PA Semi's PWRficient line
+ of SoC processors, including PA6T-1682M.
+
+menu "PA Semi PWRficient options"
+ depends on PPC_PASEMI
+
+config PPC_PASEMI_NEMO
+ bool "Nemo motherboard Support"
+ depends on PPC_PASEMI
+ select PPC_I8259
+ help
+ This option enables support for the 'Nemo' motherboard
+ used in A-Eon's AmigaOne X1000. This consists of some
+ device tree patches and workarounds for the SB600 South
+ Bridge that provides SATA/USB/Audio.
+
+config PPC_PASEMI_IOMMU
+ bool "PA Semi IOMMU support"
+ depends on PPC_PASEMI
+ help
+ IOMMU support for PA Semi PWRficient
+
+config PPC_PASEMI_IOMMU_DMA_FORCE
+ bool "Force DMA engine to use IOMMU"
+ depends on PPC_PASEMI_IOMMU
+ help
+ This option forces the use of the IOMMU also for the
+ DMA engine. Otherwise the kernel will use it only when
+ running under a hypervisor.
+
+ If in doubt, say "N".
+
+config PPC_PASEMI_MDIO
+ depends on PHYLIB
+ tristate "MDIO support via GPIO"
+ default y
+ help
+ Driver for MDIO via GPIO on PWRficient platforms
+
+endmenu
diff --git a/arch/powerpc/platforms/pasemi/Makefile b/arch/powerpc/platforms/pasemi/Makefile
new file mode 100644
index 000000000..d2ce954a5
--- /dev/null
+++ b/arch/powerpc/platforms/pasemi/Makefile
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
+obj-y += setup.o pci.o time.o idle.o powersave.o iommu.o dma_lib.o misc.o
+obj-$(CONFIG_PPC_PASEMI_MDIO) += gpio_mdio.o
+obj-$(CONFIG_PCI_MSI) += msi.o
diff --git a/arch/powerpc/platforms/pasemi/dma_lib.c b/arch/powerpc/platforms/pasemi/dma_lib.c
new file mode 100644
index 000000000..1be1f18f6
--- /dev/null
+++ b/arch/powerpc/platforms/pasemi/dma_lib.c
@@ -0,0 +1,621 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2006-2007 PA Semi, Inc
+ *
+ * Common functions for DMA access on PA Semi PWRficient
+ */
+
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/sched.h>
+
+#include <asm/pasemi_dma.h>
+
+/* Sizes (in bits) of the allocation bitmaps declared below */
+#define MAX_TXCH 64
+#define MAX_RXCH 64
+#define MAX_FLAGS 64
+#define MAX_FUN 8
+
+/* Mapping of the DMA status area, created in pasemi_dma_init() */
+static struct pasdma_status *dma_status;
+
+/* Register mappings established by pasemi_dma_init() through map_onedev() */
+static void __iomem *iob_regs;
+static void __iomem *mac_regs[6];
+static void __iomem *dma_regs;
+
+/* Hardware IRQ number of the DMA device; channel IRQs are offsets from it */
+static int base_hw_irq;
+
+/* Channel counts read from the DMA device's PCI config space */
+static int num_txch, num_rxch;
+
+/* DMA controller PCI device; non-NULL also marks the library as initialized */
+static struct pci_dev *dma_pdev;
+
+/* Bitmaps to handle allocation of channels */
+
+/* In each bitmap a set bit means "free"; pasemi_dma_init() sets them all */
+static DECLARE_BITMAP(txch_free, MAX_TXCH);
+static DECLARE_BITMAP(rxch_free, MAX_RXCH);
+static DECLARE_BITMAP(flags_free, MAX_FLAGS);
+static DECLARE_BITMAP(fun_free, MAX_FUN);
+
+/* pasemi_read_iob_reg - read IOB register
+ * @reg: Register to read (offset into PCI CFG space)
+ *
+ * Returns the 32-bit little-endian value found at iob_regs + @reg.
+ */
+unsigned int pasemi_read_iob_reg(unsigned int reg)
+{
+	void __iomem *addr = iob_regs + reg;
+
+	return in_le32(addr);
+}
+EXPORT_SYMBOL(pasemi_read_iob_reg);
+
+/* pasemi_write_iob_reg - write IOB register
+ * @reg: Register to write to (offset into PCI CFG space)
+ * @val: Value to write
+ *
+ * Stores @val as a 32-bit little-endian word at iob_regs + @reg.
+ */
+void pasemi_write_iob_reg(unsigned int reg, unsigned int val)
+{
+	void __iomem *addr = iob_regs + reg;
+
+	out_le32(addr, val);
+}
+EXPORT_SYMBOL(pasemi_write_iob_reg);
+
+/* pasemi_read_mac_reg - read MAC register
+ * @intf: MAC interface (index into the mac_regs[] mappings; not range-checked)
+ * @reg: Register to read (offset into PCI CFG space)
+ *
+ * Returns the 32-bit little-endian value found at mac_regs[@intf] + @reg.
+ */
+unsigned int pasemi_read_mac_reg(int intf, unsigned int reg)
+{
+	void __iomem *base = mac_regs[intf];
+
+	return in_le32(base + reg);
+}
+EXPORT_SYMBOL(pasemi_read_mac_reg);
+
+/* pasemi_write_mac_reg - write MAC register
+ * @intf: MAC interface (index into the mac_regs[] mappings; not range-checked)
+ * @reg: Register to write to (offset into PCI CFG space)
+ * @val: Value to write
+ *
+ * Stores @val as a 32-bit little-endian word at mac_regs[@intf] + @reg.
+ */
+void pasemi_write_mac_reg(int intf, unsigned int reg, unsigned int val)
+{
+	void __iomem *base = mac_regs[intf];
+
+	out_le32(base + reg, val);
+}
+EXPORT_SYMBOL(pasemi_write_mac_reg);
+
+/* pasemi_read_dma_reg - read DMA register
+ * @reg: Register to read (offset into PCI CFG space)
+ *
+ * Returns the 32-bit little-endian value found at dma_regs + @reg.
+ */
+unsigned int pasemi_read_dma_reg(unsigned int reg)
+{
+	void __iomem *addr = dma_regs + reg;
+
+	return in_le32(addr);
+}
+EXPORT_SYMBOL(pasemi_read_dma_reg);
+
+/* pasemi_write_dma_reg - write DMA register
+ * @reg: Register to write to (offset into PCI CFG space)
+ * @val: Value to write
+ *
+ * Stores @val as a 32-bit little-endian word at dma_regs + @reg.
+ */
+void pasemi_write_dma_reg(unsigned int reg, unsigned int val)
+{
+	void __iomem *addr = dma_regs + reg;
+
+	out_le32(addr, val);
+}
+EXPORT_SYMBOL(pasemi_write_dma_reg);
+
+/* Claim a free TX channel from txch_free.
+ *
+ * The event-group bits in @type restrict the search window:
+ * TXCHAN_EVT0 -> channels [0, 10), TXCHAN_EVT1 -> [10, MAX_TXCH),
+ * anything else -> [0, MAX_TXCH).
+ *
+ * Returns the channel number, or -ENOSPC when the window has no free channel.
+ */
+static int pasemi_alloc_tx_chan(enum pasemi_dmachan_type type)
+{
+	int group = type & (TXCHAN_EVT0|TXCHAN_EVT1);
+	int first = 0;
+	int end = MAX_TXCH;
+	int chno;
+
+	if (group == TXCHAN_EVT0)
+		end = 10;
+	else if (group == TXCHAN_EVT1)
+		first = 10;
+
+	for (;;) {
+		chno = find_next_bit(txch_free, MAX_TXCH, first);
+		if (chno >= end)
+			return -ENOSPC;
+		/* Someone else may have grabbed it in the meantime; search again */
+		if (test_and_clear_bit(chno, txch_free))
+			return chno;
+	}
+}
+
+/* Return TX channel @chan to the free pool.
+ * Triggers BUG if the channel is already marked free.
+ */
+static void pasemi_free_tx_chan(int chan)
+{
+ BUG_ON(test_bit(chan, txch_free));
+ set_bit(chan, txch_free);
+}
+
+/* Claim the lowest free RX channel from rxch_free.
+ *
+ * Returns the channel number, or -ENOSPC when every RX channel is in use.
+ */
+static int pasemi_alloc_rx_chan(void)
+{
+	int bit;
+retry:
+	bit = find_first_bit(rxch_free, MAX_RXCH);
+	/* Compare against the RX bitmap size (was MAX_TXCH, same value today) */
+	if (bit >= MAX_RXCH)
+		return -ENOSPC;
+	/* Lost a race for this bit: look for another one */
+	if (!test_and_clear_bit(bit, rxch_free))
+		goto retry;
+
+	return bit;
+}
+
+/* Return RX channel @chan to the free pool.
+ * Triggers BUG if the channel is already marked free.
+ */
+static void pasemi_free_rx_chan(int chan)
+{
+ BUG_ON(test_bit(chan, rxch_free));
+ set_bit(chan, rxch_free);
+}
+
+/* pasemi_dma_alloc_chan - Allocate a DMA channel
+ * @type: Type of channel to allocate
+ * @total_size: Total size of structure to allocate (to allow for more
+ *		room behind the structure to be used by the client)
+ * @offset: Offset in bytes from start of the total structure to the beginning
+ *	    of struct pasemi_dmachan. Needed when struct pasemi_dmachan is
+ *	    not the first member of the client structure.
+ *
+ * pasemi_dma_alloc_chan allocates a DMA channel for use by a client. The
+ * type argument specifies whether it's a RX or TX channel, and in the case
+ * of TX channels which group it needs to belong to (if any).
+ *
+ * Returns a pointer to the struct pasemi_dmachan located @offset bytes into
+ * the zeroed allocation (this equals the start of the allocation when
+ * @offset is 0). Returns NULL if memory cannot be allocated or if no
+ * channel of the requested kind is free.
+ */
+void *pasemi_dma_alloc_chan(enum pasemi_dmachan_type type,
+			    int total_size, int offset)
+{
+	void *buf;
+	struct pasemi_dmachan *chan;
+	int chno;
+
+	BUG_ON(total_size < sizeof(struct pasemi_dmachan));
+
+	buf = kzalloc(total_size, GFP_KERNEL);
+
+	if (!buf)
+		return NULL;
+	chan = buf + offset;
+
+	/* Remember the start of the allocation for pasemi_dma_free_chan() */
+	chan->priv = buf;
+
+	switch (type & (TXCHAN|RXCHAN)) {
+	case RXCHAN:
+		chno = pasemi_alloc_rx_chan();
+		/* A negative errno must never be used as an index/IRQ offset */
+		if (chno < 0)
+			goto err_free;
+		chan->chno = chno;
+		/* RX interrupts follow the TX interrupts in hardware IRQ space */
+		chan->irq = irq_create_mapping(NULL,
+					       base_hw_irq + num_txch + chno);
+		chan->status = &dma_status->rx_sta[chno];
+		break;
+	case TXCHAN:
+		chno = pasemi_alloc_tx_chan(type);
+		if (chno < 0)
+			goto err_free;
+		chan->chno = chno;
+		chan->irq = irq_create_mapping(NULL, base_hw_irq + chno);
+		chan->status = &dma_status->tx_sta[chno];
+		break;
+	}
+
+	chan->chan_type = type;
+
+	return chan;
+
+err_free:
+	kfree(buf);
+	return NULL;
+}
+EXPORT_SYMBOL(pasemi_dma_alloc_chan);
+
+/* pasemi_dma_free_chan - Free a previously allocated channel
+ * @chan: Channel to free
+ *
+ * Releases the descriptor ring (when one is attached), hands the channel
+ * number back to the matching RX/TX pool and finally frees the memory
+ * recorded in @chan->priv.
+ */
+void pasemi_dma_free_chan(struct pasemi_dmachan *chan)
+{
+	int dir;
+
+	if (chan->ring_virt)
+		pasemi_dma_free_ring(chan);
+
+	dir = chan->chan_type & (RXCHAN|TXCHAN);
+	if (dir == RXCHAN)
+		pasemi_free_rx_chan(chan->chno);
+	else if (dir == TXCHAN)
+		pasemi_free_tx_chan(chan->chno);
+
+	kfree(chan->priv);
+}
+EXPORT_SYMBOL(pasemi_dma_free_chan);
+
+/* pasemi_dma_alloc_ring - Allocate descriptor ring for a channel
+ * @chan: Channel for which to allocate
+ * @ring_size: Ring size in 64-bit (8-byte) words
+ *
+ * Obtains coherent DMA memory for @ring_size descriptors and records the
+ * size, virtual address and DMA address in @chan. Triggers BUG if the
+ * channel already has a ring.
+ *
+ * Returns 0 on success, -ENOMEM if the allocation fails.
+ */
+int pasemi_dma_alloc_ring(struct pasemi_dmachan *chan, int ring_size)
+{
+	struct device *dev = &dma_pdev->dev;
+
+	BUG_ON(chan->ring_virt);
+
+	chan->ring_size = ring_size;
+	chan->ring_virt = dma_alloc_coherent(dev, ring_size * sizeof(u64),
+					     &chan->ring_dma, GFP_KERNEL);
+
+	return chan->ring_virt ? 0 : -ENOMEM;
+}
+EXPORT_SYMBOL(pasemi_dma_alloc_ring);
+
+/* pasemi_dma_free_ring - Free an allocated descriptor ring for a channel
+ * @chan: Channel for which to free the descriptor ring
+ *
+ * Gives the ring's coherent memory back and resets the ring fields of
+ * @chan. Triggers BUG if no ring is attached.
+ */
+void pasemi_dma_free_ring(struct pasemi_dmachan *chan)
+{
+	struct device *dev = &dma_pdev->dev;
+
+	BUG_ON(!chan->ring_virt);
+
+	dma_free_coherent(dev, chan->ring_size * sizeof(u64),
+			  chan->ring_virt, chan->ring_dma);
+
+	chan->ring_dma = 0;
+	chan->ring_size = 0;
+	chan->ring_virt = NULL;
+}
+EXPORT_SYMBOL(pasemi_dma_free_ring);
+
+/* pasemi_dma_start_chan - Start a DMA channel
+ * @chan: Channel to start
+ * @cmdsta: Additional CCMDSTA/TCMDSTA bits to write
+ *
+ * Writes @cmdsta plus the enable bit to the channel's command/status
+ * register. A channel whose type is exactly RXCHAN uses the RX register;
+ * every other type is handled as TX.
+ */
+void pasemi_dma_start_chan(const struct pasemi_dmachan *chan, const u32 cmdsta)
+{
+	if (chan->chan_type != RXCHAN) {
+		pasemi_write_dma_reg(PAS_DMA_TXCHAN_TCMDSTA(chan->chno),
+				     cmdsta | PAS_DMA_TXCHAN_TCMDSTA_EN);
+		return;
+	}
+
+	pasemi_write_dma_reg(PAS_DMA_RXCHAN_CCMDSTA(chan->chno),
+			     cmdsta | PAS_DMA_RXCHAN_CCMDSTA_EN);
+}
+EXPORT_SYMBOL(pasemi_dma_start_chan);
+
+/* pasemi_dma_stop_chan - Stop a DMA channel
+ * @chan: Channel to stop
+ *
+ * Requests a stop by writing the ST bit to the channel's CMDSTA register,
+ * then polls (up to MAX_RETRIES times, yielding the CPU between polls)
+ * until the ACT bit reads back clear. Once idle, the register is written
+ * to 0 to disable the channel.
+ *
+ * Returns 1 on success, 0 if the channel was still active after all polls.
+ */
+#define MAX_RETRIES 5000
+int pasemi_dma_stop_chan(const struct pasemi_dmachan *chan)
+{
+	u32 stop_bit, active_bit;
+	int reg, tries;
+
+	/* Pick the register and bit layout for the channel direction */
+	if (chan->chan_type == RXCHAN) {
+		reg = PAS_DMA_RXCHAN_CCMDSTA(chan->chno);
+		stop_bit = PAS_DMA_RXCHAN_CCMDSTA_ST;
+		active_bit = PAS_DMA_RXCHAN_CCMDSTA_ACT;
+	} else {
+		reg = PAS_DMA_TXCHAN_TCMDSTA(chan->chno);
+		stop_bit = PAS_DMA_TXCHAN_TCMDSTA_ST;
+		active_bit = PAS_DMA_TXCHAN_TCMDSTA_ACT;
+	}
+
+	pasemi_write_dma_reg(reg, stop_bit);
+
+	for (tries = 0; tries < MAX_RETRIES; tries++) {
+		if (!(pasemi_read_dma_reg(reg) & active_bit)) {
+			pasemi_write_dma_reg(reg, 0);
+			return 1;
+		}
+		cond_resched();
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(pasemi_dma_stop_chan);
+
+/* pasemi_dma_alloc_buf - Allocate a buffer to use for DMA
+ * @chan: Channel to allocate for (not consulted by the allocation itself)
+ * @size: Size of buffer in bytes
+ * @handle: Receives the DMA address of the buffer
+ *
+ * Thin wrapper around dma_alloc_coherent() on the DMA controller device.
+ *
+ * Returns the virtual address of the buffer, or NULL in case of failure.
+ */
+void *pasemi_dma_alloc_buf(struct pasemi_dmachan *chan, int size,
+			   dma_addr_t *handle)
+{
+	struct device *dev = &dma_pdev->dev;
+	void *vaddr = dma_alloc_coherent(dev, size, handle, GFP_KERNEL);
+
+	return vaddr;
+}
+EXPORT_SYMBOL(pasemi_dma_alloc_buf);
+
+/* pasemi_dma_free_buf - Free a buffer used for DMA
+ * @chan: Channel the buffer was allocated for
+ * @size: Size of buffer in bytes
+ * @handle: DMA handle
+ *
+ * Frees a previously allocated buffer.
+ *
+ * NOTE(review): dma_free_coherent() takes (dev, size, cpu_addr, dma_handle).
+ * Here @handle is passed in the cpu_addr position and GFP_KERNEL in the
+ * dma_handle position, which does not match what pasemi_dma_alloc_buf()
+ * returned. This signature carries no CPU address, so a correct fix needs
+ * an interface change - confirm with callers before relying on this helper.
+ */
+void pasemi_dma_free_buf(struct pasemi_dmachan *chan, int size,
+ dma_addr_t *handle)
+{
+ dma_free_coherent(&dma_pdev->dev, size, handle, GFP_KERNEL);
+}
+EXPORT_SYMBOL(pasemi_dma_free_buf);
+
+/* pasemi_dma_alloc_flag - Allocate a flag (event) for channel synchronization
+ *
+ * Claims the lowest free entry in flags_free for use with event descriptors.
+ * Returns allocated flag (0-63), -ENOSPC when none is free.
+ */
+int pasemi_dma_alloc_flag(void)
+{
+	int flag;
+
+	for (;;) {
+		flag = find_first_bit(flags_free, MAX_FLAGS);
+		if (flag >= MAX_FLAGS)
+			return -ENOSPC;
+		/* Only the caller that actually clears the bit owns the flag */
+		if (test_and_clear_bit(flag, flags_free))
+			return flag;
+	}
+}
+EXPORT_SYMBOL(pasemi_dma_alloc_flag);
+
+
+/* pasemi_dma_free_flag - Deallocates a flag (event)
+ * @flag: Flag number to deallocate (0 .. MAX_FLAGS-1)
+ *
+ * Frees up a flag so it can be reused for other purposes. Triggers BUG on
+ * an out-of-range flag or on a flag that is already free.
+ */
+void pasemi_dma_free_flag(int flag)
+{
+	/* Validate the index before using it to address the bitmap */
+	BUG_ON(flag < 0 || flag >= MAX_FLAGS);
+	BUG_ON(test_bit(flag, flags_free));
+	set_bit(flag, flags_free);
+}
+EXPORT_SYMBOL(pasemi_dma_free_flag);
+
+
+/* pasemi_dma_set_flag - Sets a flag (event) to 1
+ * @flag: Flag number to set active (0 .. MAX_FLAGS-1)
+ *
+ * Flags 0-31 are written through PAS_DMA_TXF_SFLG0 and flags 32-63 through
+ * PAS_DMA_TXF_SFLG1. Each register is 32 bits wide, so the bit position
+ * within the second register is @flag - 32.
+ */
+void pasemi_dma_set_flag(int flag)
+{
+	BUG_ON(flag < 0 || flag >= MAX_FLAGS);
+	if (flag < 32)
+		pasemi_write_dma_reg(PAS_DMA_TXF_SFLG0, 1U << flag);
+	else
+		/* Shifting a 32-bit value by >= 32 is undefined; rebase first */
+		pasemi_write_dma_reg(PAS_DMA_TXF_SFLG1, 1U << (flag - 32));
+}
+EXPORT_SYMBOL(pasemi_dma_set_flag);
+
+/* pasemi_dma_clear_flag - Sets a flag (event) to 0
+ * @flag: Flag number to set inactive (0 .. MAX_FLAGS-1)
+ *
+ * Flags 0-31 are cleared through PAS_DMA_TXF_CFLG0 and flags 32-63 through
+ * PAS_DMA_TXF_CFLG1. Each register is 32 bits wide, so the bit position
+ * within the second register is @flag - 32.
+ */
+void pasemi_dma_clear_flag(int flag)
+{
+	BUG_ON(flag < 0 || flag >= MAX_FLAGS);
+	if (flag < 32)
+		pasemi_write_dma_reg(PAS_DMA_TXF_CFLG0, 1U << flag);
+	else
+		/* Shifting a 32-bit value by >= 32 is undefined; rebase first */
+		pasemi_write_dma_reg(PAS_DMA_TXF_CFLG1, 1U << (flag - 32));
+}
+EXPORT_SYMBOL(pasemi_dma_clear_flag);
+
+/* pasemi_dma_alloc_fun - Allocate a function engine
+ *
+ * Allocates a function engine to use for crypto/checksum offload
+ * Returns allocated engine (0 .. MAX_FUN-1), < 0 on error.
+ */
+int pasemi_dma_alloc_fun(void)
+{
+	int bit;
+
+retry:
+	/* fun_free holds MAX_FUN bits; search exactly that many */
+	bit = find_first_bit(fun_free, MAX_FUN);
+	if (bit >= MAX_FUN)
+		return -ENOSPC;
+	/* Lost a race for this bit: look for another one */
+	if (!test_and_clear_bit(bit, fun_free))
+		goto retry;
+
+	return bit;
+}
+EXPORT_SYMBOL(pasemi_dma_alloc_fun);
+
+
+/* pasemi_dma_free_fun - Deallocates a function engine
+ * @fun: Engine number to deallocate (0 .. MAX_FUN-1)
+ *
+ * Frees up a function engine so it can be used for other purposes.
+ * Triggers BUG on an out-of-range engine or one that is already free.
+ */
+void pasemi_dma_free_fun(int fun)
+{
+	/* Bound by the size of fun_free, checked before touching the bitmap */
+	BUG_ON(fun < 0 || fun >= MAX_FUN);
+	BUG_ON(test_bit(fun, fun_free));
+	set_bit(fun, fun_free);
+}
+EXPORT_SYMBOL(pasemi_dma_free_fun);
+
+
+/* Map the register window of PCI device @p.
+ *
+ * Uses resource @index of the device's device-tree node when the node exists
+ * and can be mapped; otherwise falls back to a fixed address derived from
+ * the device's devfn.
+ */
+static void *map_onedev(struct pci_dev *p, int index)
+{
+	struct device_node *np = pci_device_to_OF_node(p);
+	void __iomem *regs = NULL;
+
+	if (np)
+		regs = of_iomap(np, index);
+	if (regs)
+		return regs;
+
+	/* This is hardcoded and ugly, but we have some firmware versions
+	 * that don't provide the register space in the device tree. Luckily
+	 * they are at well-known locations so we can just do the math here.
+	 */
+	return ioremap(0xe0000000 + (p->devfn << 12), 0x2000);
+}
+
+/* pasemi_dma_init - Initialize the PA Semi DMA library
+ *
+ * This function initializes the DMA library. It must be called before
+ * any other function in the library. Repeated calls are harmless: once
+ * dma_pdev is set the function returns 0 without doing anything.
+ *
+ * Steps: locate the I/O bridge and DMA controller, map their registers,
+ * read the channel counts, map every MAC's registers, map the DMA status
+ * area, mark all channels/flags/function engines free, and restart the
+ * RX and TX sections of the DMA engine.
+ *
+ * Returns 0 on success, errno on failure.
+ */
+int pasemi_dma_init(void)
+{
+	static DEFINE_SPINLOCK(init_lock);
+	struct pci_dev *iob_pdev;
+	struct pci_dev *pdev;
+	struct resource res;
+	struct device_node *dn;
+	int i, intf, err = 0;
+	unsigned long timeout;
+	u32 tmp;
+
+	if (!machine_is(pasemi))
+		return -ENODEV;
+
+	spin_lock(&init_lock);
+
+	/* Make sure we haven't already initialized */
+	if (dma_pdev)
+		goto out;
+
+	iob_pdev = pci_get_device(PCI_VENDOR_ID_PASEMI, 0xa001, NULL);
+	if (!iob_pdev) {
+		BUG();
+		pr_warn("Can't find I/O Bridge\n");
+		err = -ENODEV;
+		goto out;
+	}
+	iob_regs = map_onedev(iob_pdev, 0);
+
+	dma_pdev = pci_get_device(PCI_VENDOR_ID_PASEMI, 0xa007, NULL);
+	if (!dma_pdev) {
+		BUG();
+		pr_warn("Can't find DMA controller\n");
+		err = -ENODEV;
+		goto out;
+	}
+	dma_regs = map_onedev(dma_pdev, 0);
+	base_hw_irq = virq_to_hw(dma_pdev->irq);
+
+	pci_read_config_dword(dma_pdev, PAS_DMA_CAP_TXCH, &tmp);
+	num_txch = (tmp & PAS_DMA_CAP_TXCH_TCHN_M) >> PAS_DMA_CAP_TXCH_TCHN_S;
+
+	pci_read_config_dword(dma_pdev, PAS_DMA_CAP_RXCH, &tmp);
+	num_rxch = (tmp & PAS_DMA_CAP_RXCH_RCHN_M) >> PAS_DMA_CAP_RXCH_RCHN_S;
+
+	/*
+	 * Map the MAC register windows (device IDs 0xa006, then 0xa005).
+	 * Stop once mac_regs[] is full so that an unexpected number of
+	 * devices cannot write past the array; the pci_dev_put() after each
+	 * loop drops the reference held when the loop ends early.
+	 */
+	intf = 0;
+	for (pdev = pci_get_device(PCI_VENDOR_ID_PASEMI, 0xa006, NULL);
+	     pdev && intf < ARRAY_SIZE(mac_regs);
+	     pdev = pci_get_device(PCI_VENDOR_ID_PASEMI, 0xa006, pdev))
+		mac_regs[intf++] = map_onedev(pdev, 0);
+
+	pci_dev_put(pdev);
+
+	for (pdev = pci_get_device(PCI_VENDOR_ID_PASEMI, 0xa005, NULL);
+	     pdev && intf < ARRAY_SIZE(mac_regs);
+	     pdev = pci_get_device(PCI_VENDOR_ID_PASEMI, 0xa005, pdev))
+		mac_regs[intf++] = map_onedev(pdev, 0);
+
+	pci_dev_put(pdev);
+
+	dn = pci_device_to_OF_node(iob_pdev);
+	if (dn)
+		err = of_address_to_resource(dn, 1, &res);
+	if (!dn || err) {
+		/* Fallback for old firmware */
+		res.start = 0xfd800000;
+		res.end = res.start + 0x1000;
+		/* The fallback is a success path: don't report the lookup error */
+		err = 0;
+	}
+	dma_status = ioremap_cache(res.start, resource_size(&res));
+	pci_dev_put(iob_pdev);
+
+	for (i = 0; i < MAX_TXCH; i++)
+		__set_bit(i, txch_free);
+
+	for (i = 0; i < MAX_RXCH; i++)
+		__set_bit(i, rxch_free);
+
+	/* Disable the RX section and wait up to one second for it to stop */
+	timeout = jiffies + HZ;
+	pasemi_write_dma_reg(PAS_DMA_COM_RXCMD, 0);
+	while (pasemi_read_dma_reg(PAS_DMA_COM_RXSTA) & 1) {
+		if (time_after(jiffies, timeout)) {
+			pr_warn("Warning: Could not disable RX section\n");
+			break;
+		}
+	}
+
+	/* Same for the TX section */
+	timeout = jiffies + HZ;
+	pasemi_write_dma_reg(PAS_DMA_COM_TXCMD, 0);
+	while (pasemi_read_dma_reg(PAS_DMA_COM_TXSTA) & 1) {
+		if (time_after(jiffies, timeout)) {
+			pr_warn("Warning: Could not disable TX section\n");
+			break;
+		}
+	}
+
+	/* setup resource allocations for the different DMA sections */
+	tmp = pasemi_read_dma_reg(PAS_DMA_COM_CFG);
+	pasemi_write_dma_reg(PAS_DMA_COM_CFG, tmp | 0x18000000);
+
+	/* enable tx section */
+	pasemi_write_dma_reg(PAS_DMA_COM_TXCMD, PAS_DMA_COM_TXCMD_EN);
+
+	/* enable rx section */
+	pasemi_write_dma_reg(PAS_DMA_COM_RXCMD, PAS_DMA_COM_RXCMD_EN);
+
+	for (i = 0; i < MAX_FLAGS; i++)
+		__set_bit(i, flags_free);
+
+	for (i = 0; i < MAX_FUN; i++)
+		__set_bit(i, fun_free);
+
+	/* clear all status flags */
+	pasemi_write_dma_reg(PAS_DMA_TXF_CFLG0, 0xffffffff);
+	pasemi_write_dma_reg(PAS_DMA_TXF_CFLG1, 0xffffffff);
+
+	pr_info("PA Semi PWRficient DMA library initialized "
+		"(%d tx, %d rx channels)\n", num_txch, num_rxch);
+
+out:
+	spin_unlock(&init_lock);
+	return err;
+}
+EXPORT_SYMBOL(pasemi_dma_init);
diff --git a/arch/powerpc/platforms/pasemi/gpio_mdio.c b/arch/powerpc/platforms/pasemi/gpio_mdio.c
new file mode 100644
index 000000000..fd130fe7a
--- /dev/null
+++ b/arch/powerpc/platforms/pasemi/gpio_mdio.c
@@ -0,0 +1,327 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2006-2007 PA Semi, Inc
+ *
+ * Author: Olof Johansson, PA Semi
+ *
+ * Maintained by: Olof Johansson <olof@lixom.net>
+ *
+ * Based on drivers/net/fs_enet/mii-bitbang.c.
+ */
+
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/errno.h>
+#include <linux/ioport.h>
+#include <linux/interrupt.h>
+#include <linux/phy.h>
+#include <linux/of_address.h>
+#include <linux/of_mdio.h>
+#include <linux/platform_device.h>
+
+/* Delay, in microseconds (passed to udelay()), between bit-bang edges */
+#define DELAY 1
+
+/* Mapping of the GPIO register block, set up in gpio_mdio_init() */
+static void __iomem *gpio_regs;
+
+/* Per-bus private data: GPIO pin numbers read from the device tree */
+struct gpio_priv {
+ int mdc_pin;
+ int mdio_pin;
+};
+
+/* Fetch the clock / data pin number stored in a bus's private data */
+#define MDC_PIN(bus) (((struct gpio_priv *)bus->priv)->mdc_pin)
+#define MDIO_PIN(bus) (((struct gpio_priv *)bus->priv)->mdio_pin)
+
+/* Drive the MDIO data pin low: write its bit mask to gpio_regs + 0x10 */
+static inline void mdio_lo(struct mii_bus *bus)
+{
+	u32 mask = 1 << MDIO_PIN(bus);
+
+	out_le32(gpio_regs+0x10, mask);
+}
+
+/* Drive the MDIO data pin high: write its bit mask to gpio_regs + 0 */
+static inline void mdio_hi(struct mii_bus *bus)
+{
+	u32 mask = 1 << MDIO_PIN(bus);
+
+	out_le32(gpio_regs, mask);
+}
+
+/* Drive the MDC clock pin low: write its bit mask to gpio_regs + 0x10 */
+static inline void mdc_lo(struct mii_bus *bus)
+{
+	u32 mask = 1 << MDC_PIN(bus);
+
+	out_le32(gpio_regs+0x10, mask);
+}
+
+/* Drive the MDC clock pin high: write its bit mask to gpio_regs + 0 */
+static inline void mdc_hi(struct mii_bus *bus)
+{
+	u32 mask = 1 << MDC_PIN(bus);
+
+	out_le32(gpio_regs, mask);
+}
+
+/* Make both MDC and MDIO actively driven: write both masks to gpio_regs + 0x20 */
+static inline void mdio_active(struct mii_bus *bus)
+{
+	u32 pins = (1 << MDC_PIN(bus)) | (1 << MDIO_PIN(bus));
+
+	out_le32(gpio_regs+0x20, pins);
+}
+
+/* Release (tri-state) the MDIO data pin: write its mask to gpio_regs + 0x30 */
+static inline void mdio_tristate(struct mii_bus *bus)
+{
+	u32 mask = (1 << MDIO_PIN(bus));
+
+	out_le32(gpio_regs+0x30, mask);
+}
+
+/* Sample the MDIO data pin from gpio_regs + 0x40; returns 0 or 1 */
+static inline int mdio_read(struct mii_bus *bus)
+{
+	u32 level = in_le32(gpio_regs+0x40);
+
+	return !!(level & (1 << MDIO_PIN(bus)));
+}
+
+/* Shift one bit out on the bus: put @bit on MDIO, then pulse MDC
+ * high and back low, waiting DELAY microseconds before each clock edge.
+ */
+static void clock_out(struct mii_bus *bus, int bit)
+{
+ if (bit)
+ mdio_hi(bus);
+ else
+ mdio_lo(bus);
+ udelay(DELAY);
+ mdc_hi(bus);
+ udelay(DELAY);
+ mdc_lo(bus);
+}
+
+/* Send the part of an MDIO frame shared by reads and writes: preamble,
+ * start bits, opcode, 5-bit PHY address and 5-bit register address
+ * (both addresses most significant bit first).
+ */
+static void bitbang_pre(struct mii_bus *bus, int read, u8 addr, u8 reg)
+{
+	int n;
+	u8 mask;
+
+	/* CFE uses a really long preamble (40 bits). We'll do the same. */
+	mdio_active(bus);
+	for (n = 40; n > 0; n--)
+		clock_out(bus, 1);
+
+	/* start bits (01) */
+	clock_out(bus, 0);
+	clock_out(bus, 1);
+
+	/* opcode: read is sent as (10), write as (01) */
+	clock_out(bus, read);
+	clock_out(bus, !read);
+
+	/* PHY address, bit 4 down to bit 0 */
+	for (mask = 0x10; mask; mask >>= 1)
+		clock_out(bus, (addr & mask) != 0);
+
+	/* register address, bit 4 down to bit 0 */
+	for (mask = 0x10; mask; mask >>= 1)
+		clock_out(bus, (reg & mask) != 0);
+}
+
+/* mii_bus read callback: bit-bang a read of register @location on PHY
+ * @phy_id and return the 16-bit value read (always non-negative).
+ */
+static int gpio_mdio_read(struct mii_bus *bus, int phy_id, int location)
+{
+ u16 rdreg;
+ int ret, i;
+ u8 addr = phy_id & 0xff;
+ u8 reg = location & 0xff;
+
+ bitbang_pre(bus, 1, addr, reg);
+
+ /* tri-state our MDIO I/O pin so we can read */
+ mdio_tristate(bus);
+ udelay(DELAY);
+ mdc_hi(bus);
+ udelay(DELAY);
+ mdc_lo(bus);
+
+ /* read 16 bits of register data, MSB first */
+ rdreg = 0;
+ for (i = 0; i < 16; i++) {
+ mdc_lo(bus);
+ udelay(DELAY);
+ mdc_hi(bus);
+ udelay(DELAY);
+ mdc_lo(bus);
+ udelay(DELAY);
+ /* sample the data line after the falling clock edge */
+ rdreg <<= 1;
+ rdreg |= mdio_read(bus);
+ }
+
+ /* one extra clock pulse after the data bits */
+ mdc_hi(bus);
+ udelay(DELAY);
+ mdc_lo(bus);
+ udelay(DELAY);
+
+ ret = rdreg;
+
+ return ret;
+}
+
+/* mii_bus write callback: bit-bang a write of @val to register @location
+ * on PHY @phy_id. Always returns 0.
+ */
+static int gpio_mdio_write(struct mii_bus *bus, int phy_id, int location, u16 val)
+{
+ int i;
+
+ u8 addr = phy_id & 0xff;
+ u8 reg = location & 0xff;
+ u16 value = val & 0xffff;
+
+ bitbang_pre(bus, 0, addr, reg);
+
+ /* send the turnaround (10) */
+ mdc_lo(bus);
+ mdio_hi(bus);
+ udelay(DELAY);
+ mdc_hi(bus);
+ udelay(DELAY);
+ mdc_lo(bus);
+ mdio_lo(bus);
+ udelay(DELAY);
+ mdc_hi(bus);
+ udelay(DELAY);
+
+ /* write 16 bits of register data, MSB first */
+ for (i = 0; i < 16; i++) {
+ mdc_lo(bus);
+ if (value & 0x8000)
+ mdio_hi(bus);
+ else
+ mdio_lo(bus);
+ udelay(DELAY);
+ mdc_hi(bus);
+ udelay(DELAY);
+ value <<= 1;
+ }
+
+ /*
+ * Tri-state the MDIO line.
+ */
+ mdio_tristate(bus);
+ mdc_lo(bus);
+ udelay(DELAY);
+ mdc_hi(bus);
+ udelay(DELAY);
+ return 0;
+}
+
+/* mii_bus reset callback: intentionally a no-op that reports success */
+static int gpio_mdio_reset(struct mii_bus *bus)
+{
+ /*nothing here - dunno how to reset it*/
+ return 0;
+}
+
+
+/* Platform probe: create and register one bit-banged MDIO bus.
+ *
+ * The device-tree node must provide "reg" (used as the bus id), "mdc-pin"
+ * and "mdio-pin". Returns 0 on success, -ENODEV when a required property
+ * is missing, -ENOMEM on allocation failure, or the error reported by
+ * of_mdiobus_register().
+ */
+static int gpio_mdio_probe(struct platform_device *ofdev)
+{
+	struct device *dev = &ofdev->dev;
+	struct device_node *np = ofdev->dev.of_node;
+	struct mii_bus *new_bus;
+	struct gpio_priv *priv;
+	const unsigned int *reg_prop, *mdc_prop, *mdio_prop;
+	int err;
+
+	/* Validate the device tree first so nothing is dereferenced blindly */
+	reg_prop = of_get_property(np, "reg", NULL);
+	mdc_prop = of_get_property(np, "mdc-pin", NULL);
+	mdio_prop = of_get_property(np, "mdio-pin", NULL);
+	if (!reg_prop || !mdc_prop || !mdio_prop) {
+		pr_err("pasemi gpio mdio bus: missing reg/mdc-pin/mdio-pin property\n");
+		return -ENODEV;
+	}
+
+	err = -ENOMEM;
+	priv = kzalloc(sizeof(struct gpio_priv), GFP_KERNEL);
+	if (!priv)
+		goto out;
+
+	new_bus = mdiobus_alloc();
+
+	if (!new_bus)
+		goto out_free_priv;
+
+	new_bus->name = "pasemi gpio mdio bus";
+	new_bus->read = &gpio_mdio_read;
+	new_bus->write = &gpio_mdio_write;
+	new_bus->reset = &gpio_mdio_reset;
+
+	snprintf(new_bus->id, MII_BUS_ID_SIZE, "%x", *reg_prop);
+	new_bus->priv = priv;
+
+	priv->mdc_pin = *mdc_prop;
+	priv->mdio_pin = *mdio_prop;
+
+	new_bus->parent = dev;
+	dev_set_drvdata(dev, new_bus);
+
+	err = of_mdiobus_register(new_bus, np);
+
+	if (err != 0) {
+		pr_err("%s: Cannot register as MDIO bus, err %d\n",
+		       new_bus->name, err);
+		goto out_free_bus;
+	}
+
+	return 0;
+
+out_free_bus:
+	/* Buses from mdiobus_alloc() must be released with mdiobus_free() */
+	mdiobus_free(new_bus);
+out_free_priv:
+	kfree(priv);
+out:
+	return err;
+}
+
+
+/* Platform remove: unregister the bus created by probe and free both the
+ * private pin data and the bus itself. Always returns 0.
+ */
+static int gpio_mdio_remove(struct platform_device *dev)
+{
+	struct device *d = &dev->dev;
+	struct mii_bus *bus = dev_get_drvdata(d);
+
+	mdiobus_unregister(bus);
+	dev_set_drvdata(d, NULL);
+
+	kfree(bus->priv);
+	bus->priv = NULL;
+
+	mdiobus_free(bus);
+	return 0;
+}
+
+/* Device-tree match table: binds to nodes compatible with "gpio-mdio" */
+static const struct of_device_id gpio_mdio_match[] =
+{
+ {
+ .compatible = "gpio-mdio",
+ },
+ {},
+};
+MODULE_DEVICE_TABLE(of, gpio_mdio_match);
+
+/* Platform driver glue tying probe/remove to the match table above */
+static struct platform_driver gpio_mdio_driver =
+{
+ .probe = gpio_mdio_probe,
+ .remove = gpio_mdio_remove,
+ .driver = {
+ .name = "gpio-mdio-bitbang",
+ .of_match_table = gpio_mdio_match,
+ },
+};
+
+/* Module init: map the GPIO register block and register the driver.
+ *
+ * The GPIO node is looked up as "1682m-gpio" first, then as
+ * "pasemi,pwrficient-gpio". Returns -ENODEV if no node is found or it
+ * cannot be mapped, otherwise the result of platform_driver_register().
+ */
+static int __init gpio_mdio_init(void)
+{
+	struct device_node *np;
+	int err;
+
+	np = of_find_compatible_node(NULL, NULL, "1682m-gpio");
+	if (!np)
+		np = of_find_compatible_node(NULL, NULL,
+					     "pasemi,pwrficient-gpio");
+	if (!np)
+		return -ENODEV;
+	gpio_regs = of_iomap(np, 0);
+	of_node_put(np);
+
+	if (!gpio_regs)
+		return -ENODEV;
+
+	err = platform_driver_register(&gpio_mdio_driver);
+	if (err) {
+		/* Don't leak the mapping when registration fails */
+		iounmap(gpio_regs);
+		gpio_regs = NULL;
+	}
+
+	return err;
+}
+module_init(gpio_mdio_init);
+
+/* Module exit: unregister the driver, then drop the GPIO register mapping */
+static void __exit gpio_mdio_exit(void)
+{
+ platform_driver_unregister(&gpio_mdio_driver);
+ if (gpio_regs)
+ iounmap(gpio_regs);
+}
+module_exit(gpio_mdio_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Olof Johansson <olof@lixom.net>");
+MODULE_DESCRIPTION("Driver for MDIO over GPIO on PA Semi PWRficient-based boards");
diff --git a/arch/powerpc/platforms/pasemi/idle.c b/arch/powerpc/platforms/pasemi/idle.c
new file mode 100644
index 000000000..6087c70ed
--- /dev/null
+++ b/arch/powerpc/platforms/pasemi/idle.c
@@ -0,0 +1,93 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2006-2007 PA Semi, Inc
+ *
+ * Maintained by: Olof Johansson <olof@lixom.net>
+ */
+
+#undef DEBUG
+
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/irq.h>
+
+#include <asm/machdep.h>
+#include <asm/reg.h>
+#include <asm/smp.h>
+
+#include "pasemi.h"
+
+/* One selectable idle mode: its "idle=" name and its idle entry routine */
+struct sleep_mode {
+ char *name;
+ void (*entry)(void);
+};
+
+/* Table of available idle modes; index 0 ("spin") is the default */
+static struct sleep_mode modes[] = {
+ { .name = "spin", .entry = &idle_spin },
+ { .name = "doze", .entry = &idle_doze },
+};
+
+/* Index into modes[] chosen by idle_param() and used by pasemi_idle_init() */
+static int current_mode = 0;
+
+/* System reset handler used to come back from power-save states.
+ *
+ * Returns 1 when the reset was a decrementer or external-interrupt wakeup
+ * and has been handled here, 0 when it must be treated as a real system
+ * reset.
+ */
+static int pasemi_system_reset_exception(struct pt_regs *regs)
+{
+	unsigned long wake;
+
+	/* If we were woken up from power savings, we need to return
+	 * to the calling function, since nip is not saved across
+	 * all modes.
+	 */
+	if (regs->msr & SRR1_WAKEMASK)
+		regs_set_return_ip(regs, regs->link);
+
+	wake = regs->msr & SRR1_WAKEMASK;
+
+	if (wake == SRR1_WAKEDEC) {
+		set_dec(1);
+	} else if (wake != SRR1_WAKEEE) {
+		/* do system reset */
+		return 0;
+	}
+	/*
+	 * SRR1_WAKEEE needs no work here: external interrupts are handled
+	 * when interrupts get re-enabled and we take them as regular
+	 * exceptions. We are in an NMI context and can't handle them here.
+	 */
+
+	/* Set higher astate since we come out of power savings at 0 */
+	restore_astate(hard_smp_processor_id());
+
+	/* everything handled */
+	regs_set_recoverable(regs);
+	return 1;
+}
+
+/* Late initcall: install the PA6T system-reset hook and the idle routine
+ * of the selected mode. Without CONFIG_PPC_PASEMI_CPUFREQ the selection is
+ * forced back to mode 0. Always returns 0.
+ */
+static int __init pasemi_idle_init(void)
+{
+	const struct sleep_mode *mode;
+
+#ifndef CONFIG_PPC_PASEMI_CPUFREQ
+	pr_warn("No cpufreq driver, powersavings modes disabled\n");
+	current_mode = 0;
+#endif
+	mode = &modes[current_mode];
+
+	ppc_md.system_reset_exception = pasemi_system_reset_exception;
+	ppc_md.power_save = mode->entry;
+	pr_info("Using PA6T idle loop (%s)\n", mode->name);
+
+	return 0;
+}
+machine_late_initcall(pasemi, pasemi_idle_init);
+
+/* Handler for the "idle=" early parameter: select the idle mode whose name
+ * matches @p. An unknown name leaves the current selection untouched.
+ *
+ * Returns 0, or -EINVAL when the parameter was given without a value.
+ */
+static int __init idle_param(char *p)
+{
+	int i;
+
+	/* "idle" without "=value" arrives as NULL; don't hand it to strcmp() */
+	if (!p)
+		return -EINVAL;
+
+	for (i = 0; i < ARRAY_SIZE(modes); i++) {
+		if (!strcmp(modes[i].name, p)) {
+			current_mode = i;
+			break;
+		}
+	}
+	return 0;
+}
+
+early_param("idle", idle_param);
diff --git a/arch/powerpc/platforms/pasemi/iommu.c b/arch/powerpc/platforms/pasemi/iommu.c
new file mode 100644
index 000000000..375487cba
--- /dev/null
+++ b/arch/powerpc/platforms/pasemi/iommu.c
@@ -0,0 +1,267 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2005-2008, PA Semi, Inc
+ *
+ * Maintained by: Olof Johansson <olof@lixom.net>
+ */
+
+#undef DEBUG
+
+#include <linux/memblock.h>
+#include <linux/types.h>
+#include <linux/spinlock.h>
+#include <linux/pci.h>
+#include <linux/of.h>
+#include <asm/iommu.h>
+#include <asm/machdep.h>
+#include <asm/firmware.h>
+
+#include "pasemi.h"
+
+#define IOBMAP_PAGE_SHIFT 12
+#define IOBMAP_PAGE_SIZE (1 << IOBMAP_PAGE_SHIFT)
+#define IOBMAP_PAGE_MASK (IOBMAP_PAGE_SIZE - 1)
+
+#define IOB_BASE 0xe0000000
+#define IOB_SIZE 0x3000
+/* Configuration registers */
+#define IOBCAP_REG 0x40
+#define IOBCOM_REG 0x100
+/* Enable IOB address translation */
+#define IOBCOM_ATEN 0x00000100
+
+/* Address decode configuration register */
+#define IOB_AD_REG 0x14c
+/* IOBCOM_AD_REG fields */
+#define IOB_AD_VGPRT 0x00000e00
+#define IOB_AD_VGAEN 0x00000100
+/* Direct mapping settings */
+#define IOB_AD_MPSEL_MASK 0x00000030
+#define IOB_AD_MPSEL_B38 0x00000000
+#define IOB_AD_MPSEL_B40 0x00000010
+#define IOB_AD_MPSEL_B42 0x00000020
+/* Translation window size / enable */
+#define IOB_AD_TRNG_MASK 0x00000003
+#define IOB_AD_TRNG_256M 0x00000000
+#define IOB_AD_TRNG_2G 0x00000001
+#define IOB_AD_TRNG_128G 0x00000003
+
+#define IOB_TABLEBASE_REG 0x154
+
+/* Base of the 64 4-byte L1 registers */
+#define IOB_XLT_L1_REGBASE 0x2b00
+
+/* Register to invalidate TLB entries */
+#define IOB_AT_INVAL_TLB_REG 0x2d00
+
+/* The top two bits of the level 1 entry contains valid and type flags */
+#define IOBMAP_L1E_V 0x40000000
+#define IOBMAP_L1E_V_B 0x80000000
+
+/* For big page entries, the bottom two bits contains flags */
+#define IOBMAP_L1E_BIG_CACHED 0x00000002
+#define IOBMAP_L1E_BIG_PRIORITY 0x00000001
+
+/* For regular level 2 entries, top 2 bits contain valid and cache flags */
+#define IOBMAP_L2E_V 0x80000000
+#define IOBMAP_L2E_V_CACHED 0xc0000000
+
+/* Mapping of the IOB register block, created in iob_init() */
+static void __iomem *iob;
+/* Values representing an empty level-1 / level-2 entry, set in iob_init() */
+static u32 iob_l1_emptyval;
+static u32 iob_l2_emptyval;
+/* Base of the level-2 translation table allocated in iob_init() */
+static u32 *iob_l2_base;
+
+/* The single IOMMU table used for all devices, plus its "set up" flag */
+static struct iommu_table iommu_table_iobmap;
+static int iommu_table_iobmap_inited;
+
+/* iommu_table_ops .set callback: install @npages consecutive translations
+ * starting at table slot @index, mapping the memory that begins at kernel
+ * address @uaddr. After each entry is written, a TLB invalidate is issued
+ * for the matching bus address. @direction and @attrs are unused.
+ *
+ * Always returns 0.
+ */
+static int iobmap_build(struct iommu_table *tbl, long index,
+			long npages, unsigned long uaddr,
+			enum dma_data_direction direction,
+			unsigned long attrs)
+{
+	unsigned long bus_addr;
+	u32 *entry;
+	u32 rpn;
+
+	pr_debug("iobmap: build at: %lx, %lx, addr: %lx\n", index, npages, uaddr);
+
+	entry = ((u32 *)tbl->it_base) + index;
+	bus_addr = (tbl->it_offset + index) << IOBMAP_PAGE_SHIFT;
+
+	for (; npages--; uaddr += IOBMAP_PAGE_SIZE,
+			 bus_addr += IOBMAP_PAGE_SIZE) {
+		rpn = __pa(uaddr) >> IOBMAP_PAGE_SHIFT;
+		*entry = IOBMAP_L2E_V | rpn;
+		entry++;
+
+		/* invalidate tlb, can be optimized more */
+		out_le32(iob+IOB_AT_INVAL_TLB_REG, bus_addr >> 14);
+	}
+
+	return 0;
+}
+
+
+/* iommu_table_ops .clear callback: reset @npages consecutive table slots
+ * starting at @index to iob_l2_emptyval, issuing a TLB invalidate for the
+ * matching bus address after each one.
+ */
+static void iobmap_free(struct iommu_table *tbl, long index,
+			long npages)
+{
+	unsigned long bus_addr;
+	u32 *entry;
+
+	pr_debug("iobmap: free at: %lx, %lx\n", index, npages);
+
+	entry = ((u32 *)tbl->it_base) + index;
+	bus_addr = (tbl->it_offset + index) << IOBMAP_PAGE_SHIFT;
+
+	for (; npages--; bus_addr += IOBMAP_PAGE_SIZE) {
+		*entry = iob_l2_emptyval;
+		entry++;
+
+		/* invalidate tlb, can be optimized more */
+		out_le32(iob+IOB_AT_INVAL_TLB_REG, bus_addr >> 14);
+	}
+}
+
+/* Table operations handed to the common IOMMU code for the IOB map */
+static struct iommu_table_ops iommu_table_iobmap_ops = {
+ .set = iobmap_build,
+ .clear = iobmap_free
+};
+
+/* Fill in iommu_table_iobmap (2GB window of IOBMAP-sized pages, backed by
+ * iob_l2_base) and register it with the common IOMMU code. Panics if the
+ * common code rejects the table.
+ */
+static void iommu_table_iobmap_setup(void)
+{
+	struct iommu_table *tbl = &iommu_table_iobmap;
+
+	pr_debug(" -> %s\n", __func__);
+
+	tbl->it_busno = 0;
+	tbl->it_index = 0;
+	tbl->it_offset = 0;
+	tbl->it_page_shift = IOBMAP_PAGE_SHIFT;
+
+	/* it_size is in number of entries */
+	tbl->it_size = 0x80000000 >> tbl->it_page_shift;
+
+	/* Initialize the common IOMMU code */
+	tbl->it_base = (unsigned long)iob_l2_base;
+	/* XXXOJN tune this to avoid IOB cache invals.
+	 * Should probably be 8 (64 bytes)
+	 */
+	tbl->it_blocksize = 4;
+	tbl->it_ops = &iommu_table_iobmap_ops;
+
+	if (!iommu_init_table(tbl, 0, 0, 0))
+		panic("Failed to initialize iommu table");
+
+	pr_debug(" <- %s\n", __func__);
+}
+
+
+
+/* Per-bus DMA setup hook: performs the one-time IOMMU table setup the
+ * first time it is called and does nothing afterwards.
+ */
+static void pci_dma_bus_setup_pasemi(struct pci_bus *bus)
+{
+	pr_debug("pci_dma_bus_setup, bus %p, bus->self %p\n", bus, bus->self);
+
+	if (iommu_table_iobmap_inited)
+		return;
+
+	/* Mark as done before the setup call, as the table is shared */
+	iommu_table_iobmap_inited = 1;
+	iommu_table_iobmap_setup();
+}
+
+
+/* Per-device DMA setup hook: attach the shared IOB map table to @dev,
+ * except for the DMA engine in the non-LPAR case described below.
+ */
+static void pci_dma_dev_setup_pasemi(struct pci_dev *dev)
+{
+	pr_debug("pci_dma_dev_setup, dev %p (%s)\n", dev, pci_name(dev));
+
+#if !defined(CONFIG_PPC_PASEMI_IOMMU_DMA_FORCE)
+	/* For non-LPAR environment, don't translate anything for the DMA
+	 * engine. The exception to this is if the user has enabled
+	 * CONFIG_PPC_PASEMI_IOMMU_DMA_FORCE at build time.
+	 */
+	if (dev->vendor == 0x1959 && dev->device == 0xa007) {
+		if (!firmware_has_feature(FW_FEATURE_LPAR)) {
+			dev->dev.dma_ops = NULL;
+			/*
+			 * Set the coherent DMA mask to prevent the iommu
+			 * being used unnecessarily
+			 */
+			dev->dev.coherent_dma_mask = DMA_BIT_MASK(44);
+			return;
+		}
+	}
+#endif
+
+	set_iommu_table_base(&dev->dev, &iommu_table_iobmap);
+}
+
+/* Set up the IOB address translation hardware: allocate the level-2 table
+ * and a spare page, map the IOB registers, point the 64 level-1 registers
+ * at the level-2 table, select a 2GB translation window and enable
+ * translation. Panics on any allocation or mapping failure.
+ *
+ * @dn is not used. Always returns 0.
+ *
+ * NOTE(review): nothing here writes initial values into the level-2 table
+ * after allocating it - confirm whether entries are expected to be set to
+ * iob_l2_emptyval before translation is enabled.
+ */
+static int __init iob_init(struct device_node *dn)
+{
+ unsigned long tmp;
+ u32 regword;
+ int i;
+
+ pr_debug(" -> %s\n", __func__);
+
+ /* For 2G space, 8x64 pages (2^21 bytes) is max total l2 size */
+ iob_l2_base = memblock_alloc_try_nid_raw(1UL << 21, 1UL << 21,
+ MEMBLOCK_LOW_LIMIT, 0x80000000,
+ NUMA_NO_NODE);
+ if (!iob_l2_base)
+ panic("%s: Failed to allocate %lu bytes align=0x%lx max_addr=%x\n",
+ __func__, 1UL << 21, 1UL << 21, 0x80000000);
+
+ pr_info("IOBMAP L2 allocated at: %p\n", iob_l2_base);
+
+ /* Allocate a spare page to map all invalid IOTLB pages. */
+ tmp = memblock_phys_alloc(IOBMAP_PAGE_SIZE, IOBMAP_PAGE_SIZE);
+ if (!tmp)
+ panic("IOBMAP: Cannot allocate spare page!");
+ /* Empty l1 is marked invalid */
+ iob_l1_emptyval = 0;
+ /* Empty l2 is mapped to dummy page */
+ iob_l2_emptyval = IOBMAP_L2E_V | (tmp >> IOBMAP_PAGE_SHIFT);
+
+ iob = ioremap(IOB_BASE, IOB_SIZE);
+ if (!iob)
+ panic("IOBMAP: Cannot map registers!");
+
+ /* setup direct mapping of the L1 entries */
+ for (i = 0; i < 64; i++) {
+ /* Each L1 covers 32MB, i.e. 8K entries = 32K of ram */
+ /* iob_l2_base is a u32 pointer, so i*0x2000 advances 32KB per step */
+ regword = IOBMAP_L1E_V | (__pa(iob_l2_base + i*0x2000) >> 12);
+ out_le32(iob+IOB_XLT_L1_REGBASE+i*4, regword);
+ }
+
+ /* set 2GB translation window, based at 0 */
+ regword = in_le32(iob+IOB_AD_REG);
+ regword &= ~IOB_AD_TRNG_MASK;
+ regword |= IOB_AD_TRNG_2G;
+ out_le32(iob+IOB_AD_REG, regword);
+
+ /* Enable translation */
+ regword = in_le32(iob+IOBCOM_REG);
+ regword |= IOBCOM_ATEN;
+ out_le32(iob+IOBCOM_REG, regword);
+
+ pr_debug(" <- %s\n", __func__);
+
+ return 0;
+}
+
+
+/* Early IOMMU bring-up (called very early in boot).
+ *
+ * Does nothing when the IOMMU is compiled out or when the /chosen node
+ * carries "linux,iommu-off". Otherwise initializes the IOB translation
+ * hardware and installs the PA Semi DMA setup hooks and IOMMU DMA ops.
+ */
+void __init iommu_init_early_pasemi(void)
+{
+	int iommu_off = 1;
+
+#ifdef CONFIG_PPC_PASEMI_IOMMU
+	iommu_off = of_chosen &&
+		    of_property_read_bool(of_chosen, "linux,iommu-off");
+#endif
+
+	if (!iommu_off) {
+		iob_init(NULL);
+
+		pasemi_pci_controller_ops.dma_dev_setup = pci_dma_dev_setup_pasemi;
+		pasemi_pci_controller_ops.dma_bus_setup = pci_dma_bus_setup_pasemi;
+		set_pci_dma_ops(&dma_iommu_ops);
+	}
+}
diff --git a/arch/powerpc/platforms/pasemi/misc.c b/arch/powerpc/platforms/pasemi/misc.c
new file mode 100644
index 000000000..9e9a7e462
--- /dev/null
+++ b/arch/powerpc/platforms/pasemi/misc.c
@@ -0,0 +1,87 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2007 PA Semi, Inc
+ *
+ * Parts based on arch/powerpc/sysdev/fsl_soc.c:
+ *
+ * 2006 (c) MontaVista Software, Inc.
+ */
+
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
+#include <linux/i2c.h>
+
+#ifdef CONFIG_I2C_BOARDINFO
+/* The below is from fsl_soc.c. It's copied because, since there are no
+ * official bus bindings at this time, it doesn't make sense to share it
+ * across the platforms, even though they happen to be common.
+ */
+struct i2c_driver_device {
+ char *of_device; /* device-tree compatible string to match against */
+ char *i2c_type; /* name copied into i2c_board_info.type on a match */
+};
+
+static struct i2c_driver_device i2c_devices[] __initdata = { /* table searched by find_i2c_driver() */
+ {"dallas,ds1338", "ds1338"},
+};
+
+/*
+ * Fill in info->type for @node from the i2c_devices table.
+ *
+ * Returns 0 for the first table entry @node is compatible with, -ENOMEM if
+ * the type name could not be copied into info->type, and -ENODEV when no
+ * entry matches.
+ */
+static int __init find_i2c_driver(struct device_node *node,
+				  struct i2c_board_info *info)
+{
+	int idx = 0;
+
+	while (idx < ARRAY_SIZE(i2c_devices)) {
+		if (of_device_is_compatible(node, i2c_devices[idx].of_device)) {
+			if (strscpy(info->type, i2c_devices[idx].i2c_type,
+				    I2C_NAME_SIZE) < 0)
+				return -ENOMEM;
+			return 0;
+		}
+		idx++;
+	}
+
+	return -ENODEV;
+}
+
+/*
+ * Register I2C board info for the children of every PA Semi PCI device with
+ * ID 0xa003 that has a device-tree node.
+ *
+ * Each child node needs a "reg" property holding an address no larger than
+ * 0x3ff and must match an entry in i2c_devices; other nodes are skipped.
+ * The board info is registered on the bus numbered after the PCI function
+ * of the adapter. Always returns 0.
+ */
+static int __init pasemi_register_i2c_devices(void)
+{
+	struct pci_dev *pdev = NULL;
+	struct device_node *adap_node;
+	struct device_node *node;
+
+	while ((pdev = pci_get_device(PCI_VENDOR_ID_PASEMI, 0xa003, pdev))) {
+		adap_node = pci_device_to_OF_node(pdev);
+		if (!adap_node)
+			continue;
+
+		for_each_child_of_node(adap_node, node) {
+			struct i2c_board_info info = {};
+			const u32 *addr;
+			int len;
+
+			addr = of_get_property(node, "reg", &len);
+			if (!addr || len < sizeof(int) ||
+			    *addr > (1 << 10) - 1) {
+				pr_warn("pasemi_register_i2c_devices: invalid i2c device entry\n");
+				continue;
+			}
+
+			/*
+			 * Look the driver up before touching the interrupt so
+			 * that no IRQ mapping is created (and left behind) for
+			 * a node we are going to skip anyway.
+			 */
+			if (find_i2c_driver(node, &info) < 0)
+				continue;
+
+			/* No usable interrupt is reported to the driver as -1 */
+			info.irq = irq_of_parse_and_map(node, 0);
+			if (!info.irq)
+				info.irq = -1;
+
+			info.addr = *addr;
+
+			i2c_register_board_info(PCI_FUNC(pdev->devfn), &info,
+						1);
+		}
+	}
+	return 0;
+}
+device_initcall(pasemi_register_i2c_devices);
+#endif
diff --git a/arch/powerpc/platforms/pasemi/msi.c b/arch/powerpc/platforms/pasemi/msi.c
new file mode 100644
index 000000000..166c97fff
--- /dev/null
+++ b/arch/powerpc/platforms/pasemi/msi.c
@@ -0,0 +1,162 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2007, Olof Johansson, PA Semi
+ *
+ * Based on arch/powerpc/sysdev/mpic_u3msi.c:
+ *
+ * Copyright 2006, Segher Boessenkool, IBM Corporation.
+ * Copyright 2006-2007, Michael Ellerman, IBM Corporation.
+ */
+
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
+#include <linux/msi.h>
+#include <asm/mpic.h>
+#include <asm/hw_irq.h>
+#include <asm/ppc-pci.h>
+#include <asm/msi_bitmap.h>
+
+#include <sysdev/mpic.h>
+
+/* Allocate 16 interrupts per device, to give an alignment of 16,
+ * since that's the size of the grouping w.r.t. affinity. If someone
+ * needs more than 32 MSI's down the road we'll have to rethink this,
+ * but it should be OK for now.
+ */
+#define ALLOC_CHUNK 16
+
+#define PASEMI_MSI_ADDR 0xfc080000
+
+/* A bit ugly, can we get this from the pci_dev somehow? */
+static struct mpic *msi_mpic;
+
+
+/* Mask an MSI: first through the PCI MSI layer, then at the MPIC. */
+static void mpic_pasemi_msi_mask_irq(struct irq_data *data)
+{
+	unsigned int virq = data->irq;
+
+	pr_debug("mpic_pasemi_msi_mask_irq %d\n", virq);
+
+	pci_msi_mask_irq(data);
+	mpic_mask_irq(data);
+}
+
+/* Unmask an MSI: the MPIC first, then the PCI MSI layer (reverse of mask). */
+static void mpic_pasemi_msi_unmask_irq(struct irq_data *data)
+{
+	unsigned int virq = data->irq;
+
+	pr_debug("mpic_pasemi_msi_unmask_irq %d\n", virq);
+
+	mpic_unmask_irq(data);
+	pci_msi_unmask_irq(data);
+}
+
+static struct irq_chip mpic_pasemi_msi_chip = { /* irq_chip installed on each MSI virq by pasemi_msi_setup_msi_irqs() */
+ .irq_shutdown = mpic_pasemi_msi_mask_irq, /* shutdown reuses the mask routine */
+ .irq_mask = mpic_pasemi_msi_mask_irq,
+ .irq_unmask = mpic_pasemi_msi_unmask_irq,
+ .irq_eoi = mpic_end_irq,
+ .irq_set_type = mpic_set_irq_type,
+ .irq_set_affinity = mpic_set_affinity,
+ .name = "PASEMI-MSI",
+};
+
+/*
+ * Undo pasemi_msi_setup_msi_irqs() for @pdev: for every associated MSI
+ * descriptor, detach it from its virq, dispose of the mapping and hand the
+ * ALLOC_CHUNK hardware interrupts back to the MPIC MSI bitmap.
+ */
+static void pasemi_msi_teardown_msi_irqs(struct pci_dev *pdev)
+{
+	struct msi_desc *desc;
+
+	pr_debug("pasemi_msi_teardown_msi_irqs, pdev %p\n", pdev);
+
+	msi_for_each_desc(desc, &pdev->dev, MSI_DESC_ASSOCIATED) {
+		unsigned int virq = desc->irq;
+		/* Must be looked up while the mapping still exists */
+		irq_hw_number_t hw = virq_to_hw(virq);
+
+		irq_set_msi_desc(virq, NULL);
+		irq_dispose_mapping(virq);
+		desc->irq = 0;
+		msi_bitmap_free_hwirqs(&msi_mpic->msi_bitmap, hw, ALLOC_CHUNK);
+	}
+}
+
+static int pasemi_msi_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
+{ /* Set up one MPIC-backed interrupt for every not-yet-associated MSI
+ * descriptor of @pdev. @nvec is only logged; @type is only used to emit a
+ * debug note for MSI-X. Returns 0 on success, the negative value from the
+ * bitmap allocator if no hwirqs are left, or -ENOSPC if the hwirq cannot
+ * be mapped to a virq. Descriptors set up before a failure are not undone
+ * here.
+ */
+ unsigned int virq;
+ struct msi_desc *entry;
+ struct msi_msg msg;
+ int hwirq;
+
+ if (type == PCI_CAP_ID_MSIX)
+ pr_debug("pasemi_msi: MSI-X untested, trying anyway\n");
+ pr_debug("pasemi_msi_setup_msi_irqs, pdev %p nvec %d type %d\n",
+ pdev, nvec, type);
+
+ msg.address_hi = 0; /* the message address is the same for every descriptor */
+ msg.address_lo = PASEMI_MSI_ADDR;
+
+ msi_for_each_desc(entry, &pdev->dev, MSI_DESC_NOTASSOCIATED) {
+ /* Allocate 16 interrupts for now, since that's the grouping for
+ * affinity. This can be changed later if it turns out 32 is too
+ * few MSIs for someone, but restrictions will apply to how the
+ * sources can be changed independently.
+ */
+ hwirq = msi_bitmap_alloc_hwirqs(&msi_mpic->msi_bitmap,
+ ALLOC_CHUNK);
+ if (hwirq < 0) {
+ pr_debug("pasemi_msi: failed allocating hwirq\n");
+ return hwirq;
+ }
+
+ virq = irq_create_mapping(msi_mpic->irqhost, hwirq);
+ if (!virq) {
+ pr_debug("pasemi_msi: failed mapping hwirq 0x%x\n",
+ hwirq);
+ msi_bitmap_free_hwirqs(&msi_mpic->msi_bitmap, hwirq,
+ ALLOC_CHUNK); /* give the chunk back before bailing out */
+ return -ENOSPC;
+ }
+
+ /* Vector on MSI is really an offset, the hardware adds
+ * it to the value written at the magic address. So set
+ * it to 0 to remain sane.
+ */
+ mpic_set_vector(virq, 0);
+
+ irq_set_msi_desc(virq, entry);
+ irq_set_chip(virq, &mpic_pasemi_msi_chip);
+ irq_set_irq_type(virq, IRQ_TYPE_EDGE_RISING);
+
+ pr_debug("pasemi_msi: allocated virq 0x%x (hw 0x%x) " \
+ "addr 0x%x\n", virq, hwirq, msg.address_lo);
+
+ /* Likewise, the device writes [0...511] into the target
+ * register to generate MSI [512...1023]
+ */
+ msg.data = hwirq-0x200; /* 0x200 == 512, the base of the MSI range above */
+ pci_write_msi_msg(virq, &msg);
+ }
+
+ return 0;
+}
+
+/*
+ * Hook PA Semi MSI support up to @mpic.
+ *
+ * Returns -ENODEV unless the MPIC's device-tree node is compatible with
+ * "pasemi,pwrficient-openpic", the allocator's error code if the MSI bitmap
+ * cannot be set up, and 0 once the setup/teardown callbacks have been
+ * installed on every PCI host controller in hose_list.
+ */
+int __init mpic_pasemi_msi_init(struct mpic *mpic)
+{
+	struct device_node *np = irq_domain_get_of_node(mpic->irqhost);
+	struct pci_controller *hose;
+	int err;
+
+	if (!np)
+		return -ENODEV;
+	if (!of_device_is_compatible(np, "pasemi,pwrficient-openpic"))
+		return -ENODEV;
+
+	err = mpic_msi_init_allocator(mpic);
+	if (err) {
+		pr_debug("pasemi_msi: Error allocating bitmap!\n");
+		return err;
+	}
+
+	pr_debug("pasemi_msi: Registering PA Semi MPIC MSI callbacks\n");
+
+	msi_mpic = mpic;
+	list_for_each_entry(hose, &hose_list, list_node) {
+		/* Nobody else is expected to have claimed the MSI hooks */
+		WARN_ON(hose->controller_ops.setup_msi_irqs);
+		hose->controller_ops.setup_msi_irqs = pasemi_msi_setup_msi_irqs;
+		hose->controller_ops.teardown_msi_irqs = pasemi_msi_teardown_msi_irqs;
+	}
+
+	return 0;
+}
diff --git a/arch/powerpc/platforms/pasemi/pasemi.h b/arch/powerpc/platforms/pasemi/pasemi.h
new file mode 100644
index 000000000..018c30665
--- /dev/null
+++ b/arch/powerpc/platforms/pasemi/pasemi.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PASEMI_PASEMI_H
+#define _PASEMI_PASEMI_H
+
+extern time64_t pas_get_boot_time(void);
+extern void pas_pci_init(void);
+struct pci_dev;
+extern void pas_pci_irq_fixup(struct pci_dev *dev);
+extern void pas_pci_dma_dev_setup(struct pci_dev *dev);
+
+void __iomem *__init pasemi_pci_getcfgaddr(struct pci_dev *dev, int offset);
+
+extern void __init pasemi_map_registers(void);
+
+/* Power savings modes, implemented in asm */
+extern void idle_spin(void);
+extern void idle_doze(void);
+
+/* Restore astate to last set */
+#ifdef CONFIG_PPC_PASEMI_CPUFREQ
+extern int check_astate(void);
+extern void restore_astate(int cpu);
+#else
+static inline int check_astate(void)
+{
+ /* Stub used when CONFIG_PPC_PASEMI_CPUFREQ is not set:
+ * Always return >0 so we never power save */
+ return 1;
+}
+static inline void restore_astate(int cpu)
+{ /* no-op stub used when CONFIG_PPC_PASEMI_CPUFREQ is not set */
+}
+#endif
+
+extern struct pci_controller_ops pasemi_pci_controller_ops;
+
+#endif /* _PASEMI_PASEMI_H */
diff --git a/arch/powerpc/platforms/pasemi/pci.c b/arch/powerpc/platforms/pasemi/pci.c
new file mode 100644
index 000000000..f27d31414
--- /dev/null
+++ b/arch/powerpc/platforms/pasemi/pci.c
@@ -0,0 +1,294 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2006 PA Semi, Inc
+ *
+ * Authors: Kip Walker, PA Semi
+ * Olof Johansson, PA Semi
+ *
+ * Maintained by: Olof Johansson <olof@lixom.net>
+ *
+ * Based on arch/powerpc/platforms/maple/pci.c
+ */
+
+
+#include <linux/kernel.h>
+#include <linux/of_address.h>
+#include <linux/pci.h>
+
+#include <asm/pci-bridge.h>
+#include <asm/isa-bridge.h>
+#include <asm/machdep.h>
+
+#include <asm/ppc-pci.h>
+
+#include "pasemi.h"
+
+#define PA_PXP_CFA(bus, devfn, off) (((bus) << 20) | ((devfn) << 12) | (off))
+
+/* Return non-zero if @offset lies inside the config space of @bus/@devfn. */
+static inline int pa_pxp_offset_valid(u8 bus, u8 devfn, int offset)
+{
+	/*
+	 * Device 0 Function 0 is special: its config space spans function 1
+	 * as well, so a larger offset is allowed there. It is really a
+	 * two-function device, but the second function does not probe.
+	 */
+	int limit = (bus == 0 && devfn == 0) ? 8192 : 4096;
+
+	return offset < limit;
+}
+
+/*
+ * Translate bus/devfn/offset into an address inside the config window
+ * mapped at hose->cfg_data, using the PA_PXP_CFA() layout.
+ */
+static void volatile __iomem *pa_pxp_cfg_addr(struct pci_controller *hose,
+					      u8 bus, u8 devfn, int offset)
+{
+	int cfa = PA_PXP_CFA(bus, devfn, offset);
+
+	return hose->cfg_data + cfa;
+}
+
+/*
+ * Root ports are functions 0-3 of devices 16 and 17 on bus 0.
+ * Returns 1 for those, 0 for everything else.
+ */
+static inline int is_root_port(int busno, int devfn)
+{
+	if (busno != 0)
+		return 0;
+	if (PCI_FUNC(devfn) >= 4)
+		return 0;
+
+	return PCI_SLOT(devfn) == 16 || PCI_SLOT(devfn) == 17;
+}
+
+/*
+ * Registers covered by workaround_5945(): offsets 0x18..0x33 and
+ * 0x158..0x177. Returns 1 for those, 0 otherwise.
+ */
+static inline int is_5945_reg(int reg)
+{
+	if (reg >= 0x18 && reg < 0x34)
+		return 1;
+	if (reg >= 0x158 && reg < 0x178)
+		return 1;
+
+	return 0;
+}
+
+/*
+ * Config read path for the registers affected by bug 5945.
+ *
+ * Returns 0 without touching the hardware when @devfn is not a root port or
+ * @offset is not one of the affected registers. Otherwise performs the
+ * workaround sequence, stores the @len-byte value found at @offset in *@val
+ * and returns 1.
+ */
+static int workaround_5945(struct pci_bus *bus, unsigned int devfn,
+			   int offset, int len, u32 *val)
+{
+	struct pci_controller *hose;
+	void volatile __iomem *reg, *scratch;
+	int lane = offset & 0x3;
+	u32 word;
+
+	if (!is_root_port(bus->number, devfn))
+		return 0;
+	if (!is_5945_reg(offset))
+		return 0;
+
+	hose = pci_bus_to_host(bus);
+	reg = pa_pxp_cfg_addr(hose, bus->number, devfn, offset & ~0x3);
+
+	/*
+	 * Workaround bug 5945: write 0 to a dummy register before reading,
+	 * and write back what was read. The full 32-bit word has to be
+	 * read/written, so sub-word values are extracted by hand below.
+	 */
+	scratch = pa_pxp_cfg_addr(hose, bus->number, devfn, 0x10);
+	out_le32(scratch, 0);
+	word = in_le32(reg);
+	out_le32(reg, word);
+
+	if (len == 1)
+		*val = (word >> (8 * lane)) & 0xff;
+	else if (len == 2)
+		*val = lane ? (word >> 16) & 0xffff : word & 0xffff;
+	else
+		*val = word;
+
+	return 1;
+}
+
+#ifdef CONFIG_PPC_PASEMI_NEMO
+#define PXP_ERR_CFG_REG 0x4
+#define PXP_IGNORE_PCIE_ERRORS 0x800
+#define SB600_BUS 5
+
+/*
+ * Set PXP_IGNORE_PCIE_ERRORS in the IOB error config register when @bus is
+ * the SB600's bus, and clear it for every other bus.
+ *
+ * The register window is mapped on first use, from the "pasemi,1682m-iob"
+ * node; if that fails nothing is written and the mapping is retried on the
+ * next call.
+ */
+static void sb600_set_flag(int bus)
+{
+	static void __iomem *iob_mapbase = NULL;
+	void __iomem *err_cfg;
+	u32 cfg;
+
+	if (!iob_mapbase) {
+		struct device_node *dn;
+		struct resource res;
+		int err;
+
+		dn = of_find_compatible_node(NULL, "isa", "pasemi,1682m-iob");
+		if (!dn) {
+			pr_crit("NEMO SB600 missing iob node\n");
+			return;
+		}
+
+		err = of_address_to_resource(dn, 0, &res);
+		of_node_put(dn);
+		if (err) {
+			pr_crit("NEMO SB600 missing resource\n");
+			return;
+		}
+
+		pr_info("NEMO SB600 IOB base %08llx\n", res.start);
+
+		iob_mapbase = ioremap(res.start + 0x100, 0x94);
+		if (!iob_mapbase)
+			return;
+	}
+
+	err_cfg = iob_mapbase + PXP_ERR_CFG_REG;
+	cfg = in_le32(err_cfg);
+	if (bus == SB600_BUS) {
+		/*
+		 * This is the SB600's bus, tell the PCI-e root port
+		 * to allow non-zero devices to enumerate.
+		 */
+		cfg |= PXP_IGNORE_PCIE_ERRORS;
+	} else {
+		/* Only scan device 0 on other busses */
+		cfg &= ~PXP_IGNORE_PCIE_ERRORS;
+	}
+	out_le32(err_cfg, cfg);
+}
+
+#else
+
+static void sb600_set_flag(int bus)
+{ /* no-op stub used when CONFIG_PPC_PASEMI_NEMO is not set */
+}
+#endif
+
+/*
+ * pci_ops read hook: read @len bytes of config space at @offset.
+ *
+ * Returns PCIBIOS_DEVICE_NOT_FOUND when the bus has no host controller,
+ * PCIBIOS_BAD_REGISTER_NUMBER when @offset is out of range, and
+ * PCIBIOS_SUCCESSFUL otherwise, with the value stored in *@val.
+ */
+static int pa_pxp_read_config(struct pci_bus *bus, unsigned int devfn,
+			      int offset, int len, u32 *val)
+{
+	struct pci_controller *hose = pci_bus_to_host(bus);
+	void volatile __iomem *addr;
+
+	if (!hose)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+	if (!pa_pxp_offset_valid(bus->number, devfn, offset))
+		return PCIBIOS_BAD_REGISTER_NUMBER;
+
+	/* Registers hit by bug 5945 are read through the workaround path */
+	if (workaround_5945(bus, devfn, offset, len, val))
+		return PCIBIOS_SUCCESSFUL;
+
+	addr = pa_pxp_cfg_addr(hose, bus->number, devfn, offset);
+
+	sb600_set_flag(bus->number);
+
+	/*
+	 * Note: the caller has already checked that offset is
+	 * suitably aligned and that len is 1, 2 or 4.
+	 */
+	if (len == 1)
+		*val = in_8(addr);
+	else if (len == 2)
+		*val = in_le16(addr);
+	else
+		*val = in_le32(addr);
+
+	return PCIBIOS_SUCCESSFUL;
+}
+
+/*
+ * pci_ops write hook: write the low @len bytes of @val to config space at
+ * @offset.
+ *
+ * Returns PCIBIOS_DEVICE_NOT_FOUND when the bus has no host controller,
+ * PCIBIOS_BAD_REGISTER_NUMBER when @offset is out of range, and
+ * PCIBIOS_SUCCESSFUL otherwise.
+ */
+static int pa_pxp_write_config(struct pci_bus *bus, unsigned int devfn,
+			       int offset, int len, u32 val)
+{
+	struct pci_controller *hose = pci_bus_to_host(bus);
+	void volatile __iomem *addr;
+
+	if (!hose)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+	if (!pa_pxp_offset_valid(bus->number, devfn, offset))
+		return PCIBIOS_BAD_REGISTER_NUMBER;
+
+	addr = pa_pxp_cfg_addr(hose, bus->number, devfn, offset);
+
+	sb600_set_flag(bus->number);
+
+	/*
+	 * Note: the caller has already checked that offset is
+	 * suitably aligned and that len is 1, 2 or 4.
+	 */
+	if (len == 1)
+		out_8(addr, val);
+	else if (len == 2)
+		out_le16(addr, val);
+	else
+		out_le32(addr, val);
+
+	return PCIBIOS_SUCCESSFUL;
+}
+
+static struct pci_ops pa_pxp_ops = { /* config accessors installed on the hose by setup_pa_pxp() */
+ .read = pa_pxp_read_config,
+ .write = pa_pxp_write_config,
+};
+
+static void __init setup_pa_pxp(struct pci_controller *hose)
+{ /* Install the PA-PXP config accessors and map the config-space window. */
+ hose->ops = &pa_pxp_ops;
+ hose->cfg_data = ioremap(0xe0000000, 0x10000000); /* base used by pa_pxp_cfg_addr(); result is not checked here */
+}
+
+/*
+ * Allocate and set up the PCI host controller for device-tree node @dev.
+ *
+ * The controller covers bus numbers 0-0xff, gets a copy of
+ * pasemi_pci_controller_ops and the PA-PXP config accessors, and has its
+ * "ranges" property parsed.
+ *
+ * Returns 0 on success, or -ENOMEM if the controller cannot be allocated or
+ * its config-space window cannot be mapped.
+ */
+static int __init pas_add_bridge(struct device_node *dev)
+{
+	struct pci_controller *hose;
+
+	pr_debug("Adding PCI host bridge %pOF\n", dev);
+
+	hose = pcibios_alloc_controller(dev);
+	if (!hose)
+		return -ENOMEM;
+
+	hose->first_busno = 0;
+	hose->last_busno = 0xff;
+	hose->controller_ops = pasemi_pci_controller_ops;
+
+	setup_pa_pxp(hose);
+
+	/*
+	 * Every config access dereferences cfg_data, so a failed mapping
+	 * must not leave a registered controller behind.
+	 */
+	if (!hose->cfg_data) {
+		pr_err("Failed to map PA-PXP config space for %pOF\n", dev);
+		pcibios_free_controller(hose);
+		return -ENOMEM;
+	}
+
+	pr_info("Found PA-PXP PCI host bridge.\n");
+
+	/* Interpret the "ranges" property */
+	pci_process_bridge_OF_ranges(hose, dev, 1);
+
+	/*
+	 * Scan for an isa bridge. This is needed to find the SB600 on the nemo
+	 * and does nothing on machines without one.
+	 */
+	isa_bridge_find_early(hose);
+
+	return 0;
+}
+
+/*
+ * Discover the PA Semi PCI host bridge.
+ *
+ * Requests scanning of all PCIe devices, then adds a bridge for the first
+ * node compatible with "pasemi,rootbus", if there is one. A failure to add
+ * the bridge is logged rather than silently dropped.
+ */
+void __init pas_pci_init(void)
+{
+	struct device_node *np;
+	int res;
+
+	pci_set_flags(PCI_SCAN_ALL_PCIE_DEVS);
+
+	np = of_find_compatible_node(of_root, NULL, "pasemi,rootbus");
+	if (!np)
+		return;
+
+	res = pas_add_bridge(np);
+	if (res)
+		pr_err("Failed to add PCI host bridge %pOF: %d\n", np, res);
+
+	of_node_put(np);
+}
+
+/*
+ * Return the mapped address of config-space register @offset of @dev,
+ * computed from the config window of the device's host controller.
+ */
+void __iomem *__init pasemi_pci_getcfgaddr(struct pci_dev *dev, int offset)
+{
+	struct pci_bus *bus = dev->bus;
+	struct pci_controller *hose = pci_bus_to_host(bus);
+	void volatile __iomem *addr;
+
+	addr = pa_pxp_cfg_addr(hose, bus->number, dev->devfn, offset);
+
+	return (void __iomem *)addr;
+}
+
+struct pci_controller_ops pasemi_pci_controller_ops;
diff --git a/arch/powerpc/platforms/pasemi/powersave.S b/arch/powerpc/platforms/pasemi/powersave.S
new file mode 100644
index 000000000..d0215d532
--- /dev/null
+++ b/arch/powerpc/platforms/pasemi/powersave.S
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2006-2007 PA Semi, Inc
+ *
+ * Maintained by: Olof Johansson <olof@lixom.net>
+ */
+
+#include <asm/processor.h>
+#include <asm/page.h>
+#include <asm/ppc_asm.h>
+#include <asm/cputable.h>
+#include <asm/cache.h>
+#include <asm/thread_info.h>
+#include <asm/asm-offsets.h>
+
+/* Power savings opcodes since not all binutils have them at this time */
+#define DOZE .long 0x4c000324
+#define NAP .long 0x4c000364
+#define SLEEP .long 0x4c0003a4
+#define RVW .long 0x4c0003e4
+
+/* Common sequence to do before going to any of the
+ * powersavings modes.
+ */
+
+#define PRE_SLEEP_SEQUENCE \
+ std r3,8(r1); \
+ ptesync ; \
+ ld r3,8(r1); \
+1: cmpd r3,r3; \
+ bne 1b
+
+_doze:
+ PRE_SLEEP_SEQUENCE
+ DOZE
+ b .
+
+
+_GLOBAL(idle_spin)
+ blr
+
+_GLOBAL(idle_doze)
+ LOAD_REG_ADDR(r3, _doze) /* r3 = address of _doze, the routine sleep_common branches to via CTR */
+ b sleep_common
+
+/* Add more modes here later */
+
+sleep_common: /* in: r3 = address of the power-save routine to call */
+ mflr r0
+ std r0, 16(r1) /* save LR at 16(r1) */
+ stdu r1,-64(r1) /* push a 64-byte stack frame */
+#ifdef CONFIG_PPC_PASEMI_CPUFREQ
+ std r3, 48(r1) /* keep the routine address across the call below */
+
+ /* Only do power savings when in astate 0 */
+ bl check_astate
+ cmpwi r3,0
+ bne 1f /* non-zero result: skip straight to the epilogue */
+
+ ld r3, 48(r1) /* reload the routine address */
+#endif
+ LOAD_REG_IMMEDIATE(r6,MSR_DR|MSR_IR|MSR_ME|MSR_EE) /* MSR bits to clear */
+ mfmsr r4 /* r4 = current MSR, restored after the call */
+ andc r5,r4,r6 /* r5 = MSR with the bits in r6 cleared */
+ mtmsrd r5,0
+
+ mtctr r3
+ bctrl /* call the routine whose address was passed in r3 */
+
+ mtmsrd r4,0 /* put the original MSR back */
+
+1: addi r1,r1,64 /* pop the stack frame */
+ ld r0,16(r1)
+ mtlr r0 /* restore LR and return to the caller */
+ blr
+
diff --git a/arch/powerpc/platforms/pasemi/setup.c b/arch/powerpc/platforms/pasemi/setup.c
new file mode 100644
index 000000000..ef985ba2b
--- /dev/null
+++ b/arch/powerpc/platforms/pasemi/setup.c
@@ -0,0 +1,456 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2006-2007 PA Semi, Inc
+ *
+ * Authors: Kip Walker, PA Semi
+ * Olof Johansson, PA Semi
+ *
+ * Maintained by: Olof Johansson <olof@lixom.net>
+ *
+ * Based on arch/powerpc/platforms/maple/setup.c
+ */
+
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/delay.h>
+#include <linux/console.h>
+#include <linux/export.h>
+#include <linux/pci.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/gfp.h>
+#include <linux/irqdomain.h>
+
+#include <asm/iommu.h>
+#include <asm/machdep.h>
+#include <asm/i8259.h>
+#include <asm/mpic.h>
+#include <asm/smp.h>
+#include <asm/time.h>
+#include <asm/mmu.h>
+#include <asm/debug.h>
+
+#include <pcmcia/ss.h>
+#include <pcmcia/cistpl.h>
+#include <pcmcia/ds.h>
+
+#include "pasemi.h"
+
+/* SDC reset register, must be pre-mapped at reset time */
+static void __iomem *reset_reg;
+
+/* Various error status registers, must be pre-mapped at MCE time */
+
+#define MAX_MCE_REGS 32
+struct mce_regs {
+ char *name;
+ void __iomem *addr;
+};
+
+static struct mce_regs mce_regs[MAX_MCE_REGS];
+static int num_mce_regs;
+static int nmi_virq = 0;
+
+
+/*
+ * Restart the machine by writing to the pre-mapped SDC reset register until
+ * the reset takes effect. @cmd is ignored. Never returns.
+ */
+static void __noreturn pas_restart(char *cmd)
+{
+	/* The other CPUs must sit in a hold loop rather than be asleep */
+	smp_send_stop();
+	udelay(10000);
+
+	printk("Restarting...\n");
+
+	for (;;)
+		out_le32(reset_reg, 0x6000000);
+}
+
+#ifdef CONFIG_PPC_PASEMI_NEMO
+/*
+ * Power the Nemo board off.
+ *
+ * Maps the PLD and keeps setting the bit that makes the SB600 think the
+ * power button is being pressed. Returns only if the PLD cannot be mapped,
+ * in which case nothing is written.
+ */
+void pas_shutdown(void)
+{
+	void __iomem *pld_map = ioremap(0xf5000000, 4096);
+
+	/* Writing through a failed mapping would fault instead of powering off */
+	if (!pld_map) {
+		pr_err("NEMO: cannot map PLD, unable to power off\n");
+		return;
+	}
+
+	while (1)
+		out_8(pld_map + 7, 0x01);
+}
+
+/* Resources for the "rtc_cmos" platform device registered by nemo_init_rtc(); the RTC is not described in the device tree */
+static struct resource rtc_resource[] = {{
+ .name = "rtc",
+ .start = 0x70, /* I/O ports 0x70-0x71 */
+ .end = 0x71,
+ .flags = IORESOURCE_IO,
+}, {
+ .name = "rtc",
+ .start = 8, /* interrupt 8 */
+ .end = 8,
+ .flags = IORESOURCE_IRQ,
+}};
+
+/* Register the "rtc_cmos" platform device with the resources above. */
+static inline void nemo_init_rtc(void)
+{
+	platform_device_register_simple("rtc_cmos", -1, rtc_resource,
+					ARRAY_SIZE(rtc_resource));
+}
+
+#else
+
+static inline void nemo_init_rtc(void)
+{ /* no-op stub used when CONFIG_PPC_PASEMI_NEMO is not set */
+}
+#endif
+
+#ifdef CONFIG_SMP
+static arch_spinlock_t timebase_lock;
+static unsigned long timebase;
+
+static void pas_give_timebase(void)
+{ /* Hand this CPU's timebase value to pas_take_timebase() through the
+ * shared 'timebase' variable, with interrupts disabled throughout.
+ */
+ unsigned long flags;
+
+ local_irq_save(flags);
+ hard_irq_disable();
+ arch_spin_lock(&timebase_lock);
+ mtspr(SPRN_TBCTL, TBCTL_FREEZE); /* freeze request is issued before the value is sampled */
+ isync();
+ timebase = get_tb(); /* publish the sampled value */
+ arch_spin_unlock(&timebase_lock);
+
+ while (timebase) /* wait until pas_take_timebase() has zeroed it */
+ barrier();
+ mtspr(SPRN_TBCTL, TBCTL_RESTART); /* restart only after the taker is done */
+ local_irq_restore(flags);
+}
+
+static void pas_take_timebase(void)
+{ /* Counterpart of pas_give_timebase(): load the published value into
+ * this CPU's timebase, then clear 'timebase' to signal completion.
+ */
+ while (!timebase) /* wait for the giver to publish a non-zero value */
+ smp_rmb();
+
+ arch_spin_lock(&timebase_lock);
+ set_tb(timebase >> 32, timebase & 0xffffffff); /* upper and lower 32 bits are passed separately */
+ timebase = 0; /* ends the giver's wait loop */
+ arch_spin_unlock(&timebase_lock);
+}
+
+static struct smp_ops_t pas_smp_ops = { /* installed as smp_ops by pas_setup_arch() */
+ .probe = smp_mpic_probe,
+ .message_pass = smp_mpic_message_pass,
+ .kick_cpu = smp_generic_kick_cpu,
+ .setup_cpu = smp_mpic_setup_cpu,
+ .give_timebase = pas_give_timebase, /* platform-specific timebase handover, defined above */
+ .take_timebase = pas_take_timebase,
+};
+#endif /* CONFIG_SMP */
+
+static void __init pas_setup_arch(void)
+{ /* setup_arch hook: installs the SMP ops (when CONFIG_SMP is set) and
+ * maps the reset register that pas_restart() writes to.
+ */
+#ifdef CONFIG_SMP
+ /* Setup SMP callback */
+ smp_ops = &pas_smp_ops;
+#endif
+
+ /* Remap SDC register for doing reset */
+ /* XXXOJN This should maybe come out of the device tree */
+ reset_reg = ioremap(0xfc101100, 4); /* result is not checked; pas_restart() uses it as-is */
+}
+
+/*
+ * Record the config-space addresses of various SoC status registers in
+ * mce_regs[] so that the machine check handler can dump them.
+ *
+ * The recorded addresses come from the host controller's config window and
+ * do not depend on the pci_dev objects, so every device reference obtained
+ * here is dropped again before returning. Always returns 0.
+ */
+static int __init pas_setup_mce_regs(void)
+{
+	struct pci_dev *dev;
+	int reg;
+
+	/* Remap various SoC status registers for use by the MCE handler */
+
+	reg = 0;
+
+	dev = pci_get_device(PCI_VENDOR_ID_PASEMI, 0xa00a, NULL);
+	while (dev && reg < MAX_MCE_REGS) {
+		mce_regs[reg].name = kasprintf(GFP_KERNEL,
+						"mc%d_mcdebug_errsta", reg);
+		mce_regs[reg].addr = pasemi_pci_getcfgaddr(dev, 0x730);
+		dev = pci_get_device(PCI_VENDOR_ID_PASEMI, 0xa00a, dev);
+		reg++;
+	}
+	/* Still holds a reference if the table filled up first; NULL is fine */
+	pci_dev_put(dev);
+
+	dev = pci_get_device(PCI_VENDOR_ID_PASEMI, 0xa001, NULL);
+	if (dev && reg+4 < MAX_MCE_REGS) {
+		mce_regs[reg].name = "iobdbg_IntStatus1";
+		mce_regs[reg].addr = pasemi_pci_getcfgaddr(dev, 0x438);
+		reg++;
+		mce_regs[reg].name = "iobdbg_IOCTbusIntDbgReg";
+		mce_regs[reg].addr = pasemi_pci_getcfgaddr(dev, 0x454);
+		reg++;
+		mce_regs[reg].name = "iobiom_IntStatus";
+		mce_regs[reg].addr = pasemi_pci_getcfgaddr(dev, 0xc10);
+		reg++;
+		mce_regs[reg].name = "iobiom_IntDbgReg";
+		mce_regs[reg].addr = pasemi_pci_getcfgaddr(dev, 0xc1c);
+		reg++;
+	}
+	pci_dev_put(dev);
+
+	dev = pci_get_device(PCI_VENDOR_ID_PASEMI, 0xa009, NULL);
+	if (dev && reg+2 < MAX_MCE_REGS) {
+		mce_regs[reg].name = "l2csts_IntStatus";
+		mce_regs[reg].addr = pasemi_pci_getcfgaddr(dev, 0x200);
+		reg++;
+		mce_regs[reg].name = "l2csts_Cnt";
+		mce_regs[reg].addr = pasemi_pci_getcfgaddr(dev, 0x214);
+		reg++;
+	}
+	pci_dev_put(dev);
+
+	num_mce_regs = reg;
+
+	return 0;
+}
+machine_device_initcall(pasemi, pas_setup_mce_regs);
+
+#ifdef CONFIG_PPC_PASEMI_NEMO
+/*
+ * Chained handler for the SB600's i8259: dispatch the pending i8259
+ * interrupt, if any, then EOI the cascade interrupt on its own chip.
+ */
+static void sb600_8259_cascade(struct irq_desc *desc)
+{
+	struct irq_chip *parent_chip = irq_desc_get_chip(desc);
+	unsigned int virq = i8259_irq();
+
+	if (virq != 0)
+		generic_handle_irq(virq);
+
+	parent_chip->irq_eoi(&desc->irq_data);
+}
+
+static void __init nemo_init_IRQ(struct mpic *mpic)
+{ /* Cascade the SB600's i8259 behind interrupt 3 and make @mpic's domain
+ * the default IRQ host again afterwards.
+ */
+ struct device_node *np;
+ int gpio_virq;
+ /* Connect the SB600's legacy i8259 controller */
+ np = of_find_node_by_path("/pxp@0,e0000000");
+ i8259_init(np, 0);
+ of_node_put(np);
+
+ gpio_virq = irq_create_mapping(NULL, 3); /* NULL domain: mapped in the default host; result is not checked */
+ irq_set_irq_type(gpio_virq, IRQ_TYPE_LEVEL_HIGH);
+ irq_set_chained_handler(gpio_virq, sb600_8259_cascade);
+ mpic_unmask_irq(irq_get_irq_data(gpio_virq));
+
+ irq_set_default_host(mpic->irqhost);
+}
+
+#else
+
+static inline void nemo_init_IRQ(struct mpic *mpic)
+{ /* no-op stub used when CONFIG_PPC_PASEMI_NEMO is not set */
+}
+#endif
+
+/*
+ * init_IRQ hook: locate the MPIC in the device tree and bring it up.
+ *
+ * The controller node is the first "interrupt-controller" node compatible
+ * with "open-pic", or failing that the first node of type "open-pic". Its
+ * address is taken from the root node's "platform-open-pic" property. When
+ * the MPIC node has an "nmi-source" property, that source is mapped, given
+ * priority 15 and unmasked. Errors are logged and the function returns
+ * without setting anything up.
+ */
+static __init void pas_init_IRQ(void)
+{
+	struct device_node *np;
+	struct device_node *root, *mpic_node;
+	unsigned long openpic_addr;
+	const unsigned int *opprop;
+	int naddr;
+	int mpic_flags;
+	const unsigned int *nmiprop;
+	struct mpic *mpic;
+
+	mpic_node = NULL;
+
+	/* Breaking out of the iterator keeps the reference on the node */
+	for_each_node_by_type(np, "interrupt-controller")
+		if (of_device_is_compatible(np, "open-pic")) {
+			mpic_node = np;
+			break;
+		}
+	if (!mpic_node)
+		for_each_node_by_type(np, "open-pic") {
+			mpic_node = np;
+			break;
+		}
+	if (!mpic_node) {
+		pr_err("Failed to locate the MPIC interrupt controller\n");
+		return;
+	}
+
+	/* Find address list in /platform-open-pic */
+	root = of_find_node_by_path("/");
+	naddr = of_n_addr_cells(root);
+	opprop = of_get_property(root, "platform-open-pic", NULL);
+	if (!opprop) {
+		pr_err("No platform-open-pic property.\n");
+		/* Drop both node references, not just the root's */
+		goto out_put;
+	}
+	openpic_addr = of_read_number(opprop, naddr);
+	pr_debug("OpenPIC addr: %lx\n", openpic_addr);
+
+	mpic_flags = MPIC_LARGE_VECTORS | MPIC_NO_BIAS | MPIC_NO_RESET;
+
+	nmiprop = of_get_property(mpic_node, "nmi-source", NULL);
+	if (nmiprop)
+		mpic_flags |= MPIC_ENABLE_MCK;
+
+	mpic = mpic_alloc(mpic_node, openpic_addr,
+			  mpic_flags, 0, 0, "PASEMI-OPIC");
+	BUG_ON(!mpic);
+
+	mpic_assign_isu(mpic, 0, mpic->paddr + 0x10000);
+	mpic_init(mpic);
+	/* The NMI/MCK source needs to be prio 15 */
+	if (nmiprop) {
+		nmi_virq = irq_create_mapping(NULL, *nmiprop);
+		mpic_irq_set_priority(nmi_virq, 15);
+		irq_set_irq_type(nmi_virq, IRQ_TYPE_EDGE_RISING);
+		mpic_unmask_irq(irq_get_irq_data(nmi_virq));
+	}
+
+	nemo_init_IRQ(mpic);
+
+out_put:
+	of_node_put(mpic_node);
+	of_node_put(root);
+}
+
+static void __init pas_progress(char *s, unsigned short hex)
+{ /* Print @hex as four hex digits followed by @s; a NULL @s prints as an empty string */
+ printk("[%04x] : %s\n", hex, s ? s : "");
+}
+
+
+/*
+ * Machine check handler.
+ *
+ * If the machine check was raised by the NMI source, it is reported, handed
+ * to the debugger and EOI'd. Otherwise the handler logs the exception
+ * registers, decodes the cause bits in SRR1 and DSISR, dumps the SoC debug
+ * registers recorded by pas_setup_mce_regs() and, for SLB-related causes,
+ * the SLB contents.
+ *
+ * Returns 1 if SRR1 marks the exception as recoverable, 0 otherwise.
+ */
+static int pas_machine_check_handler(struct pt_regs *regs)
+{
+	int cpu = smp_processor_id();
+	unsigned long srr0, srr1, dsisr;
+	int dump_slb = 0;
+	int i;
+
+	srr0 = regs->nip;
+	srr1 = regs->msr;
+
+	if (nmi_virq && mpic_get_mcirq() == nmi_virq) {
+		pr_err("NMI delivered\n");
+		debugger(regs);
+		mpic_end_irq(irq_get_irq_data(nmi_virq));
+		goto out;
+	}
+
+	dsisr = mfspr(SPRN_DSISR);
+	pr_err("Machine Check on CPU %d\n", cpu);
+	pr_err("SRR0 0x%016lx SRR1 0x%016lx\n", srr0, srr1);
+	pr_err("DSISR 0x%016lx DAR 0x%016lx\n", dsisr, regs->dar);
+	pr_err("BER 0x%016lx MER 0x%016lx\n", mfspr(SPRN_PA6T_BER),
+	       mfspr(SPRN_PA6T_MER));
+	pr_err("IER 0x%016lx DER 0x%016lx\n", mfspr(SPRN_PA6T_IER),
+	       mfspr(SPRN_PA6T_DER));
+	pr_err("Cause:\n");
+
+	if (srr1 & 0x200000)
+		pr_err("Signalled by SDC\n");
+
+	if (srr1 & 0x100000) {
+		pr_err("Load/Store detected error:\n");
+		if (dsisr & 0x8000)
+			pr_err("D-cache ECC double-bit error or bus error\n");
+		if (dsisr & 0x4000)
+			pr_err("LSU snoop response error\n");
+		if (dsisr & 0x2000) {
+			pr_err("MMU SLB multi-hit or invalid B field\n");
+			dump_slb = 1;
+		}
+		if (dsisr & 0x1000)
+			pr_err("Recoverable Duptags\n");
+		if (dsisr & 0x800)
+			pr_err("Recoverable D-cache parity error count overflow\n");
+		if (dsisr & 0x400)
+			pr_err("TLB parity error count overflow\n");
+	}
+
+	if (srr1 & 0x80000)
+		pr_err("Bus Error\n");
+
+	if (srr1 & 0x40000) {
+		pr_err("I-side SLB multiple hit\n");
+		dump_slb = 1;
+	}
+
+	if (srr1 & 0x20000)
+		pr_err("I-cache parity error hit\n");
+
+	if (num_mce_regs == 0)
+		pr_err("No MCE registers mapped yet, can't dump\n");
+	else
+		pr_err("SoC debug registers:\n");
+
+	for (i = 0; i < num_mce_regs; i++)
+		pr_err("%s: 0x%08x\n", mce_regs[i].name,
+		       in_le32(mce_regs[i].addr));
+
+	if (dump_slb) {
+		unsigned long e, v;
+
+		/* Reuses the function-level index instead of shadowing it */
+		pr_err("slb contents:\n");
+		for (i = 0; i < mmu_slb_size; i++) {
+			asm volatile("slbmfee  %0,%1" : "=r" (e) : "r" (i));
+			asm volatile("slbmfev  %0,%1" : "=r" (v) : "r" (i));
+			pr_err("%02d %016lx %016lx\n", i, e, v);
+		}
+	}
+
+out:
+	/* SRR1[62] is from MSR[62] if recoverable, so pass that back */
+	return !!(srr1 & 0x2);
+}
+
+static const struct of_device_id pasemi_bus_ids[] = { /* match table passed to of_platform_bus_probe() in pasemi_publish_devices() */
+ /* Unfortunately needed for legacy firmwares */
+ { .type = "localbus", },
+ { .type = "sdc", },
+ /* These are the proper entries, which newer firmware uses */
+ { .compatible = "pasemi,localbus", },
+ { .compatible = "pasemi,sdc", },
+ {},
+};
+
+static int __init pasemi_publish_devices(void)
+{ /* Device initcall for the pasemi machine; always returns 0. */
+ /* Publish OF platform devices for SDC and other non-PCI devices */
+ of_platform_bus_probe(NULL, pasemi_bus_ids, NULL);
+
+ nemo_init_rtc(); /* does nothing unless CONFIG_PPC_PASEMI_NEMO is set */
+
+ return 0;
+}
+machine_device_initcall(pasemi, pasemi_publish_devices);
+
+
+/*
+ * Machine probe: returns 1 when the machine is compatible with "PA6T-1682M"
+ * or "pasemi,pwrficient", and 0 otherwise. On a match it also applies the
+ * Nemo overrides (when configured) and runs the early IOMMU init.
+ * NOTE(review): the earlier remark here ("Called very early, MMU is off,
+ * device-tree isn't unflattened") looks stale, since this code queries the
+ * tree through of_machine_is_compatible() -- confirm.
+ */
+static int __init pas_probe(void)
+{
+ if (!of_machine_is_compatible("PA6T-1682M") &&
+ !of_machine_is_compatible("pasemi,pwrficient"))
+ return 0;
+
+#ifdef CONFIG_PPC_PASEMI_NEMO
+ /*
+ * Check for the Nemo motherboard here, if we are running on one
+ * change the machine definition to fit
+ */
+ if (of_machine_is_compatible("pasemi,nemo")) {
+ pm_power_off = pas_shutdown;
+ ppc_md.name = "A-EON Amigaone X1000";
+ }
+#endif
+
+ iommu_init_early_pasemi();
+
+ return 1;
+}
+
+define_machine(pasemi) { /* machine description wiring up the pas_* callbacks defined for this platform */
+ .name = "PA Semi PWRficient", /* replaced in pas_probe() on Nemo boards */
+ .probe = pas_probe,
+ .setup_arch = pas_setup_arch,
+ .discover_phbs = pas_pci_init,
+ .init_IRQ = pas_init_IRQ,
+ .get_irq = mpic_get_irq,
+ .restart = pas_restart,
+ .get_boot_time = pas_get_boot_time,
+ .progress = pas_progress,
+ .machine_check_exception = pas_machine_check_handler,
+};
diff --git a/arch/powerpc/platforms/pasemi/time.c b/arch/powerpc/platforms/pasemi/time.c
new file mode 100644
index 000000000..70ac6db02
--- /dev/null
+++ b/arch/powerpc/platforms/pasemi/time.c
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2006 PA Semi, Inc
+ *
+ * Maintained by: Olof Johansson <olof@lixom.net>
+ */
+
+#include <linux/time.h>
+
+#include <asm/time.h>
+
+#include "pasemi.h"
+
+time64_t __init pas_get_boot_time(void)
+{
+ /* Let's just return a fake date right now: a fixed 2006-01-01 12:00:00
+ * passed through mktime64(), not a value read from hardware */
+ return mktime64(2006, 1, 1, 12, 0, 0);
+}
diff --git a/arch/powerpc/platforms/powermac/Kconfig b/arch/powerpc/platforms/powermac/Kconfig
new file mode 100644
index 000000000..130707ec9
--- /dev/null
+++ b/arch/powerpc/platforms/powermac/Kconfig
@@ -0,0 +1,34 @@
+# SPDX-License-Identifier: GPL-2.0
+config PPC_PMAC
+ bool "Apple PowerMac based machines"
+ depends on PPC_BOOK3S && CPU_BIG_ENDIAN
+ select MPIC
+ select FORCE_PCI
+ select PPC_INDIRECT_PCI if PPC32
+ select PPC_MPC106 if PPC32
+ select PPC_64S_HASH_MMU if PPC64
+ select PPC_HASH_MMU_NATIVE
+ select ZONE_DMA if PPC32
+ default y
+
+config PPC_PMAC64
+ bool
+ depends on PPC_PMAC && PPC64
+ select MPIC
+ select U3_DART
+ select MPIC_U3_HT_IRQS
+ select GENERIC_TBSYNC
+ select PPC_970_NAP
+ default y
+
+config PPC_PMAC32_PSURGE
+ bool "Support for powersurge upgrade cards" if EXPERT
+ depends on SMP && PPC32 && PPC_PMAC
+ select PPC_SMP_MUXED_IPI
+ select IRQ_DOMAIN_NOMAP
+ default y
+ help
+ The powersurge cpu boards can be used in the generation
+ of powermacs that have a socket for an upgradeable cpu card,
+ including the 7500, 8500, 9500, 9600. Support exists for
+ both dual and quad socket upgrade cards.
diff --git a/arch/powerpc/platforms/powermac/Makefile b/arch/powerpc/platforms/powermac/Makefile
new file mode 100644
index 000000000..cf85f0662
--- /dev/null
+++ b/arch/powerpc/platforms/powermac/Makefile
@@ -0,0 +1,25 @@
+# SPDX-License-Identifier: GPL-2.0
+CFLAGS_bootx_init.o += -fPIC
+CFLAGS_bootx_init.o += -fno-stack-protector
+
+KASAN_SANITIZE_bootx_init.o := n
+
+ifdef CONFIG_KASAN
+CFLAGS_bootx_init.o += -DDISABLE_BRANCH_PROFILING
+endif
+
+ifdef CONFIG_FUNCTION_TRACER
+# Do not trace early boot code
+CFLAGS_REMOVE_bootx_init.o = $(CC_FLAGS_FTRACE)
+endif
+
+obj-y += pic.o setup.o time.o feature.o pci.o \
+ sleep.o low_i2c.o cache.o pfunc_core.o \
+ pfunc_base.o udbg_scc.o udbg_adb.o
+obj-$(CONFIG_PMAC_BACKLIGHT) += backlight.o
+# CONFIG_NVRAM is an arch. independent tristate symbol, for pmac32 we really
+# need this to be a bool. Cheat here and pretend CONFIG_NVRAM=m is really
+# CONFIG_NVRAM=y
+obj-$(CONFIG_NVRAM:m=y) += nvram.o
+obj-$(CONFIG_PPC32) += bootx_init.o
+obj-$(CONFIG_SMP) += smp.o
diff --git a/arch/powerpc/platforms/powermac/backlight.c b/arch/powerpc/platforms/powermac/backlight.c
new file mode 100644
index 000000000..aeb79a8b3
--- /dev/null
+++ b/arch/powerpc/platforms/powermac/backlight.c
@@ -0,0 +1,220 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Miscellaneous procedures for dealing with the PowerMac hardware.
+ * Contains support for the backlight.
+ *
+ * Copyright (C) 2000 Benjamin Herrenschmidt
+ * Copyright (C) 2006 Michael Hanselmann <linux-kernel@hansmi.ch>
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/fb.h>
+#include <linux/backlight.h>
+#include <linux/adb.h>
+#include <linux/pmu.h>
+#include <linux/atomic.h>
+#include <linux/export.h>
+#include <asm/backlight.h>
+
+#define OLD_BACKLIGHT_MAX 15
+
+static void pmac_backlight_key_worker(struct work_struct *work);
+static void pmac_backlight_set_legacy_worker(struct work_struct *work);
+
+static DECLARE_WORK(pmac_backlight_key_work, pmac_backlight_key_worker);
+static DECLARE_WORK(pmac_backlight_set_legacy_work, pmac_backlight_set_legacy_worker);
+
+/* Although these variables are used in interrupt context, it makes no sense to
+ * protect them. No user is able to produce enough key events per second and
+ * notice the errors that might happen.
+ */
+static int pmac_backlight_key_queued;
+static int pmac_backlight_set_legacy_queued;
+
+/* The via-pmu code allows the backlight to be grabbed, in which case the
+ * in-kernel control of the brightness needs to be disabled. This should
+ * only be used by really old PowerBooks.
+ */
+static atomic_t kernel_backlight_disabled = ATOMIC_INIT(0);
+
+/* Protect the pmac_backlight variable below.
+ You should hold this lock when using the pmac_backlight pointer to
+ prevent its potential removal. */
+DEFINE_MUTEX(pmac_backlight_mutex);
+
+/* Main backlight storage
+ *
+ * Backlight drivers in this variable are required to have the "ops"
+ * attribute set and to have an update_status function.
+ *
+ * We can only store one backlight here, but since Apple laptops have only one
+ * internal display, it doesn't matter. Other backlight drivers can be used
+ * independently.
+ *
+ */
+struct backlight_device *pmac_backlight;
+
+/*
+ * Report whether the device tree "backlight" node advertises a given
+ * control type.
+ *
+ * Returns 1 when a node named "backlight" exists and its
+ * "backlight-control" property starts with @type, 0 otherwise. The node
+ * reference obtained by the lookup is dropped before returning.
+ */
+int pmac_has_backlight_type(const char *type)
+{
+	struct device_node *node;
+	const char *control;
+	int matches;
+
+	node = of_find_node_by_name(NULL, "backlight");
+	if (!node)
+		return 0;
+
+	control = of_get_property(node, "backlight-control", NULL);
+	matches = control && strncmp(control, type, strlen(type)) == 0;
+	of_node_put(node);
+
+	return matches ? 1 : 0;
+}
+
+/*
+ * Map a curve value back to a backlight level.
+ *
+ * Scans info->bl_curve for the entry closest to @value and returns its
+ * index. When @info is NULL or has no bl_dev, or when no entry is accepted,
+ * the highest level (FB_BACKLIGHT_LEVELS - 1) is returned.
+ */
+int pmac_backlight_curve_lookup(struct fb_info *info, int value)
+{
+	int best_level = FB_BACKLIGHT_LEVELS - 1;
+	int bound = 0;
+	int idx;
+
+	if (!info || !info->bl_dev)
+		return best_level;
+
+	/* Start the search bound at the largest entry of the curve. */
+	for (idx = 0; idx < FB_BACKLIGHT_LEVELS; idx++) {
+		int entry = (int)info->bl_curve[idx];
+
+		if (entry > bound)
+			bound = entry;
+	}
+
+	/* An entry is accepted only if it is strictly closer than the bound. */
+	for (idx = 0; idx < FB_BACKLIGHT_LEVELS; idx++) {
+		int dist = abs(info->bl_curve[idx] - value);
+
+		if (dist < bound) {
+			bound = dist;
+			best_level = idx;
+		}
+	}
+
+	return best_level;
+}
+
+/*
+ * Work item behind pmac_backlight_key(): moves the brightness one step
+ * (max_brightness / 15) down when the queued direction is non-zero, up
+ * otherwise, clamps the result to [0, max_brightness] and pushes it to the
+ * registered backlight device. Does nothing while kernel backlight control
+ * is disabled or when no backlight device is registered.
+ */
+static void pmac_backlight_key_worker(struct work_struct *work)
+{
+	struct backlight_properties *bl_props;
+	int step, target;
+
+	if (atomic_read(&kernel_backlight_disabled))
+		return;
+
+	mutex_lock(&pmac_backlight_mutex);
+	if (!pmac_backlight)
+		goto unlock;
+
+	bl_props = &pmac_backlight->props;
+
+	step = bl_props->max_brightness / 15;
+	if (pmac_backlight_key_queued)
+		step = -step;
+	target = bl_props->brightness + step;
+
+	if (target < 0)
+		target = 0;
+	else if (target > bl_props->max_brightness)
+		target = bl_props->max_brightness;
+
+	bl_props->brightness = target;
+	backlight_update_status(pmac_backlight);
+unlock:
+	mutex_unlock(&pmac_backlight_mutex);
+}
+
+/*
+ * Record a brightness key event and defer the actual change to the
+ * workqueue. This function is called in interrupt context.
+ */
+void pmac_backlight_key(int direction)
+{
+	if (!atomic_read(&kernel_backlight_disabled)) {
+		/*
+		 * Several interrupts may arrive before the work item runs;
+		 * it then executes only once, using the last direction stored.
+		 */
+		pmac_backlight_key_queued = direction;
+		schedule_work(&pmac_backlight_key_work);
+	}
+}
+
+/*
+ * Apply a brightness given on the legacy 0..OLD_BACKLIGHT_MAX scale.
+ *
+ * The value is rescaled to the device's 0..max_brightness range, clamped
+ * and pushed to the registered backlight device under pmac_backlight_mutex.
+ * Returns 0 on success, -ENXIO when no backlight device is registered.
+ */
+static int __pmac_backlight_set_legacy_brightness(int brightness)
+{
+	struct backlight_properties *bl_props;
+	int ret = -ENXIO;
+
+	mutex_lock(&pmac_backlight_mutex);
+	if (!pmac_backlight)
+		goto out;
+
+	bl_props = &pmac_backlight->props;
+	bl_props->brightness = brightness * (bl_props->max_brightness + 1) /
+			       (OLD_BACKLIGHT_MAX + 1);
+
+	if (bl_props->brightness > bl_props->max_brightness)
+		bl_props->brightness = bl_props->max_brightness;
+	else if (bl_props->brightness < 0)
+		bl_props->brightness = 0;
+
+	backlight_update_status(pmac_backlight);
+	ret = 0;
+out:
+	mutex_unlock(&pmac_backlight_mutex);
+	return ret;
+}
+
+/*
+ * Work item behind pmac_backlight_set_legacy_brightness_pmu(): applies the
+ * most recently queued legacy brightness unless kernel backlight control
+ * is disabled. The result of the update is not reported anywhere.
+ */
+static void pmac_backlight_set_legacy_worker(struct work_struct *work)
+{
+	if (!atomic_read(&kernel_backlight_disabled))
+		__pmac_backlight_set_legacy_brightness(pmac_backlight_set_legacy_queued);
+}
+
+/*
+ * Queue a legacy-scale brightness change to be applied from the workqueue.
+ * This function is called in interrupt context.
+ */
+void pmac_backlight_set_legacy_brightness_pmu(int brightness)
+{
+	if (!atomic_read(&kernel_backlight_disabled)) {
+		pmac_backlight_set_legacy_queued = brightness;
+		schedule_work(&pmac_backlight_set_legacy_work);
+	}
+}
+
+/*
+ * Set the brightness from a legacy-scale (0..OLD_BACKLIGHT_MAX) value.
+ * Unlike the _pmu variant this applies the change directly, taking
+ * pmac_backlight_mutex, and does not check kernel_backlight_disabled.
+ * Returns 0 on success or -ENXIO when no backlight device is registered.
+ */
+int pmac_backlight_set_legacy_brightness(int brightness)
+{
+ return __pmac_backlight_set_legacy_brightness(brightness);
+}
+
+/*
+ * Read the current brightness converted to the legacy scale.
+ *
+ * Returns props.brightness rescaled from the device's 0..max_brightness
+ * range to 0..OLD_BACKLIGHT_MAX, or -ENXIO when no backlight device is
+ * registered.
+ */
+int pmac_backlight_get_legacy_brightness(void)
+{
+	int legacy = -ENXIO;
+
+	mutex_lock(&pmac_backlight_mutex);
+	if (pmac_backlight) {
+		struct backlight_properties *bl_props = &pmac_backlight->props;
+
+		legacy = bl_props->brightness * (OLD_BACKLIGHT_MAX + 1) /
+			 (bl_props->max_brightness + 1);
+	}
+	mutex_unlock(&pmac_backlight_mutex);
+
+	return legacy;
+}
+
+/*
+ * Increment the disable count. While the count is non-zero the key and
+ * legacy-PMU entry points and their work items return without touching
+ * the backlight. Each call is undone by one pmac_backlight_enable().
+ */
+void pmac_backlight_disable(void)
+{
+ atomic_inc(&kernel_backlight_disabled);
+}
+
+/*
+ * Decrement the disable count taken by pmac_backlight_disable(). The
+ * count is not checked for underflow here.
+ */
+void pmac_backlight_enable(void)
+{
+ atomic_dec(&kernel_backlight_disabled);
+}
+
+EXPORT_SYMBOL_GPL(pmac_backlight);
+EXPORT_SYMBOL_GPL(pmac_backlight_mutex);
+EXPORT_SYMBOL_GPL(pmac_has_backlight_type);
diff --git a/arch/powerpc/platforms/powermac/bootx_init.c b/arch/powerpc/platforms/powermac/bootx_init.c
new file mode 100644
index 000000000..72eb99aba
--- /dev/null
+++ b/arch/powerpc/platforms/powermac/bootx_init.c
@@ -0,0 +1,595 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Early boot support code for BootX bootloader
+ *
+ * Copyright (C) 2005 Ben. Herrenschmidt (benh@kernel.crashing.org)
+ */
+
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/of_fdt.h>
+#include <generated/utsrelease.h>
+#include <asm/sections.h>
+#include <asm/prom.h>
+#include <asm/page.h>
+#include <asm/bootx.h>
+#include <asm/btext.h>
+#include <asm/io.h>
+#include <asm/setup.h>
+
+#undef DEBUG
+#define SET_BOOT_BAT
+
+#ifdef DEBUG
+#define DBG(fmt...) do { bootx_printf(fmt); } while(0)
+#else
+#define DBG(fmt...) do { } while(0)
+#endif
+
+extern void __start(unsigned long r3, unsigned long r4, unsigned long r5);
+
+static unsigned long __initdata bootx_dt_strbase;
+static unsigned long __initdata bootx_dt_strend;
+static unsigned long __initdata bootx_node_chosen;
+static boot_infos_t * __initdata bootx_info;
+static char __initdata bootx_disp_path[256];
+
+/* Is boot-info compatible ? */
+#define BOOT_INFO_IS_COMPATIBLE(bi) \
+ ((bi)->compatible_version <= BOOT_INFO_VERSION)
+#define BOOT_INFO_IS_V2_COMPATIBLE(bi) ((bi)->version >= 2)
+#define BOOT_INFO_IS_V4_COMPATIBLE(bi) ((bi)->version >= 4)
+
+/*
+ * Minimal printf for the early boot text console (btext).
+ *
+ * Only two conversions are handled: %s (const char *, NULL is shown as
+ * "<NULL>") and %x (consumes an unsigned long, drawn with btext_drawhex).
+ * A '\n' in the format flushes the line, draws "\r\n" and flushes again.
+ * Any other character following '%' is not consumed as a conversion.
+ * Without CONFIG_BOOTX_TEXT the function is an empty stub.
+ */
+#ifdef CONFIG_BOOTX_TEXT
+static void __init bootx_printf(const char *format, ...)
+{
+ const char *p, *q, *s;
+ va_list args;
+ unsigned long v;
+
+ va_start(args, format);
+ for (p = format; *p != 0; p = q) {
+ /* find the end of the literal run starting at p */
+ for (q = p; *q != 0 && *q != '\n' && *q != '%'; ++q)
+ ;
+ if (q > p)
+ btext_drawtext(p, q - p);
+ if (*q == 0)
+ break;
+ if (*q == '\n') {
+ ++q;
+ btext_flushline();
+ btext_drawstring("\r\n");
+ btext_flushline();
+ continue;
+ }
+ /* *q is '%': step over it and look at the conversion character */
+ ++q;
+ if (*q == 0)
+ break;
+ switch (*q) {
+ case 's':
+ ++q;
+ s = va_arg(args, const char *);
+ if (s == NULL)
+ s = "<NULL>";
+ btext_drawstring(s);
+ break;
+ case 'x':
+ ++q;
+ v = va_arg(args, unsigned long);
+ btext_drawhex(v);
+ break;
+ }
+ }
+ va_end(args);
+}
+#else /* CONFIG_BOOTX_TEXT */
+static void __init bootx_printf(const char *format, ...) {}
+#endif /* CONFIG_BOOTX_TEXT */
+
+/*
+ * Look up a property by name in a node of the BootX device tree.
+ *
+ * @base is the address of the tree, @node the offset of the node within
+ * it; the property list is chained through offsets relative to @base.
+ * Returns a pointer to the property value (base + value offset) or NULL
+ * when the node has no property called @prop.
+ */
+static void * __init bootx_early_getprop(unsigned long base,
+					 unsigned long node,
+					 char *prop)
+{
+	struct bootx_dt_node *dn = (struct bootx_dt_node *)(base + node);
+	u32 *link;
+
+	for (link = &dn->properties; *link; ) {
+		struct bootx_dt_prop *cur =
+			(struct bootx_dt_prop *)(base + *link);
+		char *cur_name = (char *)((unsigned long)cur->name + base);
+
+		if (!strcmp(cur_name, prop))
+			return (void *)((unsigned long)cur->value + base);
+		link = &cur->next;
+	}
+	return NULL;
+}
+
+/*
+ * Append one 32-bit cell to the output being built: round the cursor
+ * *(mem) up to a 4-byte boundary, store token there and advance the cursor
+ * by 4. mem is a pointer to the unsigned long cursor and is evaluated
+ * several times, so it must not have side effects.
+ */
+#define dt_push_token(token, mem) \
+ do { \
+ *(mem) = ALIGN(*(mem),4); \
+ *((u32 *)*(mem)) = token; \
+ *(mem) += 4; \
+ } while(0)
+
+/*
+ * Search the strings block built so far for @str.
+ *
+ * The block spans bootx_dt_strbase..bootx_dt_strend; its first 4 bytes are
+ * skipped, so a valid offset is never 0. Returns the offset of the string
+ * from bootx_dt_strbase, or 0 when it is not present.
+ */
+static unsigned long __init bootx_dt_find_string(char *str)
+{
+	char *table = (char *)bootx_dt_strbase;
+	char *cur;
+
+	for (cur = table + 4; cur < (char *)bootx_dt_strend;
+	     cur += strlen(cur) + 1) {
+		if (!strcmp(cur, str))
+			return cur - table;
+	}
+	return 0;
+}
+
+/*
+ * Emit one property into the flattened tree at *mem_end.
+ *
+ * Writes the OF_DT_PROP token, the value length and the offset of @name in
+ * the strings block, followed by the value padded to a 4-byte boundary.
+ * A NULL @data forces a zero-length property. The property is dropped,
+ * with a warning, when @name is not in the strings block or when @size
+ * exceeds 0x20000.
+ */
+static void __init bootx_dt_add_prop(char *name, void *data, int size,
+				     unsigned long *mem_end)
+{
+	unsigned long name_off = bootx_dt_find_string(name);
+
+	if (!data)
+		size = 0;
+
+	if (!name_off) {
+		bootx_printf("WARNING: Can't find string index for <%s>\n",
+			     name);
+		return;
+	}
+	if (size > 0x20000) {
+		bootx_printf("WARNING: ignoring large property ");
+		bootx_printf("%s length 0x%x\n", name, size);
+		return;
+	}
+
+	/* property header: token, value length, name offset */
+	dt_push_token(OF_DT_PROP, mem_end);
+	dt_push_token(size, mem_end);
+	dt_push_token(name_off, mem_end);
+
+	if (!size || !data)
+		return;
+
+	/* property value, then pad the cursor to a 4-byte boundary */
+	memcpy((void *)*mem_end, data, size);
+	*mem_end = ALIGN(*mem_end + size, 4);
+}
+
+/*
+ * Add the Linux-specific properties of /chosen: the "linux,bootx" marker,
+ * "bootargs" when BootX passed kernel parameters, the initrd start/end
+ * addresses when a ramdisk is present, and "linux,stdout-path" when a
+ * display path was recorded. @base is not used.
+ */
+static void __init bootx_add_chosen_props(unsigned long base,
+					  unsigned long *mem_end)
+{
+	unsigned long info_addr = (unsigned long)bootx_info;
+	u32 val;
+
+	bootx_dt_add_prop("linux,bootx", NULL, 0, mem_end);
+
+	if (bootx_info->kernelParamsOffset) {
+		char *cmdline = (char *)info_addr +
+				bootx_info->kernelParamsOffset;
+
+		bootx_dt_add_prop("bootargs", cmdline, strlen(cmdline) + 1,
+				  mem_end);
+	}
+
+	if (bootx_info->ramDisk) {
+		/* ramDisk is an offset from the boot_infos structure */
+		val = info_addr + bootx_info->ramDisk;
+		bootx_dt_add_prop("linux,initrd-start", &val, 4, mem_end);
+		val += bootx_info->ramDiskSize;
+		bootx_dt_add_prop("linux,initrd-end", &val, 4, mem_end);
+	}
+
+	if (bootx_disp_path[0] != 0)
+		bootx_dt_add_prop("linux,stdout-path", bootx_disp_path,
+				  strlen(bootx_disp_path) + 1, mem_end);
+}
+
+/*
+ * Add the properties describing the BootX display: depth, width, height,
+ * bytes per line and the address of the top-left pixel of the display
+ * rectangle. With @has_real_node set the node is also tagged
+ * "linux,boot-display" and "linux,opened"; otherwise "linux,bootx-noscreen"
+ * is added instead. @base is not used.
+ */
+static void __init bootx_add_display_props(unsigned long base,
+					    unsigned long *mem_end,
+					    int has_real_node)
+{
+	boot_infos_t *info = bootx_info;
+	u32 cell;
+
+	if (!has_real_node) {
+		bootx_dt_add_prop("linux,bootx-noscreen", NULL, 0, mem_end);
+	} else {
+		bootx_dt_add_prop("linux,boot-display", NULL, 0, mem_end);
+		bootx_dt_add_prop("linux,opened", NULL, 0, mem_end);
+	}
+
+	cell = info->dispDeviceDepth;
+	bootx_dt_add_prop("linux,bootx-depth", &cell, 4, mem_end);
+
+	cell = info->dispDeviceRect[2] - info->dispDeviceRect[0];
+	bootx_dt_add_prop("linux,bootx-width", &cell, 4, mem_end);
+
+	cell = info->dispDeviceRect[3] - info->dispDeviceRect[1];
+	bootx_dt_add_prop("linux,bootx-height", &cell, 4, mem_end);
+
+	cell = info->dispDeviceRowBytes;
+	bootx_dt_add_prop("linux,bootx-linebytes", &cell, 4, mem_end);
+
+	/* prefer dispDeviceBase, fall back to logicalDisplayBase when it is 0 */
+	cell = (u32)info->dispDeviceBase;
+	if (!cell)
+		cell = (u32)info->logicalDisplayBase;
+	cell += info->dispDeviceRect[1] * info->dispDeviceRowBytes;
+	cell += info->dispDeviceRect[0] * ((info->dispDeviceDepth + 7) / 8);
+	bootx_dt_add_prop("linux,bootx-addr", &cell, 4, mem_end);
+}
+
+/*
+ * Append @s, including its terminating NUL, at *mem_end and move both
+ * *mem_end and bootx_dt_strend past it. No check is made for a string that
+ * is already present.
+ */
+static void __init bootx_dt_add_string(char *s, unsigned long *mem_end)
+{
+	unsigned int len = strlen(s) + 1;
+
+	memcpy((void *)*mem_end, s, len);
+	*mem_end += len;
+	bootx_dt_strend = *mem_end;
+}
+
+/*
+ * First pass over the BootX tree: collect property names into the strings
+ * block.
+ *
+ * For the node at offset @node (relative to @base) and, recursively, all
+ * of its children, every property name except "name" is added to the
+ * strings block at *mem_end unless it is already there. The names of the
+ * properties that the second pass adds to /chosen and to the display node
+ * are added as well. Side effects: records the /chosen node offset in
+ * bootx_node_chosen and the display node's full name in bootx_disp_path.
+ */
+static void __init bootx_scan_dt_build_strings(unsigned long base,
+ unsigned long node,
+ unsigned long *mem_end)
+{
+ struct bootx_dt_node *np = (struct bootx_dt_node *)(base + node);
+ u32 *cpp, *ppp = &np->properties;
+ unsigned long soff;
+ char *namep;
+
+ /* Keep refs to known nodes */
+ namep = np->full_name ? (char *)(base + np->full_name) : NULL;
+ if (namep == NULL) {
+ bootx_printf("Node without a full name !\n");
+ namep = "";
+ }
+ DBG("* strings: %s\n", namep);
+
+ if (!strcmp(namep, "/chosen")) {
+ DBG(" detected /chosen ! adding properties names !\n");
+ bootx_dt_add_string("linux,bootx", mem_end);
+ bootx_dt_add_string("linux,stdout-path", mem_end);
+ bootx_dt_add_string("linux,initrd-start", mem_end);
+ bootx_dt_add_string("linux,initrd-end", mem_end);
+ bootx_dt_add_string("bootargs", mem_end);
+ bootx_node_chosen = node;
+ }
+ if (node == bootx_info->dispDeviceRegEntryOffset) {
+ DBG(" detected display ! adding properties names !\n");
+ bootx_dt_add_string("linux,boot-display", mem_end);
+ bootx_dt_add_string("linux,opened", mem_end);
+ strscpy(bootx_disp_path, namep, sizeof(bootx_disp_path));
+ }
+
+ /* get and store all property names */
+ while (*ppp) {
+ struct bootx_dt_prop *pp =
+ (struct bootx_dt_prop *)(base + *ppp);
+
+ namep = pp->name ? (char *)(base + pp->name) : NULL;
+ /* "name" is skipped here, matching the second pass */
+ if (namep == NULL || strcmp(namep, "name") == 0)
+ goto next;
+ /* get/create string entry */
+ soff = bootx_dt_find_string(namep);
+ if (soff == 0)
+ bootx_dt_add_string(namep, mem_end);
+ next:
+ ppp = &pp->next;
+ }
+
+ /* do all our children */
+ cpp = &np->child;
+ while(*cpp) {
+ /* np is re-pointed at the child to follow its sibling link */
+ np = (struct bootx_dt_node *)(base + *cpp);
+ bootx_scan_dt_build_strings(base, *cpp, mem_end);
+ cpp = &np->sibling;
+ }
+}
+
+/*
+ * Second pass over the BootX tree: emit the flattened structure block.
+ *
+ * For the node at offset @node (relative to @base) this writes, at
+ * *mem_end: OF_DT_BEGIN_NODE, the node's unit name padded to 4 bytes, all
+ * properties except "name" (and "bootargs" in /chosen, which is replaced),
+ * the extra Linux properties for /chosen and the display node, all child
+ * nodes recursively, and finally OF_DT_END_NODE. Relies on the strings
+ * block and bootx_node_chosen having been set up by the first pass.
+ */
+static void __init bootx_scan_dt_build_struct(unsigned long base,
+ unsigned long node,
+ unsigned long *mem_end)
+{
+ struct bootx_dt_node *np = (struct bootx_dt_node *)(base + node);
+ u32 *cpp, *ppp = &np->properties;
+ char *namep, *p, *ep, *lp;
+ int l;
+
+ dt_push_token(OF_DT_BEGIN_NODE, mem_end);
+
+ /* get the node's full name */
+ namep = np->full_name ? (char *)(base + np->full_name) : NULL;
+ if (namep == NULL)
+ namep = "";
+ l = strlen(namep);
+
+ DBG("* struct: %s\n", namep);
+
+ /* Fixup an Apple bug where they have bogus \0 chars in the
+ * middle of the path in some properties, and extract
+ * the unit name (everything after the last '/').
+ */
+ memcpy((void *)*mem_end, namep, l + 1);
+ namep = (char *)*mem_end;
+ /* compact in place: lp is the write cursor and restarts at each '/' */
+ for (lp = p = namep, ep = namep + l; p < ep; p++) {
+ if (*p == '/')
+ lp = namep;
+ else if (*p != 0)
+ *lp++ = *p;
+ }
+ *lp = 0;
+ *mem_end = ALIGN((unsigned long)lp + 1, 4);
+
+ /* get and store all properties */
+ while (*ppp) {
+ struct bootx_dt_prop *pp =
+ (struct bootx_dt_prop *)(base + *ppp);
+
+ namep = pp->name ? (char *)(base + pp->name) : NULL;
+ /* Skip "name" */
+ if (namep == NULL || !strcmp(namep, "name"))
+ goto next;
+ /* Skip "bootargs" in /chosen too as we replace it */
+ if (node == bootx_node_chosen && !strcmp(namep, "bootargs"))
+ goto next;
+
+ /* push property head */
+ bootx_dt_add_prop(namep,
+ pp->value ? (void *)(base + pp->value): NULL,
+ pp->length, mem_end);
+ next:
+ ppp = &pp->next;
+ }
+
+ if (node == bootx_node_chosen) {
+ bootx_add_chosen_props(base, mem_end);
+ /* no display node in the tree: describe the screen in /chosen */
+ if (bootx_info->dispDeviceRegEntryOffset == 0)
+ bootx_add_display_props(base, mem_end, 0);
+ }
+ else if (node == bootx_info->dispDeviceRegEntryOffset)
+ bootx_add_display_props(base, mem_end, 1);
+
+ /* do all our children */
+ cpp = &np->child;
+ while(*cpp) {
+ np = (struct bootx_dt_node *)(base + *cpp);
+ bootx_scan_dt_build_struct(base, *cpp, mem_end);
+ cpp = &np->sibling;
+ }
+
+ dt_push_token(OF_DT_END_NODE, mem_end);
+}
+
+/*
+ * Build a flattened device tree from the BootX tree.
+ *
+ * @start is an offset from the boot_infos structure; the blob is laid out
+ * from there (4-byte aligned) as: boot_param_header, an 8-byte aligned
+ * reserve map of eight u64 slots, the strings block, then the structure
+ * block on a 16-byte boundary. Both tree passes start at node offset 4.
+ * Returns the address of the header.
+ */
+static unsigned long __init bootx_flatten_dt(unsigned long start)
+{
+ boot_infos_t *bi = bootx_info;
+ unsigned long mem_start, mem_end;
+ struct boot_param_header *hdr;
+ unsigned long base;
+ u64 *rsvmap;
+
+ /* Start using memory after the big blob passed by BootX, get
+ * some space for the header
+ */
+ mem_start = mem_end = ALIGN(((unsigned long)bi) + start, 4);
+ DBG("Boot params header at: %x\n", mem_start);
+ hdr = (struct boot_param_header *)mem_start;
+ mem_end += sizeof(struct boot_param_header);
+ rsvmap = (u64 *)(ALIGN(mem_end, 8));
+ hdr->off_mem_rsvmap = ((unsigned long)rsvmap) - mem_start;
+ mem_end = ((unsigned long)rsvmap) + 8 * sizeof(u64);
+
+ /* Get base of tree */
+ base = ((unsigned long)bi) + bi->deviceTreeOffset;
+
+ /* Build string array */
+ DBG("Building string array at: %x\n", mem_end);
+ DBG("Device Tree Base=%x\n", base);
+ bootx_dt_strbase = mem_end;
+ /* leave 4 bytes unused so that string offset 0 can mean "not found" */
+ mem_end += 4;
+ bootx_dt_strend = mem_end;
+ bootx_scan_dt_build_strings(base, 4, &mem_end);
+ /* Add some strings */
+ bootx_dt_add_string("linux,bootx-noscreen", &mem_end);
+ bootx_dt_add_string("linux,bootx-depth", &mem_end);
+ bootx_dt_add_string("linux,bootx-width", &mem_end);
+ bootx_dt_add_string("linux,bootx-height", &mem_end);
+ bootx_dt_add_string("linux,bootx-linebytes", &mem_end);
+ bootx_dt_add_string("linux,bootx-addr", &mem_end);
+ /* Wrap up strings */
+ hdr->off_dt_strings = bootx_dt_strbase - mem_start;
+ hdr->dt_strings_size = bootx_dt_strend - bootx_dt_strbase;
+
+ /* Build structure */
+ mem_end = ALIGN(mem_end, 16);
+ DBG("Building device tree structure at: %x\n", mem_end);
+ hdr->off_dt_struct = mem_end - mem_start;
+ bootx_scan_dt_build_struct(base, 4, &mem_end);
+ dt_push_token(OF_DT_END, &mem_end);
+
+ /* Finish header */
+ hdr->boot_cpuid_phys = 0;
+ hdr->magic = OF_DT_HEADER;
+ hdr->totalsize = mem_end - mem_start;
+ hdr->version = OF_DT_VERSION;
+ /* Version 16 is not backward compatible */
+ hdr->last_comp_version = 0x10;
+
+ /* Reserve the whole thing and copy the reserve map in, we
+ * also bump mem_reserve_cnt to cause further reservations to
+ * fail since it's too late.
+ */
+ /* NOTE(review): no mem_reserve_cnt is touched in this function - the
+ * sentence above may be stale; confirm.
+ * NOTE(review): the second cell of each pair is written as an end
+ * address (mem_end, start + ramDiskSize) - confirm that the consumer
+ * of this map expects that rather than a size.
+ */
+ mem_end = ALIGN(mem_end, PAGE_SIZE);
+ DBG("End of boot params: %x\n", mem_end);
+ rsvmap[0] = mem_start;
+ rsvmap[1] = mem_end;
+ if (bootx_info->ramDisk) {
+ rsvmap[2] = ((unsigned long)bootx_info) + bootx_info->ramDisk;
+ rsvmap[3] = rsvmap[2] + bootx_info->ramDiskSize;
+ /* an all-zero pair ends the map */
+ rsvmap[4] = 0;
+ rsvmap[5] = 0;
+ } else {
+ rsvmap[2] = 0;
+ rsvmap[3] = 0;
+ }
+
+ return (unsigned long)hdr;
+}
+
+
+/*
+ * Print the early boot banner on the btext console: kernel release, link
+ * address, frame buffer addresses, end of the kernel image, boot_info
+ * address and a few CPU registers (MSR, SPR 287 shown as PVR and, depending
+ * on the upper 16 bits of that value, SPR 1008 shown as HID0 and SPR 1019
+ * shown as ICTC). Only built with CONFIG_BOOTX_TEXT.
+ */
+#ifdef CONFIG_BOOTX_TEXT
+static void __init btext_welcome(boot_infos_t *bi)
+{
+ unsigned long flags;
+ unsigned long pvr;
+
+ bootx_printf("Welcome to Linux, kernel " UTS_RELEASE "\n");
+ bootx_printf("\nlinked at : 0x%x", KERNELBASE);
+ bootx_printf("\nframe buffer at : 0x%x", bi->dispDeviceBase);
+ bootx_printf(" (phys), 0x%x", bi->logicalDisplayBase);
+ bootx_printf(" (log)");
+ bootx_printf("\nklimit : 0x%x",(unsigned long)_end);
+ bootx_printf("\nboot_info at : 0x%x", bi);
+ __asm__ __volatile__ ("mfmsr %0" : "=r" (flags));
+ bootx_printf("\nMSR : 0x%x", flags);
+ __asm__ __volatile__ ("mfspr %0, 287" : "=r" (pvr));
+ bootx_printf("\nPVR : 0x%x", pvr);
+ /* keep only the upper 16 bits of the PVR for the checks below */
+ pvr >>= 16;
+ if (pvr > 1) {
+ __asm__ __volatile__ ("mfspr %0, 1008" : "=r" (flags));
+ bootx_printf("\nHID0 : 0x%x", flags);
+ }
+ if (pvr == 8 || pvr == 12 || pvr == 0x800c) {
+ __asm__ __volatile__ ("mfspr %0, 1019" : "=r" (flags));
+ bootx_printf("\nICTC : 0x%x", flags);
+ }
+#ifdef DEBUG
+ bootx_printf("\n\n");
+ bootx_printf("bi->deviceTreeOffset : 0x%x\n",
+ bi->deviceTreeOffset);
+ bootx_printf("bi->deviceTreeSize : 0x%x\n",
+ bi->deviceTreeSize);
+#endif
+ bootx_printf("\n\n");
+}
+#endif /* CONFIG_BOOTX_TEXT */
+
+/*
+ * Entry point used when the kernel is started by the BootX bootloader.
+ *
+ * @r4 is the address of the boot_infos structure; @r3 is not used here.
+ * The function relocates the GOT, initialises the file's static state,
+ * optionally sets up the early text console, checks the boot-info version
+ * and architecture (spinning forever on a mismatch), applies workarounds
+ * for old BootX versions, builds a flattened device tree above the BootX
+ * parameters and finally jumps to __start() with its address.
+ */
+void __init bootx_init(unsigned long r3, unsigned long r4)
+{
+ boot_infos_t *bi = (boot_infos_t *) r4;
+ unsigned long hdr;
+ unsigned long space;
+ unsigned long ptr;
+ char *model;
+ unsigned long offset = reloc_offset();
+
+ reloc_got2(offset);
+
+ bootx_info = bi;
+
+ /* We haven't cleared any bss at this point, make sure
+ * what we need is initialized
+ */
+ bootx_dt_strbase = bootx_dt_strend = 0;
+ bootx_node_chosen = 0;
+ bootx_disp_path[0] = 0;
+
+ if (!BOOT_INFO_IS_V2_COMPATIBLE(bi))
+ bi->logicalDisplayBase = bi->dispDeviceBase;
+
+ /* Fixup depth 16 -> 15 as that's what MacOS calls 16bpp */
+ if (bi->dispDeviceDepth == 16)
+ bi->dispDeviceDepth = 15;
+
+
+#ifdef CONFIG_BOOTX_TEXT
+ /* NOTE(review): ptr is computed here but the call below passes
+ * logicalDisplayBase, not ptr - confirm which one is intended.
+ */
+ ptr = (unsigned long)bi->logicalDisplayBase;
+ ptr += bi->dispDeviceRect[1] * bi->dispDeviceRowBytes;
+ ptr += bi->dispDeviceRect[0] * ((bi->dispDeviceDepth + 7) / 8);
+ btext_setup_display(bi->dispDeviceRect[2] - bi->dispDeviceRect[0],
+ bi->dispDeviceRect[3] - bi->dispDeviceRect[1],
+ bi->dispDeviceDepth, bi->dispDeviceRowBytes,
+ (unsigned long)bi->logicalDisplayBase);
+ btext_clearscreen();
+ btext_flushscreen();
+#endif /* CONFIG_BOOTX_TEXT */
+
+ /*
+ * Test if boot-info is compatible. Done only in config
+ * CONFIG_BOOTX_TEXT since there is nothing much we can do
+ * with an incompatible version, except display a message
+ * and eventually hang the processor...
+ *
+ * I'll try to keep enough of boot-info compatible in the
+ * future to always allow display of this message;
+ */
+ if (!BOOT_INFO_IS_COMPATIBLE(bi)) {
+ bootx_printf(" !!! WARNING - Incompatible version"
+ " of BootX !!!\n\n\n");
+ for (;;)
+ ;
+ }
+ if (bi->architecture != BOOT_ARCH_PCI) {
+ bootx_printf(" !!! WARNING - Unsupported machine"
+ " architecture !\n");
+ for (;;)
+ ;
+ }
+
+#ifdef CONFIG_BOOTX_TEXT
+ btext_welcome(bi);
+#endif
+
+ /* New BootX enters kernel with MMU off, i/os are not allowed
+ * here. This hack will have been done by the bootstrap anyway.
+ */
+ if (bi->version < 4) {
+ /*
+ * XXX If this is an iMac, turn off the USB controller.
+ */
+ model = (char *) bootx_early_getprop(r4 + bi->deviceTreeOffset,
+ 4, "model");
+ if (model
+ && (strcmp(model, "iMac,1") == 0
+ || strcmp(model, "PowerMac1,1") == 0)) {
+ bootx_printf("iMac,1 detected, shutting down USB\n");
+ out_le32((unsigned __iomem *)0x80880008, 1); /* XXX */
+ }
+ }
+
+ /* Get a pointer that points above the device tree, args, ramdisk,
+ * etc... to use for generating the flattened tree
+ */
+ if (bi->version < 5) {
+ /* older versions carry no total size: derive it from the
+ * device tree and, when it lies beyond that, the ramdisk
+ */
+ space = bi->deviceTreeOffset + bi->deviceTreeSize;
+ if (bi->ramDisk >= space)
+ space = bi->ramDisk + bi->ramDiskSize;
+ } else
+ space = bi->totalParamsSize;
+
+ bootx_printf("Total space used by parameters & ramdisk: 0x%x\n", space);
+
+ /* New BootX will have flushed all TLBs and enters kernel with
+ * MMU switched OFF, so this should not be useful anymore.
+ */
+ if (bi->version < 4) {
+ unsigned long x __maybe_unused;
+
+ bootx_printf("Touching pages...\n");
+
+ /*
+ * Touch each page to make sure the PTEs for them
+ * are in the hash table - the aim is to try to avoid
+ * getting DSI exceptions while copying the kernel image.
+ */
+ for (ptr = ((unsigned long) &_stext) & PAGE_MASK;
+ ptr < (unsigned long)bi + space; ptr += PAGE_SIZE)
+ x = *(volatile unsigned long *)ptr;
+ }
+
+ /* Ok, now we need to generate a flattened device-tree to pass
+ * to the kernel
+ */
+ bootx_printf("Preparing boot params...\n");
+
+ hdr = bootx_flatten_dt(space);
+
+#ifdef CONFIG_BOOTX_TEXT
+#ifdef SET_BOOT_BAT
+ bootx_printf("Preparing BAT...\n");
+ btext_prepare_BAT();
+#else
+ btext_unmap();
+#endif
+#endif
+
+ /* undo the GOT relocation applied on entry before jumping on */
+ reloc_got2(-offset);
+
+ __start(hdr, KERNELBASE + offset, 0);
+}
diff --git a/arch/powerpc/platforms/powermac/cache.S b/arch/powerpc/platforms/powermac/cache.S
new file mode 100644
index 000000000..b8ae56e9f
--- /dev/null
+++ b/arch/powerpc/platforms/powermac/cache.S
@@ -0,0 +1,356 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * This file contains low-level cache management functions
+ * used for sleep and CPU speed changes on Apple machines.
+ * (In fact the only thing that is Apple-specific is that we assume
+ * that we can read from ROM at physical address 0xfff00000.)
+ *
+ * Copyright (C) 2004 Paul Mackerras (paulus@samba.org) and
+ * Benjamin Herrenschmidt (benh@kernel.crashing.org)
+ */
+
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+#include <asm/cputable.h>
+#include <asm/feature-fixups.h>
+
+/*
+ * Flush and disable all data caches (dL1, L2, L3). This is used
+ * when going to sleep, when doing a PMU based cpufreq transition,
+ * or when "offlining" a CPU on SMP machines. This code is over
+ * paranoid, but I've had enough issues with various CPU revs and
+ * bugs that I decided it was worth being over cautious
+ */
+
+/*
+ * Entry point: returns immediately when not built for Book3S 32-bit;
+ * otherwise dispatches, through feature sections, to the 745x variant
+ * (CPU_FTR_SPEC7450), else the 75x variant (CPU_FTR_L2CR), else
+ * __flush_disable_L1.
+ */
+_GLOBAL(flush_disable_caches)
+#ifndef CONFIG_PPC_BOOK3S_32
+ blr
+#else
+BEGIN_FTR_SECTION
+ b flush_disable_745x
+END_FTR_SECTION_IFSET(CPU_FTR_SPEC7450)
+BEGIN_FTR_SECTION
+ b flush_disable_75x
+END_FTR_SECTION_IFSET(CPU_FTR_L2CR)
+ b __flush_disable_L1
+
+/*
+ * This is the code for G3 and 74[01]0
+ *
+ * Runs with EE and DR cleared in the MSR. Register usage: r10 holds the
+ * caller's LR, r11 the MSR on entry and r8 the HID0 value on entry;
+ * r0, r3, r4, r5, cr0 and CTR are used as scratch.
+ */
+flush_disable_75x:
+	mflr	r10
+
+	/* Turn off EE and DR in MSR */
+	mfmsr	r11
+	rlwinm	r0,r11,0,~MSR_EE
+	rlwinm	r0,r0,0,~MSR_DR
+	sync
+	mtmsr	r0
+	isync
+
+	/* Stop DST streams */
+BEGIN_FTR_SECTION
+	PPC_DSSALL
+	sync
+END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
+
+	/* Stop DPM */
+	mfspr	r8,SPRN_HID0		/* Save SPRN_HID0 in r8 */
+	rlwinm	r4,r8,0,12,10		/* Turn off HID0[DPM] */
+	sync
+	mtspr	SPRN_HID0,r4		/* Disable DPM */
+	sync
+
+	/* Disp-flush L1. We have a weird problem here that I never
+	 * totally figured out. On 750FX, using the ROM for the flush
+	 * results in a non-working flush. We use that workaround for
+	 * now until I finally understand what's going on. --BenH
+	 */
+
+	/* ROM base by default */
+	lis	r4,0xfff0
+	mfpvr	r3
+	srwi	r3,r3,16
+	cmplwi	cr0,r3,0x7000
+	bne+	1f
+	/* RAM base on 750FX */
+	li	r4,0
+	/*
+	 * The line count goes through r5 so that the base address chosen
+	 * above stays in r4 for the load loop. r5 is free here: it is
+	 * reloaded from L2CR before its next use.
+	 */
+1:	li	r5,0x4000
+	mtctr	r5
+1:	lwz	r0,0(r4)
+	addi	r4,r4,32
+	bdnz	1b
+	sync
+	isync
+
+	/* Disable / invalidate / enable L1 data */
+	mfspr	r3,SPRN_HID0
+	rlwinm	r3,r3,0,~(HID0_DCE | HID0_ICE)
+	mtspr	SPRN_HID0,r3
+	sync
+	isync
+	ori	r3,r3,(HID0_DCE|HID0_DCI|HID0_ICE|HID0_ICFI)
+	sync
+	isync
+	mtspr	SPRN_HID0,r3
+	xori	r3,r3,(HID0_DCI|HID0_ICFI)
+	mtspr	SPRN_HID0,r3
+	sync
+
+	/* Get the current value of the L2CR into r5 */
+	mfspr	r5,SPRN_L2CR
+	/* Set to data-only (pre-745x bit) */
+	oris	r3,r5,L2CR_L2DO@h
+	b	2f
+	/* When disabling L2, code must be in L1 */
+	.balign 32
+1:	mtspr	SPRN_L2CR,r3
+3:	sync
+	isync
+	b	1f
+2:	b	3f
+3:	sync
+	isync
+	b	1b
+1:	/* disp-flush L2. The interesting thing here is that the L2 can be
+	 * up to 2Mb ... so using the ROM, we'll end up wrapping back to memory
+	 * but that is probably fine. We disp-flush over 4Mb to be safe
+	 */
+	lis	r4,2
+	mtctr	r4
+	lis	r4,0xfff0
+1:	lwz	r0,0(r4)
+	addi	r4,r4,32
+	bdnz	1b
+	sync
+	isync
+	lis	r4,2
+	mtctr	r4
+	lis	r4,0xfff0
+1:	dcbf	0,r4
+	addi	r4,r4,32
+	bdnz	1b
+	sync
+	isync
+
+	/* now disable L2 */
+	rlwinm	r5,r5,0,~L2CR_L2E
+	b	2f
+	/* When disabling L2, code must be in L1 */
+	.balign 32
+1:	mtspr	SPRN_L2CR,r5
+3:	sync
+	isync
+	b	1f
+2:	b	3f
+3:	sync
+	isync
+	b	1b
+1:	sync
+	isync
+	/* Invalidate L2. This is pre-745x, we clear the L2I bit ourselves */
+	oris	r4,r5,L2CR_L2I@h
+	mtspr	SPRN_L2CR,r4
+	sync
+	isync
+
+	/* Wait for the invalidation to complete */
+1:	mfspr	r3,SPRN_L2CR
+	rlwinm.	r0,r3,0,31,31
+	bne	1b
+
+	/* Clear L2I */
+	xoris	r4,r4,L2CR_L2I@h
+	sync
+	mtspr	SPRN_L2CR,r4
+	sync
+
+	/* now disable the L1 data cache */
+	mfspr	r0,SPRN_HID0
+	rlwinm	r0,r0,0,~(HID0_DCE|HID0_ICE)
+	mtspr	SPRN_HID0,r0
+	sync
+	isync
+
+	/* Restore HID0[DPM] to whatever it was before */
+	sync
+	mfspr	r0,SPRN_HID0
+	rlwimi	r0,r8,0,11,11		/* Turn back HID0[DPM] */
+	mtspr	SPRN_HID0,r0
+	sync
+
+	/* restore DR and EE */
+	sync
+	mtmsr	r11
+	isync
+
+	mtlr	r10
+	blr
+_ASM_NOKPROBE_SYMBOL(flush_disable_75x)
+
+/*
+ * This code is for 745x processors
+ *
+ * Runs with EE and DR cleared in the MSR. Register usage: r11 holds the
+ * MSR on entry and is restored at the end; r6 holds the LDSTCR value while
+ * the L1 ways are flushed one at a time; r0, r3, r4, r5, cr0 and CTR are
+ * scratch. The L3 section is only present with CPU_FTR_L3CR.
+ */
+flush_disable_745x:
+ /* Turn off EE and DR in MSR */
+ mfmsr r11
+ rlwinm r0,r11,0,~MSR_EE
+ rlwinm r0,r0,0,~MSR_DR
+ sync
+ mtmsr r0
+ isync
+
+ /* Stop prefetch streams */
+ PPC_DSSALL
+ sync
+
+ /* Disable L2 prefetching */
+ mfspr r0,SPRN_MSSCR0
+ rlwinm r0,r0,0,0,29
+ mtspr SPRN_MSSCR0,r0
+ sync
+ isync
+ lis r4,0
+ dcbf 0,r4
+ dcbf 0,r4
+ dcbf 0,r4
+ dcbf 0,r4
+ dcbf 0,r4
+ dcbf 0,r4
+ dcbf 0,r4
+ dcbf 0,r4
+
+ /* Due to a bug with the HW flush on some CPU revs, we occasionally
+ * experience data corruption. I'm adding a displacement flush along
+ * with a dcbf loop over a few Mb to "help". The problem isn't totally
+ * fixed by this in theory, but at least, in practice, I couldn't reproduce
+ * it even with a big hammer...
+ */
+
+ /* 0x20000 iterations of 32 bytes each, starting at address 0 */
+ lis r4,0x0002
+ mtctr r4
+ li r4,0
+1:
+ lwz r0,0(r4)
+ addi r4,r4,32 /* Go to start of next cache line */
+ bdnz 1b
+ isync
+
+ /* Now, flush the first 4MB of memory */
+ lis r4,0x0002
+ mtctr r4
+ li r4,0
+ sync
+1:
+ dcbf 0,r4
+ addi r4,r4,32 /* Go to start of next cache line */
+ bdnz 1b
+
+ /* Flush and disable the L1 data cache */
+ mfspr r6,SPRN_LDSTCR
+ lis r3,0xfff0 /* read from ROM for displacement flush */
+ li r4,0xfe /* start with only way 0 unlocked */
+ li r5,128 /* 128 lines in each way */
+1: mtctr r5
+ rlwimi r6,r4,0,24,31
+ mtspr SPRN_LDSTCR,r6
+ sync
+ isync
+2: lwz r0,0(r3) /* touch each cache line */
+ addi r3,r3,32
+ bdnz 2b
+ rlwinm r4,r4,1,24,30 /* move on to the next way */
+ ori r4,r4,1
+ cmpwi r4,0xff /* all done? */
+ bne 1b
+ /* now unlock the L1 data cache */
+ li r4,0
+ rlwimi r6,r4,0,24,31
+ sync
+ mtspr SPRN_LDSTCR,r6
+ sync
+ isync
+
+ /* Flush the L2 cache using the hardware assist */
+ mfspr r3,SPRN_L2CR
+ cmpwi r3,0 /* check if it is enabled first */
+ bge 4f
+ oris r0,r3,(L2CR_L2IO_745x|L2CR_L2DO_745x)@h
+ b 2f
+ /* When disabling/locking L2, code must be in L1 */
+ .balign 32
+1: mtspr SPRN_L2CR,r0 /* lock the L2 cache */
+3: sync
+ isync
+ b 1f
+2: b 3f
+3: sync
+ isync
+ b 1b
+1: sync
+ isync
+ ori r0,r3,L2CR_L2HWF_745x
+ sync
+ mtspr SPRN_L2CR,r0 /* set the hardware flush bit */
+3: mfspr r0,SPRN_L2CR /* wait for it to go to 0 */
+ andi. r0,r0,L2CR_L2HWF_745x
+ bne 3b
+ sync
+ rlwinm r3,r3,0,~L2CR_L2E
+ b 2f
+ /* When disabling L2, code must be in L1 */
+ .balign 32
+1: mtspr SPRN_L2CR,r3 /* disable the L2 cache */
+3: sync
+ isync
+ b 1f
+2: b 3f
+3: sync
+ isync
+ b 1b
+1: sync
+ isync
+ /* request an L2 invalidate and poll until L2I reads back as 0 */
+ oris r4,r3,L2CR_L2I@h
+ mtspr SPRN_L2CR,r4
+ sync
+ isync
+1: mfspr r4,SPRN_L2CR
+ andis. r0,r4,L2CR_L2I@h
+ bne 1b
+ sync
+
+BEGIN_FTR_SECTION
+ /* Flush the L3 cache using the hardware assist */
+4: mfspr r3,SPRN_L3CR
+ cmpwi r3,0 /* check if it is enabled */
+ bge 6f
+ oris r0,r3,L3CR_L3IO@h
+ ori r0,r0,L3CR_L3DO
+ sync
+ mtspr SPRN_L3CR,r0 /* lock the L3 cache */
+ sync
+ isync
+ ori r0,r0,L3CR_L3HWF
+ sync
+ mtspr SPRN_L3CR,r0 /* set the hardware flush bit */
+5: mfspr r0,SPRN_L3CR /* wait for it to go to zero */
+ andi. r0,r0,L3CR_L3HWF
+ bne 5b
+ rlwinm r3,r3,0,~L3CR_L3E
+ sync
+ mtspr SPRN_L3CR,r3 /* disable the L3 cache */
+ sync
+ /* request an L3 invalidate and poll until L3I reads back as 0 */
+ ori r4,r3,L3CR_L3I
+ mtspr SPRN_L3CR,r4
+1: mfspr r4,SPRN_L3CR
+ andi. r0,r4,L3CR_L3I
+ bne 1b
+ sync
+END_FTR_SECTION_IFSET(CPU_FTR_L3CR)
+
+6: mfspr r0,SPRN_HID0 /* now disable the L1 data cache */
+ rlwinm r0,r0,0,~HID0_DCE
+ mtspr SPRN_HID0,r0
+ sync
+ isync
+ mtmsr r11 /* restore DR and EE */
+ isync
+ blr
+_ASM_NOKPROBE_SYMBOL(flush_disable_745x)
+#endif /* CONFIG_PPC_BOOK3S_32 */
diff --git a/arch/powerpc/platforms/powermac/feature.c b/arch/powerpc/platforms/powermac/feature.c
new file mode 100644
index 000000000..ae62d432d
--- /dev/null
+++ b/arch/powerpc/platforms/powermac/feature.c
@@ -0,0 +1,3022 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 1996-2001 Paul Mackerras (paulus@cs.anu.edu.au)
+ * Ben. Herrenschmidt (benh@kernel.crashing.org)
+ *
+ * TODO:
+ *
+ * - Replace mdelay with some schedule loop if possible
+ * - Shorten some obfuscated delays on some routines (like modem
+ * power)
+ * - Refcount some clocks (see darwin)
+ * - Split split split...
+ */
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/spinlock.h>
+#include <linux/adb.h>
+#include <linux/pmu.h>
+#include <linux/ioport.h>
+#include <linux/export.h>
+#include <linux/pci.h>
+#include <asm/sections.h>
+#include <asm/errno.h>
+#include <asm/ohare.h>
+#include <asm/heathrow.h>
+#include <asm/keylargo.h>
+#include <asm/uninorth.h>
+#include <asm/io.h>
+#include <asm/machdep.h>
+#include <asm/pmac_feature.h>
+#include <asm/dbdma.h>
+#include <asm/pci-bridge.h>
+#include <asm/pmac_low_i2c.h>
+
+#include "pmac.h"
+
+#undef DEBUG_FEATURE
+
+#ifdef DEBUG_FEATURE
+#define DBG(fmt...) printk(KERN_DEBUG fmt)
+#else
+#define DBG(fmt...)
+#endif
+
+#ifdef CONFIG_PPC_BOOK3S_32
+extern int powersave_lowspeed;
+#endif
+
+extern int powersave_nap;
+extern struct device_node *k2_skiplist[2];
+
+/*
+ * We use a single global lock to protect accesses. Each driver has
+ * to take care of its own locking
+ */
+DEFINE_RAW_SPINLOCK(feature_lock);
+
+#define LOCK(flags) raw_spin_lock_irqsave(&feature_lock, flags);
+#define UNLOCK(flags) raw_spin_unlock_irqrestore(&feature_lock, flags);
+
+
+/*
+ * Instance of some macio stuffs
+ */
+struct macio_chip macio_chips[MAX_MACIO_CHIPS];
+
+/*
+ * Find the registered mac-io chip that is @child itself or one of its
+ * ancestors (following ->parent). When @type is non-zero only a chip of
+ * that type matches. The scan of macio_chips[] stops at the first entry
+ * with a NULL of_node. Returns the matching entry or NULL.
+ */
+struct macio_chip *macio_find(struct device_node *child, int type)
+{
+	struct device_node *np;
+	int idx;
+
+	for (np = child; np; np = np->parent) {
+		for (idx = 0; idx < MAX_MACIO_CHIPS; idx++) {
+			struct macio_chip *chip = &macio_chips[idx];
+
+			if (!chip->of_node)
+				break;
+			if (np != chip->of_node)
+				continue;
+			if (type && chip->type != type)
+				continue;
+			return chip;
+		}
+	}
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(macio_find);
+/* Printable names of the mac-io chip variants, "Unknown" first.
+ * NOTE(review): presumably indexed by macio_chip.type -- confirm at the
+ * use site, which is outside this part of the file.
+ */
+static const char *macio_names[] =
+{
+ "Unknown",
+ "Grand Central",
+ "OHare",
+ "OHareII",
+ "Heathrow",
+ "Gatwick",
+ "Paddington",
+ "Keylargo",
+ "Pangea",
+ "Intrepid",
+ "K2",
+ "Shasta",
+};
+
+
+struct device_node *uninorth_node;
+u32 __iomem *uninorth_base;
+
+static u32 uninorth_rev;
+static int uninorth_maj;
+static void __iomem *u3_ht_base;
+
+/*
+ * For each motherboard family, we have a table of functions pointers
+ * that handle the various features.
+ */
+
+typedef long (*feature_call)(struct device_node *node, long param, long value);
+/* One table entry: a feature selector paired with its feature_call handler. */
+struct feature_table_entry {
+ unsigned int selector;
+ feature_call function;
+};
+
+struct pmac_mb_def
+{
+ const char* model_string;
+ const char* model_name;
+ int model_id; /* compared against PMAC_TYPE_* constants in this file */
+ struct feature_table_entry* features; /* handler table for this board */
+ unsigned long board_flags; /* tested against PMAC_MB_* bits in this file */
+};
+static struct pmac_mb_def pmac_mb; /* the single board description the feature functions consult */
+
+/*
+ * Here are the chip specific feature functions
+ */
+
+#ifndef CONFIG_PPC64
+/*
+ * Set or clear the bits in @mask of mac-io register @reg.
+ * The chip is looked up with macio_find(@node, @type); -ENODEV is returned
+ * when none is found. Otherwise, under feature_lock, @mask is applied with
+ * MACIO_BIS when @value is non-zero or MACIO_BIC when it is zero, the
+ * register is read back, and 0 is returned.
+ */
+static int simple_feature_tweak(struct device_node *node, int type, int reg,
+ u32 mask, int value)
+{
+ struct macio_chip* macio;
+ unsigned long flags;
+
+ macio = macio_find(node, type);
+ if (!macio)
+ return -ENODEV;
+ LOCK(flags);
+ if (value)
+ MACIO_BIS(reg, mask);
+ else
+ MACIO_BIC(reg, mask);
+ (void)MACIO_IN32(reg);
+ UNLOCK(flags);
+
+ return 0;
+}
+/*
+ * Enable (@value non-zero) or disable (@value zero) one SCC channel.
+ * @node must be named "ch-a" or "ch-b" and belong to a known mac-io chip,
+ * otherwise -ENODEV is returned. The low 12 bits of @param are compared
+ * with PMAC_SCC_IRDA to drive pmu_enable_irled() (CONFIG_ADB_PMU only).
+ * PMAC_SCC_FLAG_XMON in @param sets MACIO_FLAG_SCC_LOCKED on enable; once
+ * that flag is set a disable request fails with -EPERM.
+ * Returns 0 on success.
+ */
+static long ohare_htw_scc_enable(struct device_node *node, long param,
+ long value)
+{
+ struct macio_chip* macio;
+ unsigned long chan_mask;
+ unsigned long fcr;
+ unsigned long flags;
+ int htw, trans;
+ unsigned long rmask;
+
+ macio = macio_find(node, 0);
+ if (!macio)
+ return -ENODEV;
+ if (of_node_name_eq(node, "ch-a"))
+ chan_mask = MACIO_FLAG_SCCA_ON;
+ else if (of_node_name_eq(node, "ch-b"))
+ chan_mask = MACIO_FLAG_SCCB_ON;
+ else
+ return -ENODEV;
+
+ htw = (macio->type == macio_heathrow || macio->type == macio_paddington
+ || macio->type == macio_gatwick);
+ /* On these machines, the HRW_SCC_TRANS_EN_N bit mustn't be touched */
+ trans = (pmac_mb.model_id != PMAC_TYPE_YOSEMITE &&
+ pmac_mb.model_id != PMAC_TYPE_YIKES);
+ if (value) {
+#ifdef CONFIG_ADB_PMU
+ if ((param & 0xfff) == PMAC_SCC_IRDA)
+ pmu_enable_irled(1);
+#endif /* CONFIG_ADB_PMU */
+ LOCK(flags);
+ fcr = MACIO_IN32(OHARE_FCR);
+ /* Check if scc cell need enabling */
+ if (!(fcr & OH_SCC_ENABLE)) {
+ fcr |= OH_SCC_ENABLE;
+ if (htw) {
+ /* Side effect: this will also power up the
+ * modem, but it's too messy to figure out on which
+ * ports this controls the transceiver and on which
+ * it controls the modem
+ */
+ if (trans)
+ fcr &= ~HRW_SCC_TRANS_EN_N;
+ MACIO_OUT32(OHARE_FCR, fcr);
+ fcr |= (rmask = HRW_RESET_SCC);
+ MACIO_OUT32(OHARE_FCR, fcr);
+ } else {
+ fcr |= (rmask = OH_SCC_RESET);
+ MACIO_OUT32(OHARE_FCR, fcr);
+ }
+ UNLOCK(flags); /* lock is dropped across the 15 ms reset delay */
+ (void)MACIO_IN32(OHARE_FCR);
+ mdelay(15);
+ LOCK(flags);
+ fcr &= ~rmask; /* release the reset bit chosen above */
+ MACIO_OUT32(OHARE_FCR, fcr);
+ }
+ if (chan_mask & MACIO_FLAG_SCCA_ON)
+ fcr |= OH_SCCA_IO;
+ if (chan_mask & MACIO_FLAG_SCCB_ON)
+ fcr |= OH_SCCB_IO;
+ MACIO_OUT32(OHARE_FCR, fcr);
+ macio->flags |= chan_mask;
+ UNLOCK(flags);
+ if (param & PMAC_SCC_FLAG_XMON)
+ macio->flags |= MACIO_FLAG_SCC_LOCKED;
+ } else {
+ if (macio->flags & MACIO_FLAG_SCC_LOCKED)
+ return -EPERM;
+ LOCK(flags);
+ fcr = MACIO_IN32(OHARE_FCR);
+ if (chan_mask & MACIO_FLAG_SCCA_ON)
+ fcr &= ~OH_SCCA_IO;
+ if (chan_mask & MACIO_FLAG_SCCB_ON)
+ fcr &= ~OH_SCCB_IO;
+ MACIO_OUT32(OHARE_FCR, fcr);
+ if ((fcr & (OH_SCCA_IO | OH_SCCB_IO)) == 0) { /* both channels off: shut the cell down */
+ fcr &= ~OH_SCC_ENABLE;
+ if (htw && trans)
+ fcr |= HRW_SCC_TRANS_EN_N;
+ MACIO_OUT32(OHARE_FCR, fcr);
+ }
+ macio->flags &= ~(chan_mask);
+ UNLOCK(flags);
+ mdelay(10);
+#ifdef CONFIG_ADB_PMU
+ if ((param & 0xfff) == PMAC_SCC_IRDA)
+ pmu_enable_irled(0);
+#endif /* CONFIG_ADB_PMU */
+ }
+ return 0;
+}
+
+/*
+ * Set (@value non-zero) or clear (@value zero) OH_FLOPPY_ENABLE in
+ * OHARE_FCR on an OHare chip. @param is unused. Returns whatever
+ * simple_feature_tweak() returns.
+ */
+static long ohare_floppy_enable(struct device_node *node, long param,
+				long value)
+{
+	const u32 enable_bit = OH_FLOPPY_ENABLE;
+
+	return simple_feature_tweak(node, macio_ohare, OHARE_FCR,
+				    enable_bit, value);
+}
+
+/*
+ * Set (@value non-zero) or clear (@value zero) OH_MESH_ENABLE in
+ * OHARE_FCR on an OHare chip. @param is unused. Returns whatever
+ * simple_feature_tweak() returns.
+ */
+static long ohare_mesh_enable(struct device_node *node, long param, long value)
+{
+	const u32 enable_bit = OH_MESH_ENABLE;
+
+	return simple_feature_tweak(node, macio_ohare, OHARE_FCR,
+				    enable_bit, value);
+}
+
+/*
+ * Enable or disable an OHare IDE interface selected by @param:
+ * 0 toggles OH_IDE0_ENABLE, 1 toggles OH_BAY_IDE_ENABLE, anything else
+ * yields -ENODEV. Otherwise returns the result of simple_feature_tweak().
+ */
+static long ohare_ide_enable(struct device_node *node, long param, long value)
+{
+	if (param == 1)
+		return simple_feature_tweak(node, macio_ohare,
+					    OHARE_FCR, OH_BAY_IDE_ENABLE, value);
+	if (param != 0)
+		return -ENODEV;
+
+	/* For some reason, setting the bit in set_initial_features()
+	 * doesn't stick. I'm still investigating... --BenH.
+	 */
+	if (value)
+		simple_feature_tweak(node, macio_ohare,
+				     OHARE_FCR, OH_IOBUS_ENABLE, 1);
+	return simple_feature_tweak(node, macio_ohare,
+				    OHARE_FCR, OH_IDE0_ENABLE, value);
+}
+
+/*
+ * Drive the reset line of OHare IDE interface @param (0 or 1). The
+ * register bits are named *_RESET_N, and the bit is cleared when @value is
+ * non-zero and set when it is zero. Any other @param yields -ENODEV.
+ */
+static long ohare_ide_reset(struct device_node *node, long param, long value)
+{
+	u32 reset_bit;
+
+	if (param == 0)
+		reset_bit = OH_IDE0_RESET_N;
+	else if (param == 1)
+		reset_bit = OH_IDE1_RESET_N;
+	else
+		return -ENODEV;
+
+	return simple_feature_tweak(node, macio_ohare,
+				    OHARE_FCR, reset_bit, !value);
+}
+
+/*
+ * Sleep hook for OHare boards: @value 1 clears OH_IOBUS_ENABLE in
+ * OHARE_FCR, @value 0 sets it again, other values do nothing.
+ * Returns -EPERM when the board lacks PMAC_MB_CAN_SLEEP, else 0.
+ */
+static long ohare_sleep_state(struct device_node *node, long param, long value)
+{
+	/* The MACIO_* accessors are used with a local named 'macio' in scope. */
+	struct macio_chip *macio = &macio_chips[0];
+
+	if (!(pmac_mb.board_flags & PMAC_MB_CAN_SLEEP))
+		return -EPERM;
+
+	switch (value) {
+	case 1:
+		MACIO_BIC(OHARE_FCR, OH_IOBUS_ENABLE);
+		break;
+	case 0:
+		MACIO_BIS(OHARE_FCR, OH_IOBUS_ENABLE);
+		break;
+	default:
+		break;
+	}
+
+	return 0;
+}
+/*
+ * Power the modem up (@value non-zero) or down (@value zero).
+ * Power-down writes HRW_GPIO_MODEM_RESET with bit 0 cleared; power-up
+ * writes that bit as 1, then 0, then 1. Each step is followed by a 250 ms
+ * mdelay() with feature_lock released. HRW_SCC_TRANS_EN_N in HEATHROW_FCR
+ * is cleared on power-up and set on power-down, except on
+ * PMAC_TYPE_YOSEMITE and PMAC_TYPE_YIKES where it is left untouched.
+ * Returns -ENODEV if @node is not under a known mac-io chip, else 0.
+ * @param is unused.
+ */
+static long heathrow_modem_enable(struct device_node *node, long param,
+ long value)
+{
+ struct macio_chip* macio;
+ u8 gpio;
+ unsigned long flags;
+
+ macio = macio_find(node, macio_unknown);
+ if (!macio)
+ return -ENODEV;
+ gpio = MACIO_IN8(HRW_GPIO_MODEM_RESET) & ~1;
+ if (!value) {
+ LOCK(flags);
+ MACIO_OUT8(HRW_GPIO_MODEM_RESET, gpio);
+ UNLOCK(flags);
+ (void)MACIO_IN8(HRW_GPIO_MODEM_RESET);
+ mdelay(250);
+ }
+ if (pmac_mb.model_id != PMAC_TYPE_YOSEMITE &&
+ pmac_mb.model_id != PMAC_TYPE_YIKES) {
+ LOCK(flags);
+ if (value)
+ MACIO_BIC(HEATHROW_FCR, HRW_SCC_TRANS_EN_N);
+ else
+ MACIO_BIS(HEATHROW_FCR, HRW_SCC_TRANS_EN_N);
+ UNLOCK(flags);
+ (void)MACIO_IN32(HEATHROW_FCR);
+ mdelay(250);
+ }
+ if (value) {
+ LOCK(flags);
+ MACIO_OUT8(HRW_GPIO_MODEM_RESET, gpio | 1);
+ (void)MACIO_IN8(HRW_GPIO_MODEM_RESET);
+ UNLOCK(flags); mdelay(250); LOCK(flags);
+ MACIO_OUT8(HRW_GPIO_MODEM_RESET, gpio);
+ (void)MACIO_IN8(HRW_GPIO_MODEM_RESET);
+ UNLOCK(flags); mdelay(250); LOCK(flags);
+ MACIO_OUT8(HRW_GPIO_MODEM_RESET, gpio | 1);
+ (void)MACIO_IN8(HRW_GPIO_MODEM_RESET);
+ UNLOCK(flags); mdelay(250);
+ }
+ return 0;
+}
+
+/*
+ * Set (@value non-zero) or clear (@value zero) both HRW_SWIM_ENABLE and
+ * HRW_BAY_FLOPPY_ENABLE in HEATHROW_FCR. @param is unused. Returns
+ * whatever simple_feature_tweak() returns.
+ */
+static long heathrow_floppy_enable(struct device_node *node, long param,
+				   long value)
+{
+	const u32 floppy_bits = HRW_SWIM_ENABLE|HRW_BAY_FLOPPY_ENABLE;
+
+	return simple_feature_tweak(node, macio_unknown, HEATHROW_FCR,
+				    floppy_bits, value);
+}
+/*
+ * Enable or disable the MESH cell. Sets (@value non-zero) or clears
+ * HRW_MESH_ENABLE in HEATHROW_FCR, then clears (@value non-zero) or sets
+ * bit 0x04000000 in HEATHROW_MBCR, which the inline comment describes as
+ * termination power. Both steps run under feature_lock, each followed by a
+ * read-back and a 10 us delay. Returns -ENODEV if no mac-io chip is found
+ * for @node, else 0. @param is unused.
+ */
+static long heathrow_mesh_enable(struct device_node *node, long param,
+ long value)
+{
+ struct macio_chip* macio;
+ unsigned long flags;
+
+ macio = macio_find(node, macio_unknown);
+ if (!macio)
+ return -ENODEV;
+ LOCK(flags);
+ /* Set clear mesh cell enable */
+ if (value)
+ MACIO_BIS(HEATHROW_FCR, HRW_MESH_ENABLE);
+ else
+ MACIO_BIC(HEATHROW_FCR, HRW_MESH_ENABLE);
+ (void)MACIO_IN32(HEATHROW_FCR);
+ udelay(10);
+ /* Set/Clear termination power */
+ if (value)
+ MACIO_BIC(HEATHROW_MBCR, 0x04000000);
+ else
+ MACIO_BIS(HEATHROW_MBCR, 0x04000000);
+ (void)MACIO_IN32(HEATHROW_MBCR);
+ udelay(10);
+ UNLOCK(flags);
+
+ return 0;
+}
+
+/*
+ * Enable or disable a Heathrow IDE interface selected by @param:
+ * 0 toggles HRW_IDE0_ENABLE, 1 toggles HRW_BAY_IDE_ENABLE, anything else
+ * yields -ENODEV. Otherwise returns the result of simple_feature_tweak().
+ */
+static long heathrow_ide_enable(struct device_node *node, long param,
+				long value)
+{
+	u32 enable_bit;
+
+	if (param == 0)
+		enable_bit = HRW_IDE0_ENABLE;
+	else if (param == 1)
+		enable_bit = HRW_BAY_IDE_ENABLE;
+	else
+		return -ENODEV;
+
+	return simple_feature_tweak(node, macio_unknown,
+				    HEATHROW_FCR, enable_bit, value);
+}
+
+/*
+ * Drive the reset line of Heathrow IDE interface @param (0 or 1). The
+ * register bits are named *_RESET_N, and the bit is cleared when @value is
+ * non-zero and set when it is zero. Any other @param yields -ENODEV.
+ */
+static long heathrow_ide_reset(struct device_node *node, long param,
+			       long value)
+{
+	u32 reset_bit;
+
+	if (param == 0)
+		reset_bit = HRW_IDE0_RESET_N;
+	else if (param == 1)
+		reset_bit = HRW_IDE1_RESET_N;
+	else
+		return -ENODEV;
+
+	return simple_feature_tweak(node, macio_unknown,
+				    HEATHROW_FCR, reset_bit, !value);
+}
+/*
+ * Enable or disable the BMAC cell. On enable, HRW_BMAC_IO_ENABLE and
+ * HRW_BMAC_RESET are set in HEATHROW_FCR, then after 10 ms HRW_BMAC_RESET
+ * is cleared and another 10 ms elapses. On disable only
+ * HRW_BMAC_IO_ENABLE is cleared. Returns -ENODEV if no mac-io chip is
+ * found for @node, else 0. @param is unused.
+ */
+static long heathrow_bmac_enable(struct device_node *node, long param,
+ long value)
+{
+ struct macio_chip* macio;
+ unsigned long flags;
+
+ macio = macio_find(node, 0);
+ if (!macio)
+ return -ENODEV;
+ if (value) {
+ LOCK(flags);
+ MACIO_BIS(HEATHROW_FCR, HRW_BMAC_IO_ENABLE);
+ MACIO_BIS(HEATHROW_FCR, HRW_BMAC_RESET);
+ UNLOCK(flags);
+ (void)MACIO_IN32(HEATHROW_FCR);
+ mdelay(10);
+ LOCK(flags);
+ MACIO_BIC(HEATHROW_FCR, HRW_BMAC_RESET);
+ UNLOCK(flags);
+ (void)MACIO_IN32(HEATHROW_FCR);
+ mdelay(10);
+ } else {
+ LOCK(flags);
+ MACIO_BIC(HEATHROW_FCR, HRW_BMAC_IO_ENABLE);
+ UNLOCK(flags);
+ }
+ return 0;
+}
+/*
+ * Turn sound on (@value non-zero) or off (@value zero) through
+ * HEATHROW_FCR: on sets HRW_SOUND_CLK_ENABLE then clears
+ * HRW_SOUND_POWER_N, off does the reverse in the opposite order.
+ * On PMAC_TYPE_YOSEMITE and PMAC_TYPE_YIKES nothing is done and 0 is
+ * returned. Returns -ENODEV if no mac-io chip is found for @node, else 0.
+ * @param is unused.
+ */
+static long heathrow_sound_enable(struct device_node *node, long param,
+ long value)
+{
+ struct macio_chip* macio;
+ unsigned long flags;
+
+ /* B&W G3 and Yikes don't support that properly (the
+ * sound appear to never come back after being shut down).
+ */
+ if (pmac_mb.model_id == PMAC_TYPE_YOSEMITE ||
+ pmac_mb.model_id == PMAC_TYPE_YIKES)
+ return 0;
+
+ macio = macio_find(node, 0);
+ if (!macio)
+ return -ENODEV;
+ if (value) {
+ LOCK(flags);
+ MACIO_BIS(HEATHROW_FCR, HRW_SOUND_CLK_ENABLE);
+ MACIO_BIC(HEATHROW_FCR, HRW_SOUND_POWER_N);
+ UNLOCK(flags);
+ (void)MACIO_IN32(HEATHROW_FCR);
+ } else {
+ LOCK(flags);
+ MACIO_BIS(HEATHROW_FCR, HRW_SOUND_POWER_N);
+ MACIO_BIC(HEATHROW_FCR, HRW_SOUND_CLK_ENABLE);
+ UNLOCK(flags);
+ }
+ return 0;
+}
+
+static u32 save_fcr[6];
+static u32 save_mbcr;
+static struct dbdma_regs save_dbdma[13];
+static struct dbdma_regs save_alt_dbdma[13];
+/*
+ * Copy cmdptr_hi, cmdptr, intr_sel, br_sel and wait_sel of 13 DBDMA
+ * channels of @macio into @save[0..12]. Channel i is addressed as
+ * macio->base + ((0x8000 + i * 0x100) >> 2).
+ * NOTE(review): the >> 2 suggests macio->base is a pointer to 32-bit
+ * words -- confirm against the struct macio_chip definition.
+ */
+static void dbdma_save(struct macio_chip *macio, struct dbdma_regs *save)
+{
+ int i;
+
+ /* Save state & config of DBDMA channels */
+ for (i = 0; i < 13; i++) {
+ volatile struct dbdma_regs __iomem * chan = (void __iomem *)
+ (macio->base + ((0x8000+i*0x100)>>2));
+ save[i].cmdptr_hi = in_le32(&chan->cmdptr_hi);
+ save[i].cmdptr = in_le32(&chan->cmdptr);
+ save[i].intr_sel = in_le32(&chan->intr_sel);
+ save[i].br_sel = in_le32(&chan->br_sel);
+ save[i].wait_sel = in_le32(&chan->wait_sel);
+ }
+}
+/*
+ * Counterpart of dbdma_save(): for each of the 13 DBDMA channels of
+ * @macio, write (ACTIVE|DEAD|WAKE|FLUSH|PAUSE|RUN) << 16 to the control
+ * register, spin until ACTIVE is clear in the status register, then write
+ * back the five register values held in @save[i].
+ */
+static void dbdma_restore(struct macio_chip *macio, struct dbdma_regs *save)
+{
+ int i;
+
+ /* Restore state & config of DBDMA channels */
+ for (i = 0; i < 13; i++) {
+ volatile struct dbdma_regs __iomem * chan = (void __iomem *)
+ (macio->base + ((0x8000+i*0x100)>>2));
+ out_le32(&chan->control, (ACTIVE|DEAD|WAKE|FLUSH|PAUSE|RUN)<<16);
+ while (in_le32(&chan->status) & ACTIVE)
+ mb();
+ out_le32(&chan->cmdptr_hi, save[i].cmdptr_hi);
+ out_le32(&chan->cmdptr, save[i].cmdptr);
+ out_le32(&chan->intr_sel, save[i].intr_sel);
+ out_le32(&chan->br_sel, save[i].br_sel);
+ out_le32(&chan->wait_sel, save[i].wait_sel);
+ }
+}
+/*
+ * Save the state of one Heathrow-family chip and quiesce it for sleep.
+ * With @secondary set, the DBDMA state goes to save_alt_dbdma and
+ * registers 0x38/0x3c to save_fcr[2..3]. Otherwise the DBDMA state goes
+ * to save_dbdma, registers 0x38/0x3c/0x34 to save_fcr[0..1] and
+ * save_mbcr, and sound, the IO bus, IDE0 reset and BMAC are shut down.
+ * In both cases the modem reset GPIO bit 0 is cleared and the SCC is
+ * disabled.
+ */
+static void heathrow_sleep(struct macio_chip *macio, int secondary)
+{
+ if (secondary) {
+ dbdma_save(macio, save_alt_dbdma);
+ save_fcr[2] = MACIO_IN32(0x38);
+ save_fcr[3] = MACIO_IN32(0x3c);
+ } else {
+ dbdma_save(macio, save_dbdma);
+ save_fcr[0] = MACIO_IN32(0x38);
+ save_fcr[1] = MACIO_IN32(0x3c);
+ save_mbcr = MACIO_IN32(0x34);
+ /* Make sure sound is shut down */
+ MACIO_BIS(HEATHROW_FCR, HRW_SOUND_POWER_N);
+ MACIO_BIC(HEATHROW_FCR, HRW_SOUND_CLK_ENABLE);
+ /* This seems to be necessary as well or the fan
+ * keeps coming up and battery drains fast */
+ MACIO_BIC(HEATHROW_FCR, HRW_IOBUS_ENABLE);
+ MACIO_BIC(HEATHROW_FCR, HRW_IDE0_RESET_N);
+ /* Make sure eth is down even if module or sleep
+ * won't work properly */
+ MACIO_BIC(HEATHROW_FCR, HRW_BMAC_IO_ENABLE | HRW_BMAC_RESET);
+ }
+ /* Make sure modem is shut down */
+ MACIO_OUT8(HRW_GPIO_MODEM_RESET,
+ MACIO_IN8(HRW_GPIO_MODEM_RESET) & ~1);
+ MACIO_BIS(HEATHROW_FCR, HRW_SCC_TRANS_EN_N);
+ MACIO_BIC(HEATHROW_FCR, OH_SCCA_IO|OH_SCCB_IO|HRW_SCC_ENABLE);
+
+ /* Let things settle */
+ (void)MACIO_IN32(HEATHROW_FCR);
+}
+/*
+ * Restore what heathrow_sleep() saved for one chip. With @secondary set,
+ * registers 0x38/0x3c come from save_fcr[2..3] and the DBDMA state from
+ * save_alt_dbdma. Otherwise 0x38 is written with save_fcr[0] plus
+ * HRW_IOBUS_ENABLE, 0x3c with save_fcr[1], 0x34 with save_mbcr, and the
+ * DBDMA state comes from save_dbdma. Every register write is followed by
+ * a read of 0x38 and a delay.
+ */
+static void heathrow_wakeup(struct macio_chip *macio, int secondary)
+{
+ if (secondary) {
+ MACIO_OUT32(0x38, save_fcr[2]);
+ (void)MACIO_IN32(0x38);
+ mdelay(1);
+ MACIO_OUT32(0x3c, save_fcr[3]);
+ (void)MACIO_IN32(0x38);
+ mdelay(10);
+ dbdma_restore(macio, save_alt_dbdma);
+ } else {
+ MACIO_OUT32(0x38, save_fcr[0] | HRW_IOBUS_ENABLE);
+ (void)MACIO_IN32(0x38);
+ mdelay(1);
+ MACIO_OUT32(0x3c, save_fcr[1]);
+ (void)MACIO_IN32(0x38);
+ mdelay(1);
+ MACIO_OUT32(0x34, save_mbcr);
+ (void)MACIO_IN32(0x38);
+ mdelay(10);
+ dbdma_restore(macio, save_dbdma);
+ }
+}
+
+/*
+ * Sleep hook for Heathrow-family boards.
+ * @value == 1 puts the chips to sleep, @value == 0 wakes them up; other
+ * values are ignored. When macio_chips[1] is a Gatwick, it is handled as
+ * the secondary chip: it is put to sleep before macio_chips[0] and woken
+ * after it. The secondary call is made on macio_chips[1] itself, so its
+ * registers are the ones saved to and restored from the "alt" save slots.
+ * @node and @param are unused.
+ * Returns -EPERM when the board lacks PMAC_MB_CAN_SLEEP, else 0.
+ */
+static long heathrow_sleep_state(struct device_node *node, long param,
+				 long value)
+{
+	int has_gatwick = (macio_chips[1].type == macio_gatwick);
+
+	if ((pmac_mb.board_flags & PMAC_MB_CAN_SLEEP) == 0)
+		return -EPERM;
+	if (value == 1) {
+		if (has_gatwick)
+			heathrow_sleep(&macio_chips[1], 1);
+		heathrow_sleep(&macio_chips[0], 0);
+	} else if (value == 0) {
+		heathrow_wakeup(&macio_chips[0], 0);
+		if (has_gatwick)
+			heathrow_wakeup(&macio_chips[1], 1);
+	}
+	return 0;
+}
+/*
+ * Enable (@value non-zero) or disable (@value zero) one KeyLargo SCC
+ * channel. @node must be named "ch-a" or "ch-b" and belong to a known
+ * mac-io chip, otherwise -ENODEV is returned. The low 12 bits of @param
+ * select special handling: PMAC_SCC_I2S1 keeps the channel A interface
+ * disabled, PMAC_SCC_IRDA configures the IrDA bits on channel B.
+ * PMAC_SCC_FLAG_XMON in @param sets MACIO_FLAG_SCC_LOCKED on enable; once
+ * that flag is set a disable request fails with -EPERM.
+ * Returns 0 on success.
+ */
+static long core99_scc_enable(struct device_node *node, long param, long value)
+{
+ struct macio_chip* macio;
+ unsigned long flags;
+ unsigned long chan_mask;
+ u32 fcr;
+
+ macio = macio_find(node, 0);
+ if (!macio)
+ return -ENODEV;
+ if (of_node_name_eq(node, "ch-a"))
+ chan_mask = MACIO_FLAG_SCCA_ON;
+ else if (of_node_name_eq(node, "ch-b"))
+ chan_mask = MACIO_FLAG_SCCB_ON;
+ else
+ return -ENODEV;
+
+ if (value) {
+ int need_reset_scc = 0;
+ int need_reset_irda = 0;
+
+ LOCK(flags);
+ fcr = MACIO_IN32(KEYLARGO_FCR0);
+ /* Check if scc cell need enabling */
+ if (!(fcr & KL0_SCC_CELL_ENABLE)) {
+ fcr |= KL0_SCC_CELL_ENABLE;
+ need_reset_scc = 1;
+ }
+ if (chan_mask & MACIO_FLAG_SCCA_ON) {
+ fcr |= KL0_SCCA_ENABLE;
+ /* Don't enable line drivers for I2S modem */
+ if ((param & 0xfff) == PMAC_SCC_I2S1)
+ fcr &= ~KL0_SCC_A_INTF_ENABLE;
+ else
+ fcr |= KL0_SCC_A_INTF_ENABLE;
+ }
+ if (chan_mask & MACIO_FLAG_SCCB_ON) {
+ fcr |= KL0_SCCB_ENABLE;
+ /* Perform irda specific inits */
+ if ((param & 0xfff) == PMAC_SCC_IRDA) {
+ fcr &= ~KL0_SCC_B_INTF_ENABLE;
+ fcr |= KL0_IRDA_ENABLE;
+ fcr |= KL0_IRDA_CLK32_ENABLE | KL0_IRDA_CLK19_ENABLE;
+ fcr |= KL0_IRDA_SOURCE1_SEL;
+ fcr &= ~(KL0_IRDA_FAST_CONNECT|KL0_IRDA_DEFAULT1|KL0_IRDA_DEFAULT0);
+ fcr &= ~(KL0_IRDA_SOURCE2_SEL|KL0_IRDA_HIGH_BAND);
+ need_reset_irda = 1;
+ } else
+ fcr |= KL0_SCC_B_INTF_ENABLE;
+ }
+ MACIO_OUT32(KEYLARGO_FCR0, fcr);
+ macio->flags |= chan_mask;
+ if (need_reset_scc) {
+ MACIO_BIS(KEYLARGO_FCR0, KL0_SCC_RESET);
+ (void)MACIO_IN32(KEYLARGO_FCR0);
+ UNLOCK(flags); /* lock is dropped across the 15 ms reset pulse */
+ mdelay(15);
+ LOCK(flags);
+ MACIO_BIC(KEYLARGO_FCR0, KL0_SCC_RESET);
+ }
+ if (need_reset_irda) {
+ MACIO_BIS(KEYLARGO_FCR0, KL0_IRDA_RESET);
+ (void)MACIO_IN32(KEYLARGO_FCR0);
+ UNLOCK(flags); /* lock is dropped across the 15 ms reset pulse */
+ mdelay(15);
+ LOCK(flags);
+ MACIO_BIC(KEYLARGO_FCR0, KL0_IRDA_RESET);
+ }
+ UNLOCK(flags);
+ if (param & PMAC_SCC_FLAG_XMON)
+ macio->flags |= MACIO_FLAG_SCC_LOCKED;
+ } else {
+ if (macio->flags & MACIO_FLAG_SCC_LOCKED)
+ return -EPERM;
+ LOCK(flags);
+ fcr = MACIO_IN32(KEYLARGO_FCR0);
+ if (chan_mask & MACIO_FLAG_SCCA_ON)
+ fcr &= ~KL0_SCCA_ENABLE;
+ if (chan_mask & MACIO_FLAG_SCCB_ON) {
+ fcr &= ~KL0_SCCB_ENABLE;
+ /* Perform irda specific clears */
+ if ((param & 0xfff) == PMAC_SCC_IRDA) {
+ fcr &= ~KL0_IRDA_ENABLE;
+ fcr &= ~(KL0_IRDA_CLK32_ENABLE | KL0_IRDA_CLK19_ENABLE);
+ fcr &= ~(KL0_IRDA_FAST_CONNECT|KL0_IRDA_DEFAULT1|KL0_IRDA_DEFAULT0);
+ fcr &= ~(KL0_IRDA_SOURCE1_SEL|KL0_IRDA_SOURCE2_SEL|KL0_IRDA_HIGH_BAND);
+ }
+ }
+ MACIO_OUT32(KEYLARGO_FCR0, fcr);
+ if ((fcr & (KL0_SCCA_ENABLE | KL0_SCCB_ENABLE)) == 0) { /* both channels off: disable the cell */
+ fcr &= ~KL0_SCC_CELL_ENABLE;
+ MACIO_OUT32(KEYLARGO_FCR0, fcr);
+ }
+ macio->flags &= ~(chan_mask);
+ UNLOCK(flags);
+ mdelay(10);
+ }
+ return 0;
+}
+/*
+ * Power the KeyLargo modem up (@value non-zero) or down (@value zero).
+ * A NULL @node is accepted when macio_chips[0] is a KeyLargo, in which
+ * case that chip's node is used. Power-down drives KL_GPIO_MODEM_RESET
+ * with the data bit cleared and sets KL2_ALT_DATA_OUT in KEYLARGO_FCR2;
+ * power-up clears KL2_ALT_DATA_OUT and then writes the reset GPIO data
+ * bit as 1, 0, 1. The steps are separated by 250 ms delays taken with
+ * feature_lock released. Returns -ENODEV if no suitable chip is found,
+ * else 0. @param is unused.
+ */
+static long
+core99_modem_enable(struct device_node *node, long param, long value)
+{
+ struct macio_chip* macio;
+ u8 gpio;
+ unsigned long flags;
+
+ /* Hack for internal USB modem */
+ if (node == NULL) {
+ if (macio_chips[0].type != macio_keylargo)
+ return -ENODEV;
+ node = macio_chips[0].of_node;
+ }
+ macio = macio_find(node, 0);
+ if (!macio)
+ return -ENODEV;
+ gpio = MACIO_IN8(KL_GPIO_MODEM_RESET);
+ gpio |= KEYLARGO_GPIO_OUTPUT_ENABLE;
+ gpio &= ~KEYLARGO_GPIO_OUTOUT_DATA;
+
+ if (!value) {
+ LOCK(flags);
+ MACIO_OUT8(KL_GPIO_MODEM_RESET, gpio);
+ UNLOCK(flags);
+ (void)MACIO_IN8(KL_GPIO_MODEM_RESET);
+ mdelay(250);
+ }
+ LOCK(flags);
+ if (value) {
+ MACIO_BIC(KEYLARGO_FCR2, KL2_ALT_DATA_OUT);
+ UNLOCK(flags);
+ (void)MACIO_IN32(KEYLARGO_FCR2);
+ mdelay(250);
+ } else {
+ MACIO_BIS(KEYLARGO_FCR2, KL2_ALT_DATA_OUT);
+ UNLOCK(flags);
+ }
+ if (value) {
+ LOCK(flags);
+ MACIO_OUT8(KL_GPIO_MODEM_RESET, gpio | KEYLARGO_GPIO_OUTOUT_DATA);
+ (void)MACIO_IN8(KL_GPIO_MODEM_RESET);
+ UNLOCK(flags); mdelay(250); LOCK(flags);
+ MACIO_OUT8(KL_GPIO_MODEM_RESET, gpio);
+ (void)MACIO_IN8(KL_GPIO_MODEM_RESET);
+ UNLOCK(flags); mdelay(250); LOCK(flags);
+ MACIO_OUT8(KL_GPIO_MODEM_RESET, gpio | KEYLARGO_GPIO_OUTOUT_DATA);
+ (void)MACIO_IN8(KL_GPIO_MODEM_RESET);
+ UNLOCK(flags); mdelay(250);
+ }
+ return 0;
+}
+/*
+ * Pangea/Intrepid variant of the modem power control. A NULL @node is
+ * accepted when macio_chips[0] is a Pangea or Intrepid. It follows the
+ * same sequence as core99_modem_enable(), except that power is switched
+ * through KL_GPIO_MODEM_POWER: written with only
+ * KEYLARGO_GPIO_OUTPUT_ENABLE on power-up, and with the data bit also set
+ * on power-down. Returns -ENODEV if no suitable chip is found, else 0.
+ * @param is unused.
+ */
+static long
+pangea_modem_enable(struct device_node *node, long param, long value)
+{
+ struct macio_chip* macio;
+ u8 gpio;
+ unsigned long flags;
+
+ /* Hack for internal USB modem */
+ if (node == NULL) {
+ if (macio_chips[0].type != macio_pangea &&
+ macio_chips[0].type != macio_intrepid)
+ return -ENODEV;
+ node = macio_chips[0].of_node;
+ }
+ macio = macio_find(node, 0);
+ if (!macio)
+ return -ENODEV;
+ gpio = MACIO_IN8(KL_GPIO_MODEM_RESET);
+ gpio |= KEYLARGO_GPIO_OUTPUT_ENABLE;
+ gpio &= ~KEYLARGO_GPIO_OUTOUT_DATA;
+
+ if (!value) {
+ LOCK(flags);
+ MACIO_OUT8(KL_GPIO_MODEM_RESET, gpio);
+ UNLOCK(flags);
+ (void)MACIO_IN8(KL_GPIO_MODEM_RESET);
+ mdelay(250);
+ }
+ LOCK(flags);
+ if (value) {
+ MACIO_OUT8(KL_GPIO_MODEM_POWER,
+ KEYLARGO_GPIO_OUTPUT_ENABLE);
+ UNLOCK(flags);
+ (void)MACIO_IN32(KEYLARGO_FCR2);
+ mdelay(250);
+ } else {
+ MACIO_OUT8(KL_GPIO_MODEM_POWER,
+ KEYLARGO_GPIO_OUTPUT_ENABLE | KEYLARGO_GPIO_OUTOUT_DATA);
+ UNLOCK(flags);
+ }
+ if (value) {
+ LOCK(flags);
+ MACIO_OUT8(KL_GPIO_MODEM_RESET, gpio | KEYLARGO_GPIO_OUTOUT_DATA);
+ (void)MACIO_IN8(KL_GPIO_MODEM_RESET);
+ UNLOCK(flags); mdelay(250); LOCK(flags);
+ MACIO_OUT8(KL_GPIO_MODEM_RESET, gpio);
+ (void)MACIO_IN8(KL_GPIO_MODEM_RESET);
+ UNLOCK(flags); mdelay(250); LOCK(flags);
+ MACIO_OUT8(KL_GPIO_MODEM_RESET, gpio | KEYLARGO_GPIO_OUTOUT_DATA);
+ (void)MACIO_IN8(KL_GPIO_MODEM_RESET);
+ UNLOCK(flags); mdelay(250);
+ }
+ return 0;
+}
+/*
+ * Switch the UniNorth ATA-100 clock on (@value non-zero) or off by
+ * toggling UNI_N_CLOCK_CNTL_ATA100 in UNI_N_CLOCK_CNTL.
+ * Returns -ENODEV when uninorth_rev is below 0x24. On enable, if a PCI
+ * device can be resolved from @node it is enabled and, when that
+ * succeeds, made bus master; a pci_enable_device() failure is returned to
+ * the caller. In all other cases 0 is returned.
+ */
+static long
+core99_ata100_enable(struct device_node *node, long value)
+{
+ unsigned long flags;
+ struct pci_dev *pdev = NULL;
+ u8 pbus, pid;
+ int rc;
+
+ if (uninorth_rev < 0x24)
+ return -ENODEV;
+
+ LOCK(flags);
+ if (value)
+ UN_BIS(UNI_N_CLOCK_CNTL, UNI_N_CLOCK_CNTL_ATA100);
+ else
+ UN_BIC(UNI_N_CLOCK_CNTL, UNI_N_CLOCK_CNTL_ATA100);
+ (void)UN_IN(UNI_N_CLOCK_CNTL);
+ UNLOCK(flags);
+ udelay(20);
+
+ if (value) {
+ if (pci_device_from_OF_node(node, &pbus, &pid) == 0)
+ pdev = pci_get_domain_bus_and_slot(0, pbus, pid);
+ if (pdev == NULL)
+ return 0;
+ rc = pci_enable_device(pdev);
+ if (rc == 0)
+ pci_set_master(pdev);
+ pci_dev_put(pdev); /* drop the reference taken by pci_get_domain_bus_and_slot() */
+ if (rc)
+ return rc;
+ }
+ return 0;
+}
+
+/*
+ * Enable or disable the IDE bus numbered @param. Buses 0 to 2 are
+ * KeyLargo based and toggle a bit in KEYLARGO_FCR1; bus 3 is the U2 based
+ * ata-100 and is handed to core99_ata100_enable(). Any other @param
+ * yields -ENODEV.
+ */
+static long
+core99_ide_enable(struct device_node *node, long param, long value)
+{
+	u32 enable_bit;
+
+	if (param == 3)
+		return core99_ata100_enable(node, value);
+
+	if (param == 0)
+		enable_bit = KL1_EIDE0_ENABLE;
+	else if (param == 1)
+		enable_bit = KL1_EIDE1_ENABLE;
+	else if (param == 2)
+		enable_bit = KL1_UIDE_ENABLE;
+	else
+		return -ENODEV;
+
+	return simple_feature_tweak(node, macio_unknown,
+				    KEYLARGO_FCR1, enable_bit, value);
+}
+
+/*
+ * Drive the reset line of KeyLargo IDE bus @param (0, 1 or 2). The
+ * register bits are named *_RESET_N, and the bit is cleared when @value is
+ * non-zero and set when it is zero. Any other @param yields -ENODEV.
+ */
+static long
+core99_ide_reset(struct device_node *node, long param, long value)
+{
+	u32 reset_bit;
+
+	if (param == 0)
+		reset_bit = KL1_EIDE0_RESET_N;
+	else if (param == 1)
+		reset_bit = KL1_EIDE1_RESET_N;
+	else if (param == 2)
+		reset_bit = KL1_UIDE_RESET_N;
+	else
+		return -ENODEV;
+
+	return simple_feature_tweak(node, macio_unknown,
+				    KEYLARGO_FCR1, reset_bit, !value);
+}
+
+/*
+ * Switch the GMAC clock on (@value non-zero) or off (@value zero) by
+ * toggling UNI_N_CLOCK_CNTL_GMAC in UNI_N_CLOCK_CNTL under feature_lock,
+ * then wait 20 us. @node and @param are unused. Always returns 0.
+ */
+static long
+core99_gmac_enable(struct device_node *node, long param, long value)
+{
+	unsigned long irq_state;
+
+	LOCK(irq_state);
+	if (!value)
+		UN_BIC(UNI_N_CLOCK_CNTL, UNI_N_CLOCK_CNTL_GMAC);
+	else
+		UN_BIS(UNI_N_CLOCK_CNTL, UNI_N_CLOCK_CNTL_GMAC);
+	/* Read the register back before dropping the lock. */
+	(void)UN_IN(UNI_N_CLOCK_CNTL);
+	UNLOCK(irq_state);
+
+	udelay(20);
+	return 0;
+}
+/*
+ * Pulse the ethernet PHY reset GPIO on macio_chips[0]: write
+ * KEYLARGO_GPIO_OUTPUT_ENABLE to KL_GPIO_ETH_PHY_RESET, wait 10 ms, write
+ * KEYLARGO_GPIO_OUTOUT_DATA (output enable deliberately left out, see the
+ * commented-out flag), wait 10 ms. Returns -ENODEV unless the chip is a
+ * KeyLargo, Pangea or Intrepid, else 0. @node, @param and @value are
+ * unused.
+ */
+static long
+core99_gmac_phy_reset(struct device_node *node, long param, long value)
+{
+ unsigned long flags;
+ struct macio_chip *macio;
+
+ macio = &macio_chips[0];
+ if (macio->type != macio_keylargo && macio->type != macio_pangea &&
+ macio->type != macio_intrepid)
+ return -ENODEV;
+
+ LOCK(flags);
+ MACIO_OUT8(KL_GPIO_ETH_PHY_RESET, KEYLARGO_GPIO_OUTPUT_ENABLE);
+ (void)MACIO_IN8(KL_GPIO_ETH_PHY_RESET);
+ UNLOCK(flags);
+ mdelay(10);
+ LOCK(flags);
+ MACIO_OUT8(KL_GPIO_ETH_PHY_RESET, /*KEYLARGO_GPIO_OUTPUT_ENABLE | */
+ KEYLARGO_GPIO_OUTOUT_DATA);
+ UNLOCK(flags);
+ mdelay(10);
+
+ return 0;
+}
+/*
+ * Switch the sound chip power GPIO. Only on PMAC_TYPE_PISMO and
+ * PMAC_TYPE_TITANIUM is KL_GPIO_SOUND_POWER written: output enable plus
+ * data bit when @value is non-zero, output enable alone when it is zero.
+ * On other models nothing is written. Returns -ENODEV if no mac-io chip
+ * is found for @node, else 0. @param is unused.
+ */
+static long
+core99_sound_chip_enable(struct device_node *node, long param, long value)
+{
+ struct macio_chip* macio;
+ unsigned long flags;
+
+ macio = macio_find(node, 0);
+ if (!macio)
+ return -ENODEV;
+
+ /* Do a better probe code, screamer G4 desktops &
+ * iMacs can do that too, add a recalibrate in
+ * the driver as well
+ */
+ if (pmac_mb.model_id == PMAC_TYPE_PISMO ||
+ pmac_mb.model_id == PMAC_TYPE_TITANIUM) {
+ LOCK(flags);
+ if (value)
+ MACIO_OUT8(KL_GPIO_SOUND_POWER,
+ KEYLARGO_GPIO_OUTPUT_ENABLE |
+ KEYLARGO_GPIO_OUTOUT_DATA);
+ else
+ MACIO_OUT8(KL_GPIO_SOUND_POWER,
+ KEYLARGO_GPIO_OUTPUT_ENABLE);
+ (void)MACIO_IN8(KL_GPIO_SOUND_POWER);
+ UNLOCK(flags);
+ }
+ return 0;
+}
+/*
+ * Power the AirPort card slot on (@value non-zero) or off (@value zero).
+ * @node must be the mac-io node itself or a direct child of it, otherwise
+ * -ENODEV is returned. The current state is tracked in
+ * MACIO_FLAG_AIRPORT_ON; a request equal to that state returns 0 without
+ * touching the hardware. NOTE(review): the comparison is "value == state"
+ * with state being 0 or 1, so an enable request with @value other than 1
+ * is never short-circuited -- confirm callers pass only 0 or 1.
+ * Returns 0 on success. @param is unused.
+ */
+static long
+core99_airport_enable(struct device_node *node, long param, long value)
+{
+ struct macio_chip* macio;
+ unsigned long flags;
+ int state;
+
+ macio = macio_find(node, 0);
+ if (!macio)
+ return -ENODEV;
+
+ /* Hint: we allow passing of macio itself for the sake of the
+ * sleep code
+ */
+ if (node != macio->of_node &&
+ (!node->parent || node->parent != macio->of_node))
+ return -ENODEV;
+ state = (macio->flags & MACIO_FLAG_AIRPORT_ON) != 0;
+ if (value == state)
+ return 0;
+ if (value) {
+ /* This code is a reproduction of OF enable-cardslot
+ * and init-wireless methods, slightly hacked until
+ * I got it working.
+ */
+ LOCK(flags);
+ MACIO_OUT8(KEYLARGO_GPIO_0+0xf, 5);
+ (void)MACIO_IN8(KEYLARGO_GPIO_0+0xf);
+ UNLOCK(flags);
+ mdelay(10);
+ LOCK(flags);
+ MACIO_OUT8(KEYLARGO_GPIO_0+0xf, 4);
+ (void)MACIO_IN8(KEYLARGO_GPIO_0+0xf);
+ UNLOCK(flags);
+
+ mdelay(10);
+
+ LOCK(flags);
+ MACIO_BIC(KEYLARGO_FCR2, KL2_CARDSEL_16);
+ (void)MACIO_IN32(KEYLARGO_FCR2);
+ udelay(10);
+ MACIO_OUT8(KEYLARGO_GPIO_EXTINT_0+0xb, 0);
+ (void)MACIO_IN8(KEYLARGO_GPIO_EXTINT_0+0xb);
+ udelay(10);
+ MACIO_OUT8(KEYLARGO_GPIO_EXTINT_0+0xa, 0x28);
+ (void)MACIO_IN8(KEYLARGO_GPIO_EXTINT_0+0xa);
+ udelay(10);
+ MACIO_OUT8(KEYLARGO_GPIO_EXTINT_0+0xd, 0x28);
+ (void)MACIO_IN8(KEYLARGO_GPIO_EXTINT_0+0xd);
+ udelay(10);
+ MACIO_OUT8(KEYLARGO_GPIO_0+0xd, 0x28);
+ (void)MACIO_IN8(KEYLARGO_GPIO_0+0xd);
+ udelay(10);
+ MACIO_OUT8(KEYLARGO_GPIO_0+0xe, 0x28);
+ (void)MACIO_IN8(KEYLARGO_GPIO_0+0xe);
+ UNLOCK(flags);
+ udelay(10);
+ MACIO_OUT32(0x1c000, 0);
+ mdelay(1);
+ MACIO_OUT8(0x1a3e0, 0x41);
+ (void)MACIO_IN8(0x1a3e0);
+ udelay(10);
+ LOCK(flags);
+ MACIO_BIS(KEYLARGO_FCR2, KL2_CARDSEL_16);
+ (void)MACIO_IN32(KEYLARGO_FCR2);
+ UNLOCK(flags);
+ mdelay(100);
+
+ macio->flags |= MACIO_FLAG_AIRPORT_ON;
+ } else {
+ LOCK(flags);
+ MACIO_BIC(KEYLARGO_FCR2, KL2_CARDSEL_16);
+ (void)MACIO_IN32(KEYLARGO_FCR2);
+ MACIO_OUT8(KL_GPIO_AIRPORT_0, 0);
+ MACIO_OUT8(KL_GPIO_AIRPORT_1, 0);
+ MACIO_OUT8(KL_GPIO_AIRPORT_2, 0);
+ MACIO_OUT8(KL_GPIO_AIRPORT_3, 0);
+ MACIO_OUT8(KL_GPIO_AIRPORT_4, 0);
+ (void)MACIO_IN8(KL_GPIO_AIRPORT_4);
+ UNLOCK(flags);
+
+ macio->flags &= ~MACIO_FLAG_AIRPORT_ON;
+ }
+ return 0;
+}
+
+#ifdef CONFIG_SMP
+/*
+ * Pulse the soft-reset GPIO of the CPU whose hardware id is @param.
+ *
+ * The GPIO offset is taken from the "soft-reset" property of the matching
+ * CPU node. When no CPU node matches, or the property value is 0, the
+ * built-in KL_GPIO_RESET_CPU0..3 table is used instead; in that case
+ * @param must be a valid index into the table.
+ *
+ * Returns -ENODEV when macio_chips[0] is not a KeyLargo or when the
+ * fallback table has no entry for @param, else 0. @node and @value are
+ * unused.
+ */
+static long
+core99_reset_cpu(struct device_node *node, long param, long value)
+{
+	static const int dflt_reset_lines[] = {	KL_GPIO_RESET_CPU0,
+						KL_GPIO_RESET_CPU1,
+						KL_GPIO_RESET_CPU2,
+						KL_GPIO_RESET_CPU3 };
+	unsigned int reset_io = 0;
+	unsigned long flags;
+	struct macio_chip *macio;
+	struct device_node *np;
+
+	macio = &macio_chips[0];
+	if (macio->type != macio_keylargo)
+		return -ENODEV;
+
+	for_each_of_cpu_node(np) {
+		const u32 *rst = of_get_property(np, "soft-reset", NULL);
+		if (!rst)
+			continue;
+		if (param == of_get_cpu_hwid(np, 0)) {
+			/* Read the property while we still hold the node
+			 * reference, then drop it before leaving the loop.
+			 */
+			reset_io = *rst;
+			of_node_put(np);
+			break;
+		}
+	}
+	if (np == NULL || reset_io == 0) {
+		/* Never index the fallback table with an unchecked id */
+		if (param < 0 || param >= (long)ARRAY_SIZE(dflt_reset_lines))
+			return -ENODEV;
+		reset_io = dflt_reset_lines[param];
+	}
+
+	LOCK(flags);
+	MACIO_OUT8(reset_io, KEYLARGO_GPIO_OUTPUT_ENABLE);
+	(void)MACIO_IN8(reset_io);
+	udelay(1);
+	MACIO_OUT8(reset_io, 0);
+	(void)MACIO_IN8(reset_io);
+	UNLOCK(flags);
+
+	return 0;
+}
+#endif /* CONFIG_SMP */
+/*
+ * Turn a USB cell on (@value non-zero) or off (@value zero).
+ * The cell is chosen from @node's "AAPL,clock-id" property: "usb0u048",
+ * "usb1u148" and "usb2u248" map to the internal numbers 0, 2 and 4. A
+ * missing property, any other value, or a macio_chips[0] that is not a
+ * KeyLargo, Pangea or Intrepid yields -ENODEV.
+ * Turning on clears the pad-suspend bits, enables the cell, clears the
+ * port wakeup enables and, on Intrepid, polls (1 ms per try, bounded by a
+ * 1000-count timeout) until the UniNorth clock-stopped bits clear.
+ * Turning off sets the port wakeup enables, disables the cell (skipped on
+ * Intrepid and for cell 4) and sets the pad-suspend bits.
+ * Returns 0 on success. @param is unused.
+ */
+static long
+core99_usb_enable(struct device_node *node, long param, long value)
+{
+ struct macio_chip *macio;
+ unsigned long flags;
+ const char *prop;
+ int number;
+ u32 reg;
+
+ macio = &macio_chips[0];
+ if (macio->type != macio_keylargo && macio->type != macio_pangea &&
+ macio->type != macio_intrepid)
+ return -ENODEV;
+
+ prop = of_get_property(node, "AAPL,clock-id", NULL);
+ if (!prop)
+ return -ENODEV;
+ if (strncmp(prop, "usb0u048", 8) == 0)
+ number = 0;
+ else if (strncmp(prop, "usb1u148", 8) == 0)
+ number = 2;
+ else if (strncmp(prop, "usb2u248", 8) == 0)
+ number = 4;
+ else
+ return -ENODEV;
+
+ /* Sorry for the brute-force locking, but this is only used during
+ * sleep and the timing seem to be critical
+ */
+ LOCK(flags);
+ if (value) {
+ /* Turn ON */
+ if (number == 0) {
+ MACIO_BIC(KEYLARGO_FCR0, (KL0_USB0_PAD_SUSPEND0 | KL0_USB0_PAD_SUSPEND1));
+ (void)MACIO_IN32(KEYLARGO_FCR0);
+ UNLOCK(flags);
+ mdelay(1);
+ LOCK(flags);
+ MACIO_BIS(KEYLARGO_FCR0, KL0_USB0_CELL_ENABLE);
+ } else if (number == 2) {
+ MACIO_BIC(KEYLARGO_FCR0, (KL0_USB1_PAD_SUSPEND0 | KL0_USB1_PAD_SUSPEND1));
+ UNLOCK(flags);
+ (void)MACIO_IN32(KEYLARGO_FCR0);
+ mdelay(1);
+ LOCK(flags);
+ MACIO_BIS(KEYLARGO_FCR0, KL0_USB1_CELL_ENABLE);
+ } else if (number == 4) {
+ MACIO_BIC(KEYLARGO_FCR1, (KL1_USB2_PAD_SUSPEND0 | KL1_USB2_PAD_SUSPEND1));
+ UNLOCK(flags);
+ (void)MACIO_IN32(KEYLARGO_FCR1);
+ mdelay(1);
+ LOCK(flags);
+ MACIO_BIS(KEYLARGO_FCR1, KL1_USB2_CELL_ENABLE);
+ }
+ if (number < 4) {
+ reg = MACIO_IN32(KEYLARGO_FCR4);
+ reg &= ~(KL4_PORT_WAKEUP_ENABLE(number) | KL4_PORT_RESUME_WAKE_EN(number) |
+ KL4_PORT_CONNECT_WAKE_EN(number) | KL4_PORT_DISCONNECT_WAKE_EN(number));
+ reg &= ~(KL4_PORT_WAKEUP_ENABLE(number+1) | KL4_PORT_RESUME_WAKE_EN(number+1) |
+ KL4_PORT_CONNECT_WAKE_EN(number+1) | KL4_PORT_DISCONNECT_WAKE_EN(number+1));
+ MACIO_OUT32(KEYLARGO_FCR4, reg);
+ (void)MACIO_IN32(KEYLARGO_FCR4);
+ udelay(10);
+ } else {
+ reg = MACIO_IN32(KEYLARGO_FCR3);
+ reg &= ~(KL3_IT_PORT_WAKEUP_ENABLE(0) | KL3_IT_PORT_RESUME_WAKE_EN(0) |
+ KL3_IT_PORT_CONNECT_WAKE_EN(0) | KL3_IT_PORT_DISCONNECT_WAKE_EN(0));
+ reg &= ~(KL3_IT_PORT_WAKEUP_ENABLE(1) | KL3_IT_PORT_RESUME_WAKE_EN(1) |
+ KL3_IT_PORT_CONNECT_WAKE_EN(1) | KL3_IT_PORT_DISCONNECT_WAKE_EN(1));
+ MACIO_OUT32(KEYLARGO_FCR3, reg);
+ (void)MACIO_IN32(KEYLARGO_FCR3);
+ udelay(10);
+ }
+ if (macio->type == macio_intrepid) {
+ /* wait for clock stopped bits to clear */
+ u32 test0 = 0, test1 = 0;
+ u32 status0, status1;
+ int timeout = 1000;
+
+ UNLOCK(flags); /* the polling loop below runs without the lock */
+ switch (number) {
+ case 0:
+ test0 = UNI_N_CLOCK_STOPPED_USB0;
+ test1 = UNI_N_CLOCK_STOPPED_USB0PCI;
+ break;
+ case 2:
+ test0 = UNI_N_CLOCK_STOPPED_USB1;
+ test1 = UNI_N_CLOCK_STOPPED_USB1PCI;
+ break;
+ case 4:
+ test0 = UNI_N_CLOCK_STOPPED_USB2;
+ test1 = UNI_N_CLOCK_STOPPED_USB2PCI;
+ break;
+ }
+ do {
+ if (--timeout <= 0) {
+ printk(KERN_ERR "core99_usb_enable: "
+ "Timeout waiting for clocks\n");
+ break;
+ }
+ mdelay(1);
+ status0 = UN_IN(UNI_N_CLOCK_STOP_STATUS0);
+ status1 = UN_IN(UNI_N_CLOCK_STOP_STATUS1);
+ } while ((status0 & test0) | (status1 & test1));
+ LOCK(flags);
+ }
+ } else {
+ /* Turn OFF */
+ if (number < 4) {
+ reg = MACIO_IN32(KEYLARGO_FCR4);
+ reg |= KL4_PORT_WAKEUP_ENABLE(number) | KL4_PORT_RESUME_WAKE_EN(number) |
+ KL4_PORT_CONNECT_WAKE_EN(number) | KL4_PORT_DISCONNECT_WAKE_EN(number);
+ reg |= KL4_PORT_WAKEUP_ENABLE(number+1) | KL4_PORT_RESUME_WAKE_EN(number+1) |
+ KL4_PORT_CONNECT_WAKE_EN(number+1) | KL4_PORT_DISCONNECT_WAKE_EN(number+1);
+ MACIO_OUT32(KEYLARGO_FCR4, reg);
+ (void)MACIO_IN32(KEYLARGO_FCR4);
+ udelay(1);
+ } else {
+ reg = MACIO_IN32(KEYLARGO_FCR3);
+ reg |= KL3_IT_PORT_WAKEUP_ENABLE(0) | KL3_IT_PORT_RESUME_WAKE_EN(0) |
+ KL3_IT_PORT_CONNECT_WAKE_EN(0) | KL3_IT_PORT_DISCONNECT_WAKE_EN(0);
+ reg |= KL3_IT_PORT_WAKEUP_ENABLE(1) | KL3_IT_PORT_RESUME_WAKE_EN(1) |
+ KL3_IT_PORT_CONNECT_WAKE_EN(1) | KL3_IT_PORT_DISCONNECT_WAKE_EN(1);
+ MACIO_OUT32(KEYLARGO_FCR3, reg);
+ (void)MACIO_IN32(KEYLARGO_FCR3);
+ udelay(1);
+ }
+ if (number == 0) {
+ if (macio->type != macio_intrepid)
+ MACIO_BIC(KEYLARGO_FCR0, KL0_USB0_CELL_ENABLE);
+ (void)MACIO_IN32(KEYLARGO_FCR0);
+ udelay(1);
+ MACIO_BIS(KEYLARGO_FCR0, (KL0_USB0_PAD_SUSPEND0 | KL0_USB0_PAD_SUSPEND1));
+ (void)MACIO_IN32(KEYLARGO_FCR0);
+ } else if (number == 2) {
+ if (macio->type != macio_intrepid)
+ MACIO_BIC(KEYLARGO_FCR0, KL0_USB1_CELL_ENABLE);
+ (void)MACIO_IN32(KEYLARGO_FCR0);
+ udelay(1);
+ MACIO_BIS(KEYLARGO_FCR0, (KL0_USB1_PAD_SUSPEND0 | KL0_USB1_PAD_SUSPEND1));
+ (void)MACIO_IN32(KEYLARGO_FCR0);
+ } else if (number == 4) {
+ udelay(1);
+ MACIO_BIS(KEYLARGO_FCR1, (KL1_USB2_PAD_SUSPEND0 | KL1_USB2_PAD_SUSPEND1));
+ (void)MACIO_IN32(KEYLARGO_FCR1);
+ }
+ udelay(1);
+ }
+ UNLOCK(flags);
+
+ return 0;
+}
+
+/*
+ * Switch the FireWire clock on (@value non-zero) or off (@value zero) by
+ * toggling UNI_N_CLOCK_CNTL_FW in UNI_N_CLOCK_CNTL, then wait 1 ms.
+ * Returns -ENODEV unless macio_chips[0] is a KeyLargo, Pangea or Intrepid
+ * with MACIO_FLAG_FW_SUPPORTED set, else 0. @node and @param are unused.
+ */
+static long
+core99_firewire_enable(struct device_node *node, long param, long value)
+{
+	struct macio_chip *macio = &macio_chips[0];
+	unsigned long irq_state;
+
+	switch (macio->type) {
+	case macio_keylargo:
+	case macio_pangea:
+	case macio_intrepid:
+		break;
+	default:
+		return -ENODEV;
+	}
+	if ((macio->flags & MACIO_FLAG_FW_SUPPORTED) == 0)
+		return -ENODEV;
+
+	LOCK(irq_state);
+	if (value)
+		UN_BIS(UNI_N_CLOCK_CNTL, UNI_N_CLOCK_CNTL_FW);
+	else
+		UN_BIC(UNI_N_CLOCK_CNTL, UNI_N_CLOCK_CNTL_FW);
+	/* Same read-back on both paths */
+	(void)UN_IN(UNI_N_CLOCK_CNTL);
+	UNLOCK(irq_state);
+
+	mdelay(1);
+	return 0;
+}
+
+/*
+ * Drive the FireWire cable power GPIO (KL_GPIO_FW_CABLE_POWER): 0 is
+ * written when @value is non-zero, 4 when it is zero. @node is never
+ * dereferenced, so callers may pass NULL; @param is unused.
+ *
+ * Returns -ENODEV when the board lacks PMAC_MB_HAS_FW_POWER, when the
+ * first mac-io chip is not a KeyLargo, Pangea or Intrepid, or when it
+ * does not have MACIO_FLAG_FW_SUPPORTED; returns 0 otherwise.
+ */
+static long
+core99_firewire_cable_power(struct device_node *node, long param, long value)
+{
+	struct macio_chip *macio = &macio_chips[0];
+	unsigned long flags;
+	int gpio_val = value ? 0 : 4;
+
+	if (!(pmac_mb.board_flags & PMAC_MB_HAS_FW_POWER))
+		return -ENODEV;
+	switch (macio->type) {
+	case macio_keylargo:
+	case macio_pangea:
+	case macio_intrepid:
+		break;
+	default:
+		return -ENODEV;
+	}
+	if ((macio->flags & MACIO_FLAG_FW_SUPPORTED) == 0)
+		return -ENODEV;
+
+	LOCK(flags);
+	MACIO_OUT8(KL_GPIO_FW_CABLE_POWER, gpio_val);
+	/* Read the GPIO back, then give it 10us before unlocking */
+	MACIO_IN8(KL_GPIO_FW_CABLE_POWER);
+	udelay(10);
+	UNLOCK(flags);
+	mdelay(1);
+
+	return 0;
+}
+
+/*
+ * Set or clear UNI_N_AACK_DELAY_ENABLE in the UNI_N_AACK_DELAY register.
+ * Note that the on/off switch is @param, not @value: non-zero sets the
+ * bit, zero clears it. @node and @value are unused.
+ *
+ * Returns -ENODEV when uninorth_rev is below 0xd2, 0 otherwise.
+ */
+static long
+intrepid_aack_delay_enable(struct device_node *node, long param, long value)
+{
+	unsigned long flags;
+
+	if (uninorth_rev < 0xd2)
+		return -ENODEV;
+
+	LOCK(flags);
+	if (!param)
+		UN_BIC(UNI_N_AACK_DELAY, UNI_N_AACK_DELAY_ENABLE);
+	else
+		UN_BIS(UNI_N_AACK_DELAY, UNI_N_AACK_DELAY_ENABLE);
+	UNLOCK(flags);
+
+	return 0;
+}
+
+
+#endif /* CONFIG_PPC64 */
+
+/*
+ * Read one byte from the first mac-io chip at register offset @param and
+ * return it. @node and @value are unused.
+ */
+static long
+core99_read_gpio(struct device_node *node, long param, long value)
+{
+	/* Not referenced by name below; presumably picked up by MACIO_IN8() */
+	struct macio_chip *macio = &macio_chips[0];
+	long level = MACIO_IN8(param);
+
+	return level;
+}
+
+
+/*
+ * Write the low byte of @value to the first mac-io chip at register
+ * offset @param. @node is unused. Always returns 0.
+ */
+static long
+core99_write_gpio(struct device_node *node, long param, long value)
+{
+	/* Not referenced by name below; presumably picked up by MACIO_OUT8() */
+	struct macio_chip *macio = &macio_chips[0];
+	u8 byte = (u8)(value & 0xff);
+
+	MACIO_OUT8(param, byte);
+	return 0;
+}
+
+#ifdef CONFIG_PPC64
+/*
+ * Turn the GMAC clock (K2_FCR1_GMAC_CLK_ENABLE in FCR1) on or off
+ * according to @value, keeping k2_skiplist[0] in step: the slot holds
+ * @node while the clock is off and NULL while it is on. @param is unused.
+ *
+ * Returns -ENODEV when @node is NULL, 0 otherwise.
+ */
+static long g5_gmac_enable(struct device_node *node, long param, long value)
+{
+	struct macio_chip *macio = &macio_chips[0];
+	unsigned long flags;
+
+	if (!node)
+		return -ENODEV;
+
+	LOCK(flags);
+	if (!value) {
+		/* Publish the node in the skip list before cutting the clock */
+		k2_skiplist[0] = node;
+		mb();
+		MACIO_BIC(KEYLARGO_FCR1, K2_FCR1_GMAC_CLK_ENABLE);
+	} else {
+		/* Restore the clock first, only then clear the skip list slot */
+		MACIO_BIS(KEYLARGO_FCR1, K2_FCR1_GMAC_CLK_ENABLE);
+		mb();
+		k2_skiplist[0] = NULL;
+	}
+	UNLOCK(flags);
+	mdelay(1);
+
+	return 0;
+}
+
+/*
+ * Turn the FireWire clock (K2_FCR1_FW_CLK_ENABLE in FCR1) on or off
+ * according to @value, keeping k2_skiplist[1] in step: the slot holds
+ * @node while the clock is off and NULL while it is on. @param is unused.
+ *
+ * Returns -ENODEV when @node is NULL, 0 otherwise.
+ */
+static long g5_fw_enable(struct device_node *node, long param, long value)
+{
+	struct macio_chip *macio = &macio_chips[0];
+	unsigned long flags;
+
+	if (!node)
+		return -ENODEV;
+
+	LOCK(flags);
+	if (!value) {
+		/* Publish the node in the skip list before cutting the clock */
+		k2_skiplist[1] = node;
+		mb();
+		MACIO_BIC(KEYLARGO_FCR1, K2_FCR1_FW_CLK_ENABLE);
+	} else {
+		/* Restore the clock first, only then clear the skip list slot */
+		MACIO_BIS(KEYLARGO_FCR1, K2_FCR1_FW_CLK_ENABLE);
+		mb();
+		k2_skiplist[1] = NULL;
+	}
+	UNLOCK(flags);
+	mdelay(1);
+
+	return 0;
+}
+
+/*
+ * Set U3_MPIC_RESET | U3_MPIC_OUTPUT_ENABLE in U3_TOGGLE_REG, but only
+ * when the parent of @node is named "u3" or "u4". In every other case
+ * (including a missing parent) nothing is touched. @param and @value are
+ * unused. Always returns 0.
+ */
+static long g5_mpic_enable(struct device_node *node, long param, long value)
+{
+	struct device_node *bridge;
+	unsigned long flags;
+	int under_u3;
+
+	bridge = of_get_parent(node);
+	if (!bridge)
+		return 0;
+	under_u3 = of_node_name_eq(bridge, "u3") ||
+		   of_node_name_eq(bridge, "u4");
+	/* Only the name was needed; drop the parent reference right away */
+	of_node_put(bridge);
+	if (!under_u3)
+		return 0;
+
+	LOCK(flags);
+	UN_BIS(U3_TOGGLE_REG, U3_MPIC_RESET | U3_MPIC_OUTPUT_ENABLE);
+	UNLOCK(flags);
+
+	return 0;
+}
+
+/*
+ * Pulse the ethernet PHY reset GPIO, but only when the first child of
+ * @node is compatible with "B5221". @param and @value are unused.
+ *
+ * Returns -ENODEV when @node has no child, 0 otherwise (whether or not
+ * a reset was performed). Sleeps for 10ms when it does reset.
+ */
+static long g5_eth_phy_reset(struct device_node *node, long param, long value)
+{
+	struct macio_chip *macio = &macio_chips[0];
+	struct device_node *child;
+	int is_b5221;
+
+	/*
+	 * The combo PHYs must be left alone; only the BCM5221 found in
+	 * the iMac G5 gets reset.
+	 */
+	child = of_get_next_child(node, NULL);
+	if (child == NULL)
+		return -ENODEV;
+	is_b5221 = of_device_is_compatible(child, "B5221");
+	of_node_put(child);
+	if (!is_b5221)
+		return 0;
+
+	/* PHY reset is GPIO 29, which the device-tree does not describe */
+	MACIO_OUT8(K2_GPIO_EXTINT_0 + 29,
+		   KEYLARGO_GPIO_OUTPUT_ENABLE | KEYLARGO_GPIO_OUTOUT_DATA);
+	/*
+	 * sungem now always calls this from a context where scheduling
+	 * is allowed, so sleeping here is fine.
+	 */
+	msleep(10);
+	MACIO_OUT8(K2_GPIO_EXTINT_0 + 29, 0);
+
+	return 0;
+}
+
+/*
+ * Enable (@value non-zero) or disable (@value zero) the I2S cell that
+ * @node describes. The cell is taken from the node name, which must be
+ * "i2s-a", "i2s-b" or "i2s-c" (the latter only on Shasta). @param is
+ * unused.
+ *
+ * Returns -ENODEV when the first mac-io chip is neither K2 nor Shasta,
+ * when @node is NULL, or when the node name does not select a cell the
+ * chip supports; returns 0 otherwise.
+ */
+static long g5_i2s_enable(struct device_node *node, long param, long value)
+{
+	/* Very crude implementation for now */
+	struct macio_chip *macio = &macio_chips[0];
+	unsigned long flags;
+	int cell;
+	/*
+	 * One row per cell; columns are the bits touched in FCR0, FCR1
+	 * and FCR3. Constant data, so it is built once rather than on
+	 * every call.
+	 */
+	static const u32 fcrs[3][3] = {
+		{ 0,
+		  K2_FCR1_I2S0_CELL_ENABLE |
+		  K2_FCR1_I2S0_CLK_ENABLE_BIT | K2_FCR1_I2S0_ENABLE,
+		  KL3_I2S0_CLK18_ENABLE
+		},
+		{ KL0_SCC_A_INTF_ENABLE,
+		  K2_FCR1_I2S1_CELL_ENABLE |
+		  K2_FCR1_I2S1_CLK_ENABLE_BIT | K2_FCR1_I2S1_ENABLE,
+		  KL3_I2S1_CLK18_ENABLE
+		},
+		{ KL0_SCC_B_INTF_ENABLE,
+		  SH_FCR1_I2S2_CELL_ENABLE |
+		  SH_FCR1_I2S2_CLK_ENABLE_BIT | SH_FCR1_I2S2_ENABLE,
+		  SH_FCR3_I2S2_CLK18_ENABLE
+		},
+	};
+
+	if (macio->type != macio_keylargo2 && macio->type != macio_shasta)
+		return -ENODEV;
+	/* Reject a NULL node like the other g5 handlers instead of oopsing */
+	if (node == NULL || strncmp(node->name, "i2s-", 4))
+		return -ENODEV;
+	cell = node->name[4] - 'a';
+	switch(cell) {
+	case 0:
+	case 1:
+		break;
+	case 2:
+		/* The third cell only exists on Shasta */
+		if (macio->type == macio_shasta)
+			break;
+		fallthrough;
+	default:
+		return -ENODEV;
+	}
+
+	LOCK(flags);
+	if (value) {
+		/* Enable: FCR0 bits are cleared, FCR1 and FCR3 bits are set */
+		MACIO_BIC(KEYLARGO_FCR0, fcrs[cell][0]);
+		MACIO_BIS(KEYLARGO_FCR1, fcrs[cell][1]);
+		MACIO_BIS(KEYLARGO_FCR3, fcrs[cell][2]);
+	} else {
+		/* Disable: the same registers in reverse order, inverted */
+		MACIO_BIC(KEYLARGO_FCR3, fcrs[cell][2]);
+		MACIO_BIC(KEYLARGO_FCR1, fcrs[cell][1]);
+		MACIO_BIS(KEYLARGO_FCR0, fcrs[cell][0]);
+	}
+	udelay(10);
+	UNLOCK(flags);
+
+	return 0;
+}
+
+
+#ifdef CONFIG_SMP
+/*
+ * Pulse the soft-reset GPIO of the CPU whose hardware id equals @param.
+ * The GPIO offset comes from the "soft-reset" property of the matching
+ * cpu node. @node and @value are unused.
+ *
+ * Returns -ENODEV when the first mac-io chip is neither K2 nor Shasta,
+ * when no cpu node with a "soft-reset" property matches @param, or when
+ * the property value is 0; returns 0 otherwise.
+ */
+static long g5_reset_cpu(struct device_node *node, long param, long value)
+{
+	unsigned int reset_io = 0;
+	unsigned long flags;
+	struct macio_chip *macio;
+	struct device_node *np;
+
+	macio = &macio_chips[0];
+	if (macio->type != macio_keylargo2 && macio->type != macio_shasta)
+		return -ENODEV;
+
+	for_each_of_cpu_node(np) {
+		const u32 *rst = of_get_property(np, "soft-reset", NULL);
+		if (!rst)
+			continue;
+		if (param == of_get_cpu_hwid(np, 0)) {
+			/*
+			 * The property data belongs to the node, so read it
+			 * while the iterator's reference is still held.
+			 */
+			reset_io = *rst;
+			of_node_put(np);
+			break;
+		}
+	}
+	/* np is NULL when the loop ran to completion without a match */
+	if (np == NULL || reset_io == 0)
+		return -ENODEV;
+
+	LOCK(flags);
+	/* Output-enable for 1us, then back to 0; each write is read back */
+	MACIO_OUT8(reset_io, KEYLARGO_GPIO_OUTPUT_ENABLE);
+	(void)MACIO_IN8(reset_io);
+	udelay(1);
+	MACIO_OUT8(reset_io, 0);
+	(void)MACIO_IN8(reset_io);
+	UNLOCK(flags);
+
+	return 0;
+}
+#endif /* CONFIG_SMP */
+
+/*
+ * Not static because pmac_smp may call it.
+ *
+ * Takes the second CPU off the bus on dual CPU machines that run UP.
+ * Only acts when uninorth_maj is 3, by writing 0 to U3_API_PHY_CONFIG_1.
+ */
+void __init g5_phy_disable_cpu1(void)
+{
+	if (uninorth_maj != 3)
+		return;
+
+	UN_OUT(U3_API_PHY_CONFIG_1, 0);
+}
+#endif /* CONFIG_PPC64 */
+
+#ifndef CONFIG_PPC64
+
+
+#ifdef CONFIG_PM
+static u32 save_gpio_levels[2]; /* KEYLARGO_GPIO_LEVELS0/1, saved by core99_sleep(), restored by core99_wake_up() */
+static u8 save_gpio_extint[KEYLARGO_GPIO_EXTINT_CNT]; /* one byte per GPIO starting at KEYLARGO_GPIO_EXTINT_0 */
+static u8 save_gpio_normal[KEYLARGO_GPIO_CNT]; /* one byte per GPIO starting at KEYLARGO_GPIO_0 */
+static u32 save_unin_clock_ctl; /* UNI_N_CLOCK_CNTL as read by core99_sleep() before it clears the FW clock bit */
+
+/*
+ * Switch off as many KeyLargo cells and clocks as possible by clearing
+ * and setting bits in FCR0-FCR3 and MBCR of @macio. A non-zero
+ * @sleep_mode additionally suspends the USB reference, sets
+ * KL3_SHUTDOWN_PLLKW12 (and KL3_SHUTDOWN_PLL_TOTAL on rev >= 2) and
+ * stops the timer and VIA clocks.
+ */
+static void keylargo_shutdown(struct macio_chip *macio, int sleep_mode)
+{
+	u32 fcr3;
+
+	if (sleep_mode) {
+		mdelay(1);
+		MACIO_BIS(KEYLARGO_FCR0, KL0_USB_REF_SUSPEND);
+		(void)MACIO_IN32(KEYLARGO_FCR0);
+		mdelay(1);
+	}
+
+	/* FCR0: SCC and IrDA */
+	MACIO_BIC(KEYLARGO_FCR0,
+		  KL0_SCCA_ENABLE | KL0_SCCB_ENABLE | KL0_SCC_CELL_ENABLE |
+		  KL0_IRDA_ENABLE | KL0_IRDA_CLK32_ENABLE |
+		  KL0_IRDA_CLK19_ENABLE);
+
+	/* MBCR: clear the MB0 device mask, then set the IDE enable bit */
+	MACIO_BIC(KEYLARGO_MBCR, KL_MBCR_MB0_DEV_MASK);
+	MACIO_BIS(KEYLARGO_MBCR, KL_MBCR_MB0_IDE_ENABLE);
+
+	/* FCR1: audio, I2S and IDE */
+	MACIO_BIC(KEYLARGO_FCR1,
+		  KL1_AUDIO_SEL_22MCLK | KL1_AUDIO_CLK_ENABLE_BIT |
+		  KL1_AUDIO_CLK_OUT_ENABLE | KL1_AUDIO_CELL_ENABLE |
+		  KL1_I2S0_CELL_ENABLE | KL1_I2S0_CLK_ENABLE_BIT |
+		  KL1_I2S0_ENABLE | KL1_I2S1_CELL_ENABLE |
+		  KL1_I2S1_CLK_ENABLE_BIT | KL1_I2S1_ENABLE |
+		  KL1_EIDE0_ENABLE | KL1_EIDE0_RESET_N |
+		  KL1_EIDE1_ENABLE | KL1_EIDE1_RESET_N |
+		  KL1_UIDE_ENABLE);
+
+	/* FCR2 */
+	MACIO_BIS(KEYLARGO_FCR2, KL2_ALT_DATA_OUT);
+	MACIO_BIC(KEYLARGO_FCR2, KL2_IOBUS_ENABLE);
+
+	/* FCR3 is computed in a local and written back in one go */
+	fcr3 = MACIO_IN32(KEYLARGO_FCR3);
+	if (macio->rev >= 2) {
+		fcr3 |= KL3_SHUTDOWN_PLL2X;
+		if (sleep_mode)
+			fcr3 |= KL3_SHUTDOWN_PLL_TOTAL;
+	}
+	fcr3 |= KL3_SHUTDOWN_PLLKW6 | KL3_SHUTDOWN_PLLKW4 |
+		KL3_SHUTDOWN_PLLKW35;
+	if (sleep_mode)
+		fcr3 |= KL3_SHUTDOWN_PLLKW12;
+	fcr3 &= ~(KL3_CLK66_ENABLE | KL3_CLK49_ENABLE | KL3_CLK45_ENABLE |
+		  KL3_CLK31_ENABLE | KL3_I2S1_CLK18_ENABLE |
+		  KL3_I2S0_CLK18_ENABLE);
+	if (sleep_mode)
+		fcr3 &= ~(KL3_TIMER_CLK18_ENABLE | KL3_VIA_CLK16_ENABLE);
+	MACIO_OUT32(KEYLARGO_FCR3, fcr3);
+
+	/* Flush posted writes, then wait a bit */
+	(void)MACIO_IN32(KEYLARGO_FCR0);
+	mdelay(1);
+}
+
+/*
+ * Pangea counterpart of the KeyLargo shutdown: clears the SCC, USB cell,
+ * audio, I2S and UIDE enable bits in FCR0/FCR1 of @macio, sets
+ * KL2_ALT_DATA_OUT and rewrites FCR3. On boards flagged PMAC_MB_MOBILE
+ * KL1_UIDE_RESET_N is cleared as well. A non-zero @sleep_mode also stops
+ * the VIA and timer clocks.
+ */
+static void pangea_shutdown(struct macio_chip *macio, int sleep_mode)
+{
+	u32 fcr3;
+
+	/* FCR0: SCC and both USB cells */
+	MACIO_BIC(KEYLARGO_FCR0,
+		  KL0_SCCA_ENABLE | KL0_SCCB_ENABLE | KL0_SCC_CELL_ENABLE |
+		  KL0_USB0_CELL_ENABLE | KL0_USB1_CELL_ENABLE);
+
+	/* FCR1: audio, I2S and UIDE */
+	MACIO_BIC(KEYLARGO_FCR1,
+		  KL1_AUDIO_SEL_22MCLK | KL1_AUDIO_CLK_ENABLE_BIT |
+		  KL1_AUDIO_CLK_OUT_ENABLE | KL1_AUDIO_CELL_ENABLE |
+		  KL1_I2S0_CELL_ENABLE | KL1_I2S0_CLK_ENABLE_BIT |
+		  KL1_I2S0_ENABLE | KL1_I2S1_CELL_ENABLE |
+		  KL1_I2S1_CLK_ENABLE_BIT | KL1_I2S1_ENABLE |
+		  KL1_UIDE_ENABLE);
+	if (pmac_mb.board_flags & PMAC_MB_MOBILE)
+		MACIO_BIC(KEYLARGO_FCR1, KL1_UIDE_RESET_N);
+
+	MACIO_BIS(KEYLARGO_FCR2, KL2_ALT_DATA_OUT);
+
+	/* FCR3 is computed in a local and written back in one go */
+	fcr3 = MACIO_IN32(KEYLARGO_FCR3);
+	fcr3 |= KL3_SHUTDOWN_PLLKW6 | KL3_SHUTDOWN_PLLKW4 |
+		KL3_SHUTDOWN_PLLKW35;
+	fcr3 &= ~(KL3_CLK49_ENABLE | KL3_CLK45_ENABLE | KL3_CLK31_ENABLE |
+		  KL3_I2S0_CLK18_ENABLE | KL3_I2S1_CLK18_ENABLE);
+	if (sleep_mode)
+		fcr3 &= ~(KL3_VIA_CLK16_ENABLE | KL3_TIMER_CLK18_ENABLE);
+	MACIO_OUT32(KEYLARGO_FCR3, fcr3);
+
+	/* Flush posted writes, then wait a bit */
+	(void)MACIO_IN32(KEYLARGO_FCR0);
+	mdelay(1);
+}
+
+/*
+ * Intrepid counterpart of the KeyLargo shutdown: clears the SCC, I2S and
+ * EIDE0 enable bits in FCR0/FCR1 of @macio and a set of clock enables in
+ * FCR3. On boards flagged PMAC_MB_MOBILE KL1_UIDE_RESET_N is cleared as
+ * well. A non-zero @sleep_mode also stops the timer and VIA clocks.
+ * Ends with a 10ms delay.
+ */
+static void intrepid_shutdown(struct macio_chip *macio, int sleep_mode)
+{
+	u32 fcr3;
+
+	/* FCR0: SCC */
+	MACIO_BIC(KEYLARGO_FCR0,
+		  KL0_SCCA_ENABLE | KL0_SCCB_ENABLE | KL0_SCC_CELL_ENABLE);
+
+	/* FCR1: I2S and EIDE0 */
+	MACIO_BIC(KEYLARGO_FCR1,
+		  KL1_I2S0_CELL_ENABLE | KL1_I2S0_CLK_ENABLE_BIT |
+		  KL1_I2S0_ENABLE | KL1_I2S1_CELL_ENABLE |
+		  KL1_I2S1_CLK_ENABLE_BIT | KL1_I2S1_ENABLE |
+		  KL1_EIDE0_ENABLE);
+	if (pmac_mb.board_flags & PMAC_MB_MOBILE)
+		MACIO_BIC(KEYLARGO_FCR1, KL1_UIDE_RESET_N);
+
+	/* FCR3 is computed in a local and written back in one go */
+	fcr3 = MACIO_IN32(KEYLARGO_FCR3);
+	fcr3 &= ~(KL3_CLK49_ENABLE | KL3_CLK45_ENABLE |
+		  KL3_I2S1_CLK18_ENABLE | KL3_I2S0_CLK18_ENABLE);
+	if (sleep_mode)
+		fcr3 &= ~(KL3_TIMER_CLK18_ENABLE | KL3_IT_VIA_CLK32_ENABLE);
+	MACIO_OUT32(KEYLARGO_FCR3, fcr3);
+
+	/* Flush posted writes, then wait a bit */
+	(void)MACIO_IN32(KEYLARGO_FCR0);
+	mdelay(10);
+}
+
+
+/*
+ * Put a Core99 machine to sleep. In order: force AirPort, FireWire,
+ * modem and sound off, snapshot the GPIOs, MBCR/FCRs and DBDMA state,
+ * run the chip-specific shutdown in sleep mode, then put the UniNorth
+ * host bridge to sleep.
+ *
+ * Returns -ENODEV when the first mac-io chip is not a KeyLargo, Pangea
+ * or Intrepid, 0 otherwise.
+ */
+static int
+core99_sleep(void)
+{
+	struct macio_chip *macio = &macio_chips[0];
+	int i;
+
+	switch (macio->type) {
+	case macio_keylargo:
+	case macio_pangea:
+	case macio_intrepid:
+		break;
+	default:
+		return -ENODEV;
+	}
+
+	/*
+	 * Power off the wireless slot in case the driver did not do it.
+	 * It is not powered back on automatically on wake-up.
+	 */
+	if (macio->flags & MACIO_FLAG_AIRPORT_ON)
+		core99_airport_enable(macio->of_node, 0, 0);
+
+	/* Power off FireWire and its cable; really the driver's job */
+	if (macio->flags & MACIO_FLAG_FW_SUPPORTED) {
+		core99_firewire_enable(NULL, 0, 0);
+		core99_firewire_cable_power(NULL, 0, 0);
+	}
+
+	/* Make sure the internal modem is off (in case the driver lost it) */
+	if (macio->type != macio_keylargo)
+		pangea_modem_enable(macio->of_node, 0, 0);
+	else
+		core99_modem_enable(macio->of_node, 0, 0);
+
+	/* Same for the sound chip */
+	core99_sound_chip_enable(macio->of_node, 0, 0);
+
+	/*
+	 * Snapshot KeyLargo state
+	 */
+
+	/* GPIO levels and per-GPIO bytes */
+	save_gpio_levels[0] = MACIO_IN32(KEYLARGO_GPIO_LEVELS0);
+	save_gpio_levels[1] = MACIO_IN32(KEYLARGO_GPIO_LEVELS1);
+	for (i = 0; i < KEYLARGO_GPIO_EXTINT_CNT; i++)
+		save_gpio_extint[i] = MACIO_IN8(KEYLARGO_GPIO_EXTINT_0 + i);
+	for (i = 0; i < KEYLARGO_GPIO_CNT; i++)
+		save_gpio_normal[i] = MACIO_IN8(KEYLARGO_GPIO_0 + i);
+
+	/* MBCR (KeyLargo only), FCR0-4, and FCR5 on Pangea/Intrepid */
+	if (macio->type == macio_keylargo)
+		save_mbcr = MACIO_IN32(KEYLARGO_MBCR);
+	save_fcr[0] = MACIO_IN32(KEYLARGO_FCR0);
+	save_fcr[1] = MACIO_IN32(KEYLARGO_FCR1);
+	save_fcr[2] = MACIO_IN32(KEYLARGO_FCR2);
+	save_fcr[3] = MACIO_IN32(KEYLARGO_FCR3);
+	save_fcr[4] = MACIO_IN32(KEYLARGO_FCR4);
+	if (macio->type == macio_pangea || macio->type == macio_intrepid)
+		save_fcr[5] = MACIO_IN32(KEYLARGO_FCR5);
+
+	/* State and configuration of the DBDMA channels */
+	dbdma_save(macio, save_dbdma);
+
+	/*
+	 * Turn off as much as possible
+	 */
+	switch (macio->type) {
+	case macio_pangea:
+		pangea_shutdown(macio, 1);
+		break;
+	case macio_intrepid:
+		intrepid_shutdown(macio, 1);
+		break;
+	case macio_keylargo:
+		keylargo_shutdown(macio, 1);
+		break;
+	default:
+		break;
+	}
+
+	/*
+	 * Put the host bridge to sleep
+	 */
+
+	save_unin_clock_ctl = UN_IN(UNI_N_CLOCK_CNTL);
+	/*
+	 * Only the FireWire clock is cut here. GMAC is left alone on
+	 * purpose: its driver switches it off when necessary and WOL
+	 * needs it enabled. The PCI clock is not touched either.
+	 */
+	UN_OUT(UNI_N_CLOCK_CNTL,
+	       save_unin_clock_ctl & ~(UNI_N_CLOCK_CNTL_FW));
+	udelay(100);
+	UN_OUT(UNI_N_HWINIT_STATE, UNI_N_HWINIT_STATE_SLEEPING);
+	UN_OUT(UNI_N_POWER_MGT, UNI_N_POWER_MGT_SLEEP);
+	mdelay(10);
+
+	/*
+	 * FIXME: A bit of black magic with OpenPIC (don't ask me why)
+	 */
+	if (pmac_mb.model_id == PMAC_TYPE_SAWTOOTH) {
+		MACIO_BIS(0x506e0, 0x00400000);
+		MACIO_BIS(0x506e0, 0x80000000);
+	}
+
+	return 0;
+}
+
+/*
+ * Undo core99_sleep(): wake the UniNorth host bridge, write back the
+ * saved MBCR/FCR values (each followed by a read-back and a 10us delay),
+ * restore the DBDMA and GPIO state, and finally restore the UniNorth
+ * clock control register.
+ *
+ * Returns -ENODEV when the first mac-io chip is not a KeyLargo, Pangea
+ * or Intrepid, 0 otherwise.
+ */
+static int
+core99_wake_up(void)
+{
+	struct macio_chip *macio = &macio_chips[0];
+	int i;
+
+	switch (macio->type) {
+	case macio_keylargo:
+	case macio_pangea:
+	case macio_intrepid:
+		break;
+	default:
+		return -ENODEV;
+	}
+
+	/*
+	 * Wake up the host bridge
+	 */
+	UN_OUT(UNI_N_POWER_MGT, UNI_N_POWER_MGT_NORMAL);
+	udelay(10);
+	UN_OUT(UNI_N_HWINIT_STATE, UNI_N_HWINIT_STATE_RUNNING);
+	udelay(10);
+
+	/*
+	 * Restore KeyLargo
+	 */
+
+	if (macio->type == macio_keylargo) {
+		MACIO_OUT32(KEYLARGO_MBCR, save_mbcr);
+		(void)MACIO_IN32(KEYLARGO_MBCR);
+		udelay(10);
+	}
+	MACIO_OUT32(KEYLARGO_FCR0, save_fcr[0]);
+	(void)MACIO_IN32(KEYLARGO_FCR0);
+	udelay(10);
+	MACIO_OUT32(KEYLARGO_FCR1, save_fcr[1]);
+	(void)MACIO_IN32(KEYLARGO_FCR1);
+	udelay(10);
+	MACIO_OUT32(KEYLARGO_FCR2, save_fcr[2]);
+	(void)MACIO_IN32(KEYLARGO_FCR2);
+	udelay(10);
+	MACIO_OUT32(KEYLARGO_FCR3, save_fcr[3]);
+	(void)MACIO_IN32(KEYLARGO_FCR3);
+	udelay(10);
+	MACIO_OUT32(KEYLARGO_FCR4, save_fcr[4]);
+	(void)MACIO_IN32(KEYLARGO_FCR4);
+	udelay(10);
+	if (macio->type == macio_pangea || macio->type == macio_intrepid) {
+		MACIO_OUT32(KEYLARGO_FCR5, save_fcr[5]);
+		(void)MACIO_IN32(KEYLARGO_FCR5);
+		udelay(10);
+	}
+
+	dbdma_restore(macio, save_dbdma);
+
+	MACIO_OUT32(KEYLARGO_GPIO_LEVELS0, save_gpio_levels[0]);
+	MACIO_OUT32(KEYLARGO_GPIO_LEVELS1, save_gpio_levels[1]);
+	for (i = 0; i < KEYLARGO_GPIO_EXTINT_CNT; i++)
+		MACIO_OUT8(KEYLARGO_GPIO_EXTINT_0 + i, save_gpio_extint[i]);
+	for (i = 0; i < KEYLARGO_GPIO_CNT; i++)
+		MACIO_OUT8(KEYLARGO_GPIO_0 + i, save_gpio_normal[i]);
+
+	/* FIXME more black magic with OpenPIC ... */
+	if (pmac_mb.model_id == PMAC_TYPE_SAWTOOTH) {
+		MACIO_BIC(0x506e0, 0x00400000);
+		MACIO_BIC(0x506e0, 0x80000000);
+	}
+
+	UN_OUT(UNI_N_CLOCK_CNTL, save_unin_clock_ctl);
+	udelay(100);
+
+	return 0;
+}
+
+#endif /* CONFIG_PM */
+
+/*
+ * PMAC_FTR_SLEEP_STATE handler for Core99 machines. @node is unused.
+ *
+ * @param == 1 selects the "fake sleep" mode used for CPU speed switching:
+ * @value == 1 puts the UniNorth into IDLE2, any other value brings it
+ * back to normal. This path always returns 0.
+ *
+ * For any other @param the board must have PMAC_MB_CAN_SLEEP, else
+ * -EPERM is returned. With CONFIG_PM, @value == 1 returns the result of
+ * core99_sleep() and @value == 0 that of core99_wake_up(); everything
+ * else returns 0.
+ */
+static long
+core99_sleep_state(struct device_node *node, long param, long value)
+{
+	if (param == 1) {
+		if (value != 1) {
+			UN_OUT(UNI_N_POWER_MGT, UNI_N_POWER_MGT_NORMAL);
+			udelay(10);
+			UN_OUT(UNI_N_HWINIT_STATE, UNI_N_HWINIT_STATE_RUNNING);
+			udelay(10);
+		} else {
+			UN_OUT(UNI_N_HWINIT_STATE, UNI_N_HWINIT_STATE_SLEEPING);
+			UN_OUT(UNI_N_POWER_MGT, UNI_N_POWER_MGT_IDLE2);
+		}
+		return 0;
+	}
+	if (!(pmac_mb.board_flags & PMAC_MB_CAN_SLEEP))
+		return -EPERM;
+
+#ifdef CONFIG_PM
+	switch (value) {
+	case 1:
+		return core99_sleep();
+	case 0:
+		return core99_wake_up();
+	default:
+		break;
+	}
+
+#endif /* CONFIG_PM */
+	return 0;
+}
+
+#endif /* CONFIG_PPC64 */
+
+/*
+ * Promote a board that "may" sleep to one that "can" sleep: when
+ * PMAC_MB_MAY_SLEEP is set in the board flags, PMAC_MB_CAN_SLEEP is set
+ * too. All three arguments are unused. Always returns 0.
+ */
+static long
+generic_dev_can_wake(struct device_node *node, long param, long value)
+{
+	/*
+	 * Todo: eventually check that this really is the on-board
+	 * video device ...
+	 */
+	if ((pmac_mb.board_flags & PMAC_MB_MAY_SLEEP) != 0)
+		pmac_mb.board_flags |= PMAC_MB_CAN_SLEEP;
+
+	return 0;
+}
+
+/*
+ * Report one piece of motherboard information selected by @param:
+ *  - PMAC_MB_INFO_MODEL: returns the model id
+ *  - PMAC_MB_INFO_FLAGS: returns the board flags
+ *  - PMAC_MB_INFO_NAME:  @value is treated as a (const char **) and
+ *    receives the model name; returns 0
+ * Any other selector returns -EINVAL. @node is unused.
+ */
+static long generic_get_mb_info(struct device_node *node, long param, long value)
+{
+	if (param == PMAC_MB_INFO_MODEL)
+		return pmac_mb.model_id;
+
+	if (param == PMAC_MB_INFO_FLAGS)
+		return pmac_mb.board_flags;
+
+	if (param == PMAC_MB_INFO_NAME) {
+		/* hack hack hack... but should work */
+		const char **name_out = (const char **)value;
+
+		*name_out = pmac_mb.model_name;
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+
+/*
+ * Table definitions
+ */
+
+/* Handlers available on every machine; pmac_do_feature_call() consults
+ * them when the board table has no entry for a selector.
+ */
+static struct feature_table_entry any_features[] = {
+	{ .selector = PMAC_FTR_GET_MB_INFO,	.function = generic_get_mb_info },
+	{ .selector = PMAC_FTR_DEVICE_CAN_WAKE,	.function = generic_dev_can_wake },
+	{ .selector = 0,			.function = NULL }
+};
+
+#ifndef CONFIG_PPC64
+
+/* OHare based motherboards. For now these are only used on the 2400,
+ * 3400 and 3500 series powerbooks, as some older desktops seem to have
+ * issues with turning those asic cells on and off.
+ */
+static struct feature_table_entry ohare_features[] = {
+	{ .selector = PMAC_FTR_SCC_ENABLE,	.function = ohare_htw_scc_enable },
+	{ .selector = PMAC_FTR_SWIM3_ENABLE,	.function = ohare_floppy_enable },
+	{ .selector = PMAC_FTR_MESH_ENABLE,	.function = ohare_mesh_enable },
+	{ .selector = PMAC_FTR_IDE_ENABLE,	.function = ohare_ide_enable },
+	{ .selector = PMAC_FTR_IDE_RESET,	.function = ohare_ide_reset },
+	{ .selector = PMAC_FTR_SLEEP_STATE,	.function = ohare_sleep_state },
+	{ .selector = 0,			.function = NULL }
+};
+
+/* Heathrow desktop machines (Beige G3).
+ * Kept apart from the laptop table because some features could not be
+ * tested properly and the serial port control bits appear to confuse
+ * these machines.
+ */
+static struct feature_table_entry heathrow_desktop_features[] = {
+	{ .selector = PMAC_FTR_SWIM3_ENABLE,	.function = heathrow_floppy_enable },
+	{ .selector = PMAC_FTR_MESH_ENABLE,	.function = heathrow_mesh_enable },
+	{ .selector = PMAC_FTR_IDE_ENABLE,	.function = heathrow_ide_enable },
+	{ .selector = PMAC_FTR_IDE_RESET,	.function = heathrow_ide_reset },
+	{ .selector = PMAC_FTR_BMAC_ENABLE,	.function = heathrow_bmac_enable },
+	{ .selector = 0,			.function = NULL }
+};
+
+/* Heathrow based laptops, i.e. the Wallstreet and Mainstreet
+ * powerbooks.
+ */
+static struct feature_table_entry heathrow_laptop_features[] = {
+	{ .selector = PMAC_FTR_SCC_ENABLE,	.function = ohare_htw_scc_enable },
+	{ .selector = PMAC_FTR_MODEM_ENABLE,	.function = heathrow_modem_enable },
+	{ .selector = PMAC_FTR_SWIM3_ENABLE,	.function = heathrow_floppy_enable },
+	{ .selector = PMAC_FTR_MESH_ENABLE,	.function = heathrow_mesh_enable },
+	{ .selector = PMAC_FTR_IDE_ENABLE,	.function = heathrow_ide_enable },
+	{ .selector = PMAC_FTR_IDE_RESET,	.function = heathrow_ide_reset },
+	{ .selector = PMAC_FTR_BMAC_ENABLE,	.function = heathrow_bmac_enable },
+	{ .selector = PMAC_FTR_SOUND_CHIP_ENABLE, .function = heathrow_sound_enable },
+	{ .selector = PMAC_FTR_SLEEP_STATE,	.function = heathrow_sleep_state },
+	{ .selector = 0,			.function = NULL }
+};
+
+/* Paddington based machines:
+ * the Lombard (101) powerbook, the first iMac models, the B&W G3 and
+ * the Yikes G4.
+ */
+static struct feature_table_entry paddington_features[] = {
+	{ .selector = PMAC_FTR_SCC_ENABLE,	.function = ohare_htw_scc_enable },
+	{ .selector = PMAC_FTR_MODEM_ENABLE,	.function = heathrow_modem_enable },
+	{ .selector = PMAC_FTR_SWIM3_ENABLE,	.function = heathrow_floppy_enable },
+	{ .selector = PMAC_FTR_MESH_ENABLE,	.function = heathrow_mesh_enable },
+	{ .selector = PMAC_FTR_IDE_ENABLE,	.function = heathrow_ide_enable },
+	{ .selector = PMAC_FTR_IDE_RESET,	.function = heathrow_ide_reset },
+	{ .selector = PMAC_FTR_BMAC_ENABLE,	.function = heathrow_bmac_enable },
+	{ .selector = PMAC_FTR_SOUND_CHIP_ENABLE, .function = heathrow_sound_enable },
+	{ .selector = PMAC_FTR_SLEEP_STATE,	.function = heathrow_sleep_state },
+	{ .selector = 0,			.function = NULL }
+};
+
+/* Core99 & MacRISC 2 machines: everything released since the iBook
+ * (included), i.e. all AGP machines, except those built on the pangea
+ * chipset. Pangea is the "combo" UniNorth/KeyLargo used on the iBook2
+ * and the iMac "Flower Power".
+ */
+static struct feature_table_entry core99_features[] = {
+	{ .selector = PMAC_FTR_SCC_ENABLE,	.function = core99_scc_enable },
+	{ .selector = PMAC_FTR_MODEM_ENABLE,	.function = core99_modem_enable },
+	{ .selector = PMAC_FTR_IDE_ENABLE,	.function = core99_ide_enable },
+	{ .selector = PMAC_FTR_IDE_RESET,	.function = core99_ide_reset },
+	{ .selector = PMAC_FTR_GMAC_ENABLE,	.function = core99_gmac_enable },
+	{ .selector = PMAC_FTR_GMAC_PHY_RESET,	.function = core99_gmac_phy_reset },
+	{ .selector = PMAC_FTR_SOUND_CHIP_ENABLE, .function = core99_sound_chip_enable },
+	{ .selector = PMAC_FTR_AIRPORT_ENABLE,	.function = core99_airport_enable },
+	{ .selector = PMAC_FTR_USB_ENABLE,	.function = core99_usb_enable },
+	{ .selector = PMAC_FTR_1394_ENABLE,	.function = core99_firewire_enable },
+	{ .selector = PMAC_FTR_1394_CABLE_POWER, .function = core99_firewire_cable_power },
+#ifdef CONFIG_PM
+	{ .selector = PMAC_FTR_SLEEP_STATE,	.function = core99_sleep_state },
+#endif
+#ifdef CONFIG_SMP
+	{ .selector = PMAC_FTR_RESET_CPU,	.function = core99_reset_cpu },
+#endif /* CONFIG_SMP */
+	{ .selector = PMAC_FTR_READ_GPIO,	.function = core99_read_gpio },
+	{ .selector = PMAC_FTR_WRITE_GPIO,	.function = core99_write_gpio },
+	{ .selector = 0,			.function = NULL }
+};
+
+/* RackMac: the core99 handlers minus modem, sound chip and AirPort.
+ */
+static struct feature_table_entry rackmac_features[] = {
+	{ .selector = PMAC_FTR_SCC_ENABLE,	.function = core99_scc_enable },
+	{ .selector = PMAC_FTR_IDE_ENABLE,	.function = core99_ide_enable },
+	{ .selector = PMAC_FTR_IDE_RESET,	.function = core99_ide_reset },
+	{ .selector = PMAC_FTR_GMAC_ENABLE,	.function = core99_gmac_enable },
+	{ .selector = PMAC_FTR_GMAC_PHY_RESET,	.function = core99_gmac_phy_reset },
+	{ .selector = PMAC_FTR_USB_ENABLE,	.function = core99_usb_enable },
+	{ .selector = PMAC_FTR_1394_ENABLE,	.function = core99_firewire_enable },
+	{ .selector = PMAC_FTR_1394_CABLE_POWER, .function = core99_firewire_cable_power },
+	{ .selector = PMAC_FTR_SLEEP_STATE,	.function = core99_sleep_state },
+#ifdef CONFIG_SMP
+	{ .selector = PMAC_FTR_RESET_CPU,	.function = core99_reset_cpu },
+#endif /* CONFIG_SMP */
+	{ .selector = PMAC_FTR_READ_GPIO,	.function = core99_read_gpio },
+	{ .selector = PMAC_FTR_WRITE_GPIO,	.function = core99_write_gpio },
+	{ .selector = 0,			.function = NULL }
+};
+
+/* Pangea: same handlers as core99 except for the modem, which uses
+ * pangea_modem_enable, and without a CPU reset entry.
+ */
+static struct feature_table_entry pangea_features[] = {
+	{ .selector = PMAC_FTR_SCC_ENABLE,	.function = core99_scc_enable },
+	{ .selector = PMAC_FTR_MODEM_ENABLE,	.function = pangea_modem_enable },
+	{ .selector = PMAC_FTR_IDE_ENABLE,	.function = core99_ide_enable },
+	{ .selector = PMAC_FTR_IDE_RESET,	.function = core99_ide_reset },
+	{ .selector = PMAC_FTR_GMAC_ENABLE,	.function = core99_gmac_enable },
+	{ .selector = PMAC_FTR_GMAC_PHY_RESET,	.function = core99_gmac_phy_reset },
+	{ .selector = PMAC_FTR_SOUND_CHIP_ENABLE, .function = core99_sound_chip_enable },
+	{ .selector = PMAC_FTR_AIRPORT_ENABLE,	.function = core99_airport_enable },
+	{ .selector = PMAC_FTR_USB_ENABLE,	.function = core99_usb_enable },
+	{ .selector = PMAC_FTR_1394_ENABLE,	.function = core99_firewire_enable },
+	{ .selector = PMAC_FTR_1394_CABLE_POWER, .function = core99_firewire_cable_power },
+	{ .selector = PMAC_FTR_SLEEP_STATE,	.function = core99_sleep_state },
+	{ .selector = PMAC_FTR_READ_GPIO,	.function = core99_read_gpio },
+	{ .selector = PMAC_FTR_WRITE_GPIO,	.function = core99_write_gpio },
+	{ .selector = 0,			.function = NULL }
+};
+
+/* Intrepid: the pangea handlers plus the AACK delay control.
+ */
+static struct feature_table_entry intrepid_features[] = {
+	{ .selector = PMAC_FTR_SCC_ENABLE,	.function = core99_scc_enable },
+	{ .selector = PMAC_FTR_MODEM_ENABLE,	.function = pangea_modem_enable },
+	{ .selector = PMAC_FTR_IDE_ENABLE,	.function = core99_ide_enable },
+	{ .selector = PMAC_FTR_IDE_RESET,	.function = core99_ide_reset },
+	{ .selector = PMAC_FTR_GMAC_ENABLE,	.function = core99_gmac_enable },
+	{ .selector = PMAC_FTR_GMAC_PHY_RESET,	.function = core99_gmac_phy_reset },
+	{ .selector = PMAC_FTR_SOUND_CHIP_ENABLE, .function = core99_sound_chip_enable },
+	{ .selector = PMAC_FTR_AIRPORT_ENABLE,	.function = core99_airport_enable },
+	{ .selector = PMAC_FTR_USB_ENABLE,	.function = core99_usb_enable },
+	{ .selector = PMAC_FTR_1394_ENABLE,	.function = core99_firewire_enable },
+	{ .selector = PMAC_FTR_1394_CABLE_POWER, .function = core99_firewire_cable_power },
+	{ .selector = PMAC_FTR_SLEEP_STATE,	.function = core99_sleep_state },
+	{ .selector = PMAC_FTR_READ_GPIO,	.function = core99_read_gpio },
+	{ .selector = PMAC_FTR_WRITE_GPIO,	.function = core99_write_gpio },
+	{ .selector = PMAC_FTR_AACK_DELAY_ENABLE, .function = intrepid_aack_delay_enable },
+	{ .selector = 0,			.function = NULL }
+};
+
+#else /* CONFIG_PPC64 */
+
+/* G5 (64-bit) machines. GPIO access reuses the core99 handlers.
+ */
+static struct feature_table_entry g5_features[] = {
+	{ .selector = PMAC_FTR_GMAC_ENABLE,	.function = g5_gmac_enable },
+	{ .selector = PMAC_FTR_1394_ENABLE,	.function = g5_fw_enable },
+	{ .selector = PMAC_FTR_ENABLE_MPIC,	.function = g5_mpic_enable },
+	{ .selector = PMAC_FTR_GMAC_PHY_RESET,	.function = g5_eth_phy_reset },
+	{ .selector = PMAC_FTR_SOUND_CHIP_ENABLE, .function = g5_i2s_enable },
+#ifdef CONFIG_SMP
+	{ .selector = PMAC_FTR_RESET_CPU,	.function = g5_reset_cpu },
+#endif /* CONFIG_SMP */
+	{ .selector = PMAC_FTR_READ_GPIO,	.function = core99_read_gpio },
+	{ .selector = PMAC_FTR_WRITE_GPIO,	.function = core99_write_gpio },
+	{ .selector = 0,			.function = NULL }
+};
+
+#endif /* CONFIG_PPC64 */
+
+/*
+ * Known motherboards. Each entry lists, in order: the string matched
+ * against the device-tree, a human readable name, the model id, the
+ * feature table (NULL for none) and the board flags.
+ */
+static struct pmac_mb_def pmac_mb_defs[] = {
+#ifndef CONFIG_PPC64
+	/*
+	 * Desktops
+	 */
+
+	{ "AAPL,8500", "PowerMac 8500/8600",
+	  PMAC_TYPE_PSURGE, NULL, 0 },
+	{ "AAPL,9500", "PowerMac 9500/9600",
+	  PMAC_TYPE_PSURGE, NULL, 0 },
+	{ "AAPL,7200", "PowerMac 7200",
+	  PMAC_TYPE_PSURGE, NULL, 0 },
+	{ "AAPL,7300", "PowerMac 7200/7300",
+	  PMAC_TYPE_PSURGE, NULL, 0 },
+	{ "AAPL,7500", "PowerMac 7500",
+	  PMAC_TYPE_PSURGE, NULL, 0 },
+	{ "AAPL,ShinerESB", "Apple Network Server",
+	  PMAC_TYPE_ANS, NULL, 0 },
+	{ "AAPL,e407", "Alchemy",
+	  PMAC_TYPE_ALCHEMY, NULL, 0 },
+	{ "AAPL,e411", "Gazelle",
+	  PMAC_TYPE_GAZELLE, NULL, 0 },
+	{ "AAPL,Gossamer", "PowerMac G3 (Gossamer)",
+	  PMAC_TYPE_GOSSAMER, heathrow_desktop_features, 0 },
+	{ "AAPL,PowerMac G3", "PowerMac G3 (Silk)",
+	  PMAC_TYPE_SILK, heathrow_desktop_features, 0 },
+	{ "PowerMac1,1", "Blue&White G3",
+	  PMAC_TYPE_YOSEMITE, paddington_features, 0 },
+	{ "PowerMac1,2", "PowerMac G4 PCI Graphics",
+	  PMAC_TYPE_YIKES, paddington_features, 0 },
+	{ "PowerMac2,1", "iMac FireWire",
+	  PMAC_TYPE_FW_IMAC, core99_features,
+	  PMAC_MB_MAY_SLEEP | PMAC_MB_OLD_CORE99 },
+	{ "PowerMac2,2", "iMac FireWire",
+	  PMAC_TYPE_FW_IMAC, core99_features,
+	  PMAC_MB_MAY_SLEEP | PMAC_MB_OLD_CORE99 },
+	{ "PowerMac3,1", "PowerMac G4 AGP Graphics",
+	  PMAC_TYPE_SAWTOOTH, core99_features,
+	  PMAC_MB_OLD_CORE99 },
+	{ "PowerMac3,2", "PowerMac G4 AGP Graphics",
+	  PMAC_TYPE_SAWTOOTH, core99_features,
+	  PMAC_MB_MAY_SLEEP | PMAC_MB_OLD_CORE99 },
+	{ "PowerMac3,3", "PowerMac G4 AGP Graphics",
+	  PMAC_TYPE_SAWTOOTH, core99_features,
+	  PMAC_MB_MAY_SLEEP | PMAC_MB_OLD_CORE99 },
+	{ "PowerMac3,4", "PowerMac G4 Silver",
+	  PMAC_TYPE_QUICKSILVER, core99_features,
+	  PMAC_MB_MAY_SLEEP },
+	{ "PowerMac3,5", "PowerMac G4 Silver",
+	  PMAC_TYPE_QUICKSILVER, core99_features,
+	  PMAC_MB_MAY_SLEEP },
+	{ "PowerMac3,6", "PowerMac G4 Windtunnel",
+	  PMAC_TYPE_WINDTUNNEL, core99_features,
+	  PMAC_MB_MAY_SLEEP },
+	{ "PowerMac4,1", "iMac \"Flower Power\"",
+	  PMAC_TYPE_PANGEA_IMAC, pangea_features,
+	  PMAC_MB_MAY_SLEEP },
+	{ "PowerMac4,2", "Flat panel iMac",
+	  PMAC_TYPE_FLAT_PANEL_IMAC, pangea_features,
+	  PMAC_MB_CAN_SLEEP },
+	{ "PowerMac4,4", "eMac",
+	  PMAC_TYPE_EMAC, core99_features,
+	  PMAC_MB_MAY_SLEEP },
+	{ "PowerMac5,1", "PowerMac G4 Cube",
+	  PMAC_TYPE_CUBE, core99_features,
+	  PMAC_MB_MAY_SLEEP | PMAC_MB_OLD_CORE99 },
+	{ "PowerMac6,1", "Flat panel iMac",
+	  PMAC_TYPE_UNKNOWN_INTREPID, intrepid_features,
+	  PMAC_MB_MAY_SLEEP },
+	{ "PowerMac6,3", "Flat panel iMac",
+	  PMAC_TYPE_UNKNOWN_INTREPID, intrepid_features,
+	  PMAC_MB_MAY_SLEEP },
+	{ "PowerMac6,4", "eMac",
+	  PMAC_TYPE_UNKNOWN_INTREPID, intrepid_features,
+	  PMAC_MB_MAY_SLEEP },
+	{ "PowerMac10,1", "Mac mini",
+	  PMAC_TYPE_UNKNOWN_INTREPID, intrepid_features,
+	  PMAC_MB_MAY_SLEEP },
+	{ "PowerMac10,2", "Mac mini (Late 2005)",
+	  PMAC_TYPE_UNKNOWN_INTREPID, intrepid_features,
+	  PMAC_MB_MAY_SLEEP },
+	{ "iMac,1", "iMac (first generation)",
+	  PMAC_TYPE_ORIG_IMAC, paddington_features, 0 },
+
+	/*
+	 * Xserve's
+	 */
+
+	{ "RackMac1,1", "XServe",
+	  PMAC_TYPE_RACKMAC, rackmac_features, 0 },
+	{ "RackMac1,2", "XServe rev. 2",
+	  PMAC_TYPE_RACKMAC, rackmac_features, 0 },
+
+	/*
+	 * Laptops
+	 */
+
+	{ "AAPL,3400/2400", "PowerBook 3400",
+	  PMAC_TYPE_HOOPER, ohare_features,
+	  PMAC_MB_CAN_SLEEP | PMAC_MB_MOBILE },
+	{ "AAPL,3500", "PowerBook 3500",
+	  PMAC_TYPE_KANGA, ohare_features,
+	  PMAC_MB_CAN_SLEEP | PMAC_MB_MOBILE },
+	{ "AAPL,PowerBook1998", "PowerBook Wallstreet",
+	  PMAC_TYPE_WALLSTREET, heathrow_laptop_features,
+	  PMAC_MB_CAN_SLEEP | PMAC_MB_MOBILE },
+	{ "PowerBook1,1", "PowerBook 101 (Lombard)",
+	  PMAC_TYPE_101_PBOOK, paddington_features,
+	  PMAC_MB_CAN_SLEEP | PMAC_MB_MOBILE },
+	{ "PowerBook2,1", "iBook (first generation)",
+	  PMAC_TYPE_ORIG_IBOOK, core99_features,
+	  PMAC_MB_CAN_SLEEP | PMAC_MB_OLD_CORE99 | PMAC_MB_MOBILE },
+	{ "PowerBook2,2", "iBook FireWire",
+	  PMAC_TYPE_FW_IBOOK, core99_features,
+	  PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER |
+	  PMAC_MB_OLD_CORE99 | PMAC_MB_MOBILE },
+	{ "PowerBook3,1", "PowerBook Pismo",
+	  PMAC_TYPE_PISMO, core99_features,
+	  PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER |
+	  PMAC_MB_OLD_CORE99 | PMAC_MB_MOBILE },
+	{ "PowerBook3,2", "PowerBook Titanium",
+	  PMAC_TYPE_TITANIUM, core99_features,
+	  PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE },
+	{ "PowerBook3,3", "PowerBook Titanium II",
+	  PMAC_TYPE_TITANIUM2, core99_features,
+	  PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE },
+	{ "PowerBook3,4", "PowerBook Titanium III",
+	  PMAC_TYPE_TITANIUM3, core99_features,
+	  PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE },
+	{ "PowerBook3,5", "PowerBook Titanium IV",
+	  PMAC_TYPE_TITANIUM4, core99_features,
+	  PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE },
+	{ "PowerBook4,1", "iBook 2",
+	  PMAC_TYPE_IBOOK2, pangea_features,
+	  PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE },
+	{ "PowerBook4,2", "iBook 2",
+	  PMAC_TYPE_IBOOK2, pangea_features,
+	  PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE },
+	{ "PowerBook4,3", "iBook 2 rev. 2",
+	  PMAC_TYPE_IBOOK2, pangea_features,
+	  PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE },
+	{ "PowerBook5,1", "PowerBook G4 17\"",
+	  PMAC_TYPE_UNKNOWN_INTREPID, intrepid_features,
+	  PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE },
+	{ "PowerBook5,2", "PowerBook G4 15\"",
+	  PMAC_TYPE_UNKNOWN_INTREPID, intrepid_features,
+	  PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE },
+	{ "PowerBook5,3", "PowerBook G4 17\"",
+	  PMAC_TYPE_UNKNOWN_INTREPID, intrepid_features,
+	  PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE },
+	{ "PowerBook5,4", "PowerBook G4 15\"",
+	  PMAC_TYPE_UNKNOWN_INTREPID, intrepid_features,
+	  PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE },
+	{ "PowerBook5,5", "PowerBook G4 17\"",
+	  PMAC_TYPE_UNKNOWN_INTREPID, intrepid_features,
+	  PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE },
+	{ "PowerBook5,6", "PowerBook G4 15\"",
+	  PMAC_TYPE_UNKNOWN_INTREPID, intrepid_features,
+	  PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE },
+	{ "PowerBook5,7", "PowerBook G4 17\"",
+	  PMAC_TYPE_UNKNOWN_INTREPID, intrepid_features,
+	  PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE },
+	{ "PowerBook5,8", "PowerBook G4 15\"",
+	  PMAC_TYPE_UNKNOWN_INTREPID, intrepid_features,
+	  PMAC_MB_MAY_SLEEP | PMAC_MB_MOBILE },
+	{ "PowerBook5,9", "PowerBook G4 17\"",
+	  PMAC_TYPE_UNKNOWN_INTREPID, intrepid_features,
+	  PMAC_MB_MAY_SLEEP | PMAC_MB_MOBILE },
+	{ "PowerBook6,1", "PowerBook G4 12\"",
+	  PMAC_TYPE_UNKNOWN_INTREPID, intrepid_features,
+	  PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE },
+	{ "PowerBook6,2", "PowerBook G4",
+	  PMAC_TYPE_UNKNOWN_INTREPID, intrepid_features,
+	  PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE },
+	{ "PowerBook6,3", "iBook G4",
+	  PMAC_TYPE_UNKNOWN_INTREPID, intrepid_features,
+	  PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE },
+	{ "PowerBook6,4", "PowerBook G4 12\"",
+	  PMAC_TYPE_UNKNOWN_INTREPID, intrepid_features,
+	  PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE },
+	{ "PowerBook6,5", "iBook G4",
+	  PMAC_TYPE_UNKNOWN_INTREPID, intrepid_features,
+	  PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE },
+	{ "PowerBook6,7", "iBook G4",
+	  PMAC_TYPE_UNKNOWN_INTREPID, intrepid_features,
+	  PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE },
+	{ "PowerBook6,8", "PowerBook G4 12\"",
+	  PMAC_TYPE_UNKNOWN_INTREPID, intrepid_features,
+	  PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE },
+#else /* CONFIG_PPC64 */
+	/*
+	 * 64-bit machines
+	 */
+
+	{ "PowerMac7,2", "PowerMac G5",
+	  PMAC_TYPE_POWERMAC_G5, g5_features, 0 },
+	{ "PowerMac7,3", "PowerMac G5",
+	  PMAC_TYPE_POWERMAC_G5, g5_features, 0 },
+	{ "PowerMac8,1", "iMac G5",
+	  PMAC_TYPE_IMAC_G5, g5_features, 0 },
+	{ "PowerMac9,1", "PowerMac G5",
+	  PMAC_TYPE_POWERMAC_G5_U3L, g5_features, 0 },
+	{ "PowerMac11,2", "PowerMac G5 Dual Core",
+	  PMAC_TYPE_POWERMAC_G5_U3L, g5_features, 0 },
+	{ "PowerMac12,1", "iMac G5 (iSight)",
+	  PMAC_TYPE_POWERMAC_G5_U3L, g5_features, 0 },
+	{ "RackMac3,1", "XServe G5",
+	  PMAC_TYPE_XSERVE_G5, g5_features, 0 },
+#endif /* CONFIG_PPC64 */
+};
+
+/* Return the handler registered for @selector in @table, or NULL. */
+static feature_call pmac_lookup_feature(const struct feature_table_entry *table,
+					 unsigned int selector)
+{
+	const struct feature_table_entry *entry;
+
+	/* A table ends at the first entry whose function pointer is NULL */
+	for (entry = table; entry->function; entry++)
+		if (entry->selector == selector)
+			return entry->function;
+
+	return NULL;
+}
+
+/*
+ * The toplevel feature_call callback
+ *
+ * Looks @selector up in the board's feature table first (when there is
+ * one) and in any_features second. The variadic part must carry, in
+ * order, a device node pointer, a long "param" and a long "value";
+ * they are handed to the handler unchanged.
+ *
+ * Returns -ENODEV when no handler is registered for @selector, else
+ * whatever the handler returns.
+ */
+long pmac_do_feature_call(unsigned int selector, ...)
+{
+	struct device_node *node;
+	long param, value;
+	feature_call handler = NULL;
+	va_list ap;
+
+	if (pmac_mb.features)
+		handler = pmac_lookup_feature(pmac_mb.features, selector);
+	if (!handler)
+		handler = pmac_lookup_feature(any_features, selector);
+	if (!handler)
+		return -ENODEV;
+
+	va_start(ap, selector);
+	node = (struct device_node *)va_arg(ap, void *);
+	param = va_arg(ap, long);
+	value = va_arg(ap, long);
+	va_end(ap);
+
+	return handler(node, param, value);
+}
+
+/*
+ * Identify the motherboard and fill in pmac_mb.
+ *
+ * The "model" property of the "device-tree" node is first matched exactly
+ * against pmac_mb_defs[], then every entry is tried as a machine
+ * "compatible" string, and finally a generic entry is derived from the
+ * type of the first mac-io chip. The powersave_nap (and, on 32-bit,
+ * powersave_lowspeed) defaults are set here as well.
+ *
+ * Returns 0 on success, -ENODEV when nothing matches or, on 32-bit, when
+ * the register used to tell Hooper from Comet cannot be mapped.
+ */
+static int __init probe_motherboard(void)
+{
+	int i;
+	struct macio_chip *macio = &macio_chips[0];
+	const char *model = NULL;
+	struct device_node *dt;
+	int ret = 0;
+
+	/* Lookup known motherboard type in device-tree. First try an
+	 * exact match on the "model" property, then try a "compatible"
+	 * match if none is found.
+	 */
+	dt = of_find_node_by_name(NULL, "device-tree");
+	if (dt != NULL)
+		model = of_get_property(dt, "model", NULL);
+	for(i=0; model && i<ARRAY_SIZE(pmac_mb_defs); i++) {
+		if (strcmp(model, pmac_mb_defs[i].model_string) == 0) {
+			pmac_mb = pmac_mb_defs[i];
+			goto found;
+		}
+	}
+	for(i=0; i<ARRAY_SIZE(pmac_mb_defs); i++) {
+		if (of_machine_is_compatible(pmac_mb_defs[i].model_string)) {
+			pmac_mb = pmac_mb_defs[i];
+			goto found;
+		}
+	}
+
+	/* Fallback to selection depending on mac-io chip type */
+	switch(macio->type) {
+#ifndef CONFIG_PPC64
+	case macio_grand_central:
+		pmac_mb.model_id = PMAC_TYPE_PSURGE;
+		pmac_mb.model_name = "Unknown PowerSurge";
+		break;
+	case macio_ohare:
+		pmac_mb.model_id = PMAC_TYPE_UNKNOWN_OHARE;
+		pmac_mb.model_name = "Unknown OHare-based";
+		break;
+	case macio_heathrow:
+		pmac_mb.model_id = PMAC_TYPE_UNKNOWN_HEATHROW;
+		pmac_mb.model_name = "Unknown Heathrow-based";
+		pmac_mb.features = heathrow_desktop_features;
+		break;
+	case macio_paddington:
+		pmac_mb.model_id = PMAC_TYPE_UNKNOWN_PADDINGTON;
+		pmac_mb.model_name = "Unknown Paddington-based";
+		pmac_mb.features = paddington_features;
+		break;
+	case macio_keylargo:
+		pmac_mb.model_id = PMAC_TYPE_UNKNOWN_CORE99;
+		pmac_mb.model_name = "Unknown Keylargo-based";
+		pmac_mb.features = core99_features;
+		break;
+	case macio_pangea:
+		pmac_mb.model_id = PMAC_TYPE_UNKNOWN_PANGEA;
+		pmac_mb.model_name = "Unknown Pangea-based";
+		pmac_mb.features = pangea_features;
+		break;
+	case macio_intrepid:
+		pmac_mb.model_id = PMAC_TYPE_UNKNOWN_INTREPID;
+		pmac_mb.model_name = "Unknown Intrepid-based";
+		pmac_mb.features = intrepid_features;
+		break;
+#else /* CONFIG_PPC64 */
+	case macio_keylargo2:
+		pmac_mb.model_id = PMAC_TYPE_UNKNOWN_K2;
+		pmac_mb.model_name = "Unknown K2-based";
+		pmac_mb.features = g5_features;
+		break;
+	case macio_shasta:
+		pmac_mb.model_id = PMAC_TYPE_UNKNOWN_SHASTA;
+		pmac_mb.model_name = "Unknown Shasta-based";
+		pmac_mb.features = g5_features;
+		break;
+#endif /* CONFIG_PPC64 */
+	default:
+		ret = -ENODEV;
+		goto done;
+	}
+found:
+#ifndef CONFIG_PPC64
+	/* Fixup Hooper vs. Comet */
+	if (pmac_mb.model_id == PMAC_TYPE_HOOPER) {
+		u32 __iomem * mach_id_ptr = ioremap(0xf3000034, 4);
+		if (!mach_id_ptr) {
+			ret = -ENODEV;
+			goto done;
+		}
+		/* Here, I used to disable the media-bay on comet. It
+		 * appears this is wrong, the floppy connector is actually
+		 * a kind of media-bay and works with the current driver.
+		 */
+		if (__raw_readl(mach_id_ptr) & 0x20000000UL)
+			pmac_mb.model_id = PMAC_TYPE_COMET;
+		iounmap(mach_id_ptr);
+	}
+
+	/* Set default value of powersave_nap on machines that support it.
+	 * It appears that uninorth rev 3 has a problem with it, we don't
+	 * enable it on those. In theory, the flush-on-lock property is
+	 * supposed to be set when not supported, but I'm not very confident
+	 * that all Apple OF revs did it properly, I do it the paranoid way.
+	 */
+	if (uninorth_base && uninorth_rev > 3) {
+		struct device_node *np;
+		/* Counts CPU nodes across iterations: it has to live outside
+		 * the loop body or the "more than one CPU" test below can
+		 * never be true.
+		 */
+		int cpu_count = 1;
+
+		for_each_of_cpu_node(np) {
+			/* Nap mode not supported on SMP */
+			if (of_property_read_bool(np, "flush-on-lock") ||
+			    (cpu_count > 1)) {
+				powersave_nap = 0;
+				/* Leaving the iterator early: drop its ref */
+				of_node_put(np);
+				break;
+			}
+
+			cpu_count++;
+			powersave_nap = 1;
+		}
+	}
+	if (powersave_nap)
+		printk(KERN_DEBUG "Processor NAP mode on idle enabled.\n");
+
+	/* On CPUs that support it (750FX), lowspeed by default during
+	 * NAP mode
+	 */
+	powersave_lowspeed = 1;
+
+#else /* CONFIG_PPC64 */
+	powersave_nap = 1;
+#endif /* CONFIG_PPC64 */
+
+	/* Check for "mobile" machine */
+	if (model && (strncmp(model, "PowerBook", 9) == 0
+		   || strncmp(model, "iBook", 5) == 0))
+		pmac_mb.board_flags |= PMAC_MB_MOBILE;
+
+
+	printk(KERN_INFO "PowerMac motherboard: %s\n", pmac_mb.model_name);
+done:
+	of_node_put(dt);
+	return ret;
+}
+
+/* Initialize the Core99 UniNorth host bridge and memory controller
+ *
+ * Looks for a "uni-n", "u3" or "u4" node (in that order) and records the
+ * variant in uninorth_maj (1, 3, 4, or 0 when no node exists). On success
+ * uninorth_base, uninorth_rev and, for U3/U4, u3_ht_base are set up. When
+ * a mapping fails, uninorth_base is left (or put back to) NULL so that
+ * code testing it never touches an unmapped region.
+ */
+static void __init probe_uninorth(void)
+{
+	struct resource res;
+	unsigned long actrl;
+
+	/* Locate core99 Uni-N */
+	uninorth_node = of_find_node_by_name(NULL, "uni-n");
+	uninorth_maj = 1;
+
+	/* Locate G5 u3 */
+	if (uninorth_node == NULL) {
+		uninorth_node = of_find_node_by_name(NULL, "u3");
+		uninorth_maj = 3;
+	}
+	/* Locate G5 u4 */
+	if (uninorth_node == NULL) {
+		uninorth_node = of_find_node_by_name(NULL, "u4");
+		uninorth_maj = 4;
+	}
+	if (uninorth_node == NULL) {
+		uninorth_maj = 0;
+		return;
+	}
+
+	if (of_address_to_resource(uninorth_node, 0, &res))
+		return;
+
+	uninorth_base = ioremap(res.start, 0x40000);
+	if (uninorth_base == NULL)
+		return;
+	uninorth_rev = in_be32(UN_REG(UNI_N_VERSION));
+	if (uninorth_maj == 3 || uninorth_maj == 4) {
+		u3_ht_base = ioremap(res.start + U3_HT_CONFIG_BASE, 0x1000);
+		if (u3_ht_base == NULL) {
+			iounmap(uninorth_base);
+			/* Do not leave a dangling mapping pointer behind */
+			uninorth_base = NULL;
+			return;
+		}
+	}
+
+	printk(KERN_INFO "Found %s memory controller & host bridge"
+	       " @ 0x%08x revision: 0x%02x\n", uninorth_maj == 3 ? "U3" :
+	       uninorth_maj == 4 ? "U4" : "UniNorth",
+	       (unsigned int)res.start, uninorth_rev);
+	printk(KERN_INFO "Mapped at 0x%08lx\n", (unsigned long)uninorth_base);
+
+	/* Set the arbiter QAck delay according to what Apple does
+	 */
+	if (uninorth_rev < 0x11) {
+		actrl = UN_IN(UNI_N_ARB_CTRL) & ~UNI_N_ARB_CTRL_QACK_DELAY_MASK;
+		actrl |= ((uninorth_rev < 3) ? UNI_N_ARB_CTRL_QACK_DELAY105 :
+			UNI_N_ARB_CTRL_QACK_DELAY) <<
+			UNI_N_ARB_CTRL_QACK_DELAY_SHIFT;
+		UN_OUT(UNI_N_ARB_CTRL, actrl);
+	}
+
+	/* Some more magic as done by them in recent MacOS X on UniNorth
+	 * revs 1.5 to 2.0 and Pangea. Seem to toggle the UniN Maxbus/PCI
+	 * memory timeout
+	 */
+	if ((uninorth_rev >= 0x11 && uninorth_rev <= 0x24) ||
+	    uninorth_rev == 0xc0)
+		UN_OUT(0x2160, UN_IN(0x2160) & 0x00ffffff);
+}
+
+/*
+ * Look for one mac-io chip and record it in the first free macio_chips[]
+ * slot.
+ *
+ * @name:   device-tree node name to search for
+ * @compat: "compatible" string the node must match, or NULL to take the
+ *          first node with that name
+ * @type:   macio type to record; a keylargo/keylargo2 type is refined to
+ *          pangea, intrepid or shasta from the "device-id" property
+ *
+ * A node that is already recorded is skipped. On success the node
+ * reference taken by the iterator is kept by macio_chips[]; on every
+ * failure path it is dropped.
+ */
+static void __init probe_one_macio(const char *name, const char *compat, int type)
+{
+	struct device_node* node;
+	int i;
+	volatile u32 __iomem *base;
+	const u32 *addrp, *revp;
+	phys_addr_t addr;
+	u64 size;
+
+	for_each_node_by_name(node, name) {
+		if (!compat)
+			break;
+		if (of_device_is_compatible(node, compat))
+			break;
+	}
+	if (!node)
+		return;
+	for(i=0; i<MAX_MACIO_CHIPS; i++) {
+		if (!macio_chips[i].of_node)
+			break;
+		if (macio_chips[i].of_node == node)
+			goto out_put;
+	}
+
+	if (i >= MAX_MACIO_CHIPS) {
+		printk(KERN_ERR "pmac_feature: Please increase MAX_MACIO_CHIPS !\n");
+		printk(KERN_ERR "pmac_feature: %pOF skipped\n", node);
+		goto out_put;
+	}
+	addrp = of_get_pci_address(node, 0, &size, NULL);
+	if (addrp == NULL) {
+		printk(KERN_ERR "pmac_feature: %pOF: can't find base !\n",
+		       node);
+		goto out_put;
+	}
+	addr = of_translate_address(node, addrp);
+	if (addr == 0) {
+		printk(KERN_ERR "pmac_feature: %pOF, can't translate base !\n",
+		       node);
+		goto out_put;
+	}
+	base = ioremap(addr, (unsigned long)size);
+	if (!base) {
+		printk(KERN_ERR "pmac_feature: %pOF, can't map mac-io chip !\n",
+		       node);
+		goto out_put;
+	}
+	if (type == macio_keylargo || type == macio_keylargo2) {
+		const u32 *did = of_get_property(node, "device-id", NULL);
+
+		/* The property may be absent: keep the caller's type then */
+		if (did) {
+			switch (*did) {
+			case 0x00000025:
+				type = macio_pangea;
+				break;
+			case 0x0000003e:
+				type = macio_intrepid;
+				break;
+			case 0x0000004f:
+				type = macio_shasta;
+				break;
+			default:
+				break;
+			}
+		}
+	}
+	macio_chips[i].of_node = node;
+	macio_chips[i].type = type;
+	macio_chips[i].base = base;
+	macio_chips[i].flags = MACIO_FLAG_SCCA_ON | MACIO_FLAG_SCCB_ON;
+	macio_chips[i].name = macio_names[type];
+	revp = of_get_property(node, "revision-id", NULL);
+	if (revp)
+		macio_chips[i].rev = *revp;
+	printk(KERN_INFO "Found a %s mac-io controller, rev: %d, mapped at 0x%p\n",
+	       macio_names[type], macio_chips[i].rev, macio_chips[i].base);
+
+	return;
+
+out_put:
+	of_node_put(node);
+}
+
+/*
+ * Probe every known mac-io flavour and make sure the "main" chip ends up
+ * in macio_chips[0].
+ *
+ * Returns 0 when at least one chip was found, -ENODEV otherwise.
+ */
+static int __init
+probe_macios(void)
+{
+	int c0, c1;
+
+	/* Warning, ordering is important */
+	probe_one_macio("gc", NULL, macio_grand_central);
+	probe_one_macio("ohare", NULL, macio_ohare);
+	probe_one_macio("pci106b,7", NULL, macio_ohareII);
+	probe_one_macio("mac-io", "keylargo", macio_keylargo);
+	probe_one_macio("mac-io", "paddington", macio_paddington);
+	probe_one_macio("mac-io", "gatwick", macio_gatwick);
+	probe_one_macio("mac-io", "heathrow", macio_heathrow);
+	probe_one_macio("mac-io", "K2-Keylargo", macio_keylargo2);
+
+	/* Make sure the "main" macio chip appear first: a secondary chip
+	 * (gatwick, ohareII) found ahead of its primary (heathrow, ohare)
+	 * trades places with it. The two cases cannot both apply.
+	 */
+	c0 = macio_chips[0].type;
+	c1 = macio_chips[1].type;
+	if ((c0 == macio_gatwick && c1 == macio_heathrow) ||
+	    (c0 == macio_ohareII && c1 == macio_ohare)) {
+		struct macio_chip first = macio_chips[0];
+
+		macio_chips[0] = macio_chips[1];
+		macio_chips[1] = first;
+	}
+	macio_chips[0].lbus.index = 0;
+	macio_chips[1].lbus.index = 1;
+
+	if (macio_chips[0].of_node == NULL)
+		return -ENODEV;
+	return 0;
+}
+
+/*
+ * Apply the initial SCC setting for one serial channel node.
+ *
+ * The port is treated as IrDA when "AAPL,connector" is "infrared" or the
+ * first slot name is "IrDA", and as a modem when the node is compatible
+ * with "cobalt" or the first slot name is "Modem". PMAC_FTR_MODEM_ENABLE
+ * is called with param 0 for modem ports, then PMAC_FTR_SCC_ENABLE is
+ * called with the port type as param and 0 as value.
+ */
+static void __init
+initial_serial_shutdown(struct device_node *np)
+{
+	const struct slot_names_prop {
+		int	count;
+		char	name[1];
+	} *slot_prop;
+	const char *connector;
+	int proplen;
+	int scc_type = PMAC_SCC_ASYNC;
+	int is_modem = 0;
+
+	slot_prop = of_get_property(np, "slot-names", &proplen);
+	connector = of_get_property(np, "AAPL,connector", &proplen);
+
+	if (connector && !strcmp(connector, "infrared")) {
+		scc_type = PMAC_SCC_IRDA;
+	} else if (of_device_is_compatible(np, "cobalt")) {
+		is_modem = 1;
+	} else if (slot_prop && slot_prop->count > 0) {
+		if (!strcmp(slot_prop->name, "IrDA"))
+			scc_type = PMAC_SCC_IRDA;
+		else if (!strcmp(slot_prop->name, "Modem"))
+			is_modem = 1;
+	}
+
+	if (is_modem)
+		pmac_call_feature(PMAC_FTR_MODEM_ENABLE, np, 0, 0);
+	pmac_call_feature(PMAC_FTR_SCC_ENABLE, np, scc_type, 0);
+}
+/*
+ * Put the I/O chips into their initial state. Depending on the type of
+ * the first mac-io chip this sets up the OHare FCR, enables the GMAC and
+ * FireWire cells (plus ATA-100 on 32-bit) ahead of PCI probing, switches
+ * airport and sound off on 32-bit, and finally runs
+ * initial_serial_shutdown() on every "ch-a" and "ch-b" node.
+ */
+static void __init
+set_initial_features(void)
+{
+ struct device_node *np;
+
+ /* That hack appears to be necessary for some StarMax motherboards
+ * but I'm not too sure it was audited for side-effects on other
+ * ohare based machines...
+ * Since I still have difficulties figuring the right way to
+ * differentiate them all and since that hack was there for a long
+ * time, I'll keep it around
+ */
+ if (macio_chips[0].type == macio_ohare) {
+ struct macio_chip *macio = &macio_chips[0];
+ np = of_find_node_by_name(NULL, "via-pmu");
+ if (np)
+ MACIO_BIS(OHARE_FCR, OH_IOBUS_ENABLE);
+ else
+ MACIO_OUT32(OHARE_FCR, STARMAX_FEATURES); /* no "via-pmu" node found */
+ of_node_put(np);
+ } else if (macio_chips[1].type == macio_ohare) {
+ struct macio_chip *macio = &macio_chips[1];
+ MACIO_BIS(OHARE_FCR, OH_IOBUS_ENABLE);
+ }
+
+#ifdef CONFIG_PPC64
+ if (macio_chips[0].type == macio_keylargo2 ||
+ macio_chips[0].type == macio_shasta) {
+#ifndef CONFIG_SMP
+ /* On SMP machines running UP, we have the second CPU eating
+ * bus cycles. We need to take it off the bus. This is done
+ * from pmac_smp for SMP kernels running on one CPU
+ */
+ np = of_find_node_by_type(NULL, "cpu");
+ if (np != NULL)
+ np = of_find_node_by_type(np, "cpu"); /* look for a second "cpu" node */
+ if (np != NULL) {
+ g5_phy_disable_cpu1();
+ of_node_put(np);
+ }
+#endif /* CONFIG_SMP */
+ /* Enable GMAC for now for PCI probing. It will be disabled
+ * later on after PCI probe
+ */
+ for_each_node_by_name(np, "ethernet")
+ if (of_device_is_compatible(np, "K2-GMAC"))
+ g5_gmac_enable(np, 0, 1);
+
+ /* Enable FW before PCI probe. Will be disabled later on
+ * Note: We should have a better way to check that we are
+ * dealing with uninorth internal cell and not a PCI cell
+ * on the external PCI. The code below works though.
+ */
+ for_each_node_by_name(np, "firewire") {
+ if (of_device_is_compatible(np, "pci106b,5811")) {
+ macio_chips[0].flags |= MACIO_FLAG_FW_SUPPORTED;
+ g5_fw_enable(np, 0, 1);
+ }
+ }
+ }
+#else /* CONFIG_PPC64 */
+
+ if (macio_chips[0].type == macio_keylargo ||
+ macio_chips[0].type == macio_pangea ||
+ macio_chips[0].type == macio_intrepid) {
+ /* Enable GMAC for now for PCI probing. It will be disabled
+ * later on after PCI probe
+ */
+ for_each_node_by_name(np, "ethernet") {
+ if (np->parent
+ && of_device_is_compatible(np->parent, "uni-north")
+ && of_device_is_compatible(np, "gmac"))
+ core99_gmac_enable(np, 0, 1);
+ }
+
+ /* Enable FW before PCI probe. Will be disabled later on
+ * Note: We should have a better way to check that we are
+ * dealing with uninorth internal cell and not a PCI cell
+ * on the external PCI. The code below works though.
+ */
+ for_each_node_by_name(np, "firewire") {
+ if (np->parent
+ && of_device_is_compatible(np->parent, "uni-north")
+ && (of_device_is_compatible(np, "pci106b,18") ||
+ of_device_is_compatible(np, "pci106b,30") ||
+ of_device_is_compatible(np, "pci11c1,5811"))) {
+ macio_chips[0].flags |= MACIO_FLAG_FW_SUPPORTED;
+ core99_firewire_enable(np, 0, 1);
+ }
+ }
+
+ /* Enable ATA-100 before PCI probe. */
+ for_each_node_by_name(np, "ata-6") {
+ if (np->parent
+ && of_device_is_compatible(np->parent, "uni-north")
+ && of_device_is_compatible(np, "kauai-ata")) {
+ core99_ata100_enable(np, 1);
+ }
+ }
+
+ /* Switch airport off */
+ for_each_node_by_name(np, "radio") {
+ if (np->parent == macio_chips[0].of_node) { /* only a "radio" directly under the main mac-io */
+ macio_chips[0].flags |= MACIO_FLAG_AIRPORT_ON;
+ core99_airport_enable(np, 0, 0);
+ }
+ }
+ }
+
+ /* On all machines that support sound PM, switch sound off */
+ if (macio_chips[0].of_node)
+ pmac_do_feature_call(PMAC_FTR_SOUND_CHIP_ENABLE,
+ macio_chips[0].of_node, 0, 0);
+
+ /* While on some desktop G3s, we turn it back on */
+ if (macio_chips[0].of_node && macio_chips[0].type == macio_heathrow
+ && (pmac_mb.model_id == PMAC_TYPE_GOSSAMER ||
+ pmac_mb.model_id == PMAC_TYPE_SILK)) {
+ struct macio_chip *macio = &macio_chips[0];
+ MACIO_BIS(HEATHROW_FCR, HRW_SOUND_CLK_ENABLE);
+ MACIO_BIC(HEATHROW_FCR, HRW_SOUND_POWER_N);
+ }
+
+#endif /* CONFIG_PPC64 */
+
+ /* On all machines, switch modem & serial ports off */
+ for_each_node_by_name(np, "ch-a")
+ initial_serial_shutdown(np);
+ for_each_node_by_name(np, "ch-b")
+ initial_serial_shutdown(np);
+}
+
+/*
+ * Entry point of the feature code: probe the UniNorth bridge, the mac-io
+ * chips and the motherboard type, then apply the initial feature
+ * settings. Nothing more is done when no mac-io chip is found; an unknown
+ * motherboard only triggers a warning.
+ */
+void __init
+pmac_feature_init(void)
+{
+	int err;
+
+	/* Detect the UniNorth memory controller */
+	probe_uninorth();
+
+	/* Probe mac-io controllers */
+	err = probe_macios();
+	if (err) {
+		printk(KERN_WARNING "No mac-io chip found\n");
+		return;
+	}
+
+	/* Probe machine type */
+	err = probe_motherboard();
+	if (err)
+		printk(KERN_WARNING "Unknown PowerMac !\n");
+
+	/* Set some initial features (turn off some chips that will
+	 * be later turned on)
+	 */
+	set_initial_features();
+}
+
+#if 0 /* HyperTransport link dump helpers below are compiled out */
+static void dump_HT_speeds(char *name, u32 cfg, u32 frq)
+{ /* Print the frequency and in/out widths decoded from one link's cfg/frq values */
+ int freqs[16] = { 200,300,400,500,600,800,1000,0,0,0,0,0,0,0,0,0 };
+ int bits[8] = { 8,16,0,32,2,4,0,0 };
+ int freq = (frq >> 8) & 0xf; /* bits 11:8 of frq index freqs[] */
+
+ if (freqs[freq] == 0) /* table entry 0 marks a code with no known frequency */
+ printk("%s: Unknown HT link frequency %x\n", name, freq);
+ else
+ printk("%s: %d MHz on main link, (%d in / %d out) bits width\n",
+ name, freqs[freq],
+ bits[(cfg >> 28) & 0x7], bits[(cfg >> 24) & 0x7]); /* cfg bits 30:28 and 26:24 index bits[] */
+}
+/*
+ * Dump the HT link settings read from the U3 registers and from the
+ * config space of the "pci-x" compatible bridge found in the device tree.
+ * Returns early, after a message, when the bridge cannot be found or
+ * matched to a PCI device or host.
+ */
+void __init pmac_check_ht_link(void)
+{
+ u32 ufreq, freq, ucfg, cfg;
+ struct device_node *pcix_node;
+ u8 px_bus, px_devfn;
+ struct pci_controller *px_hose;
+
+ (void)in_be32(u3_ht_base + U3_HT_LINK_COMMAND); /* value is read and discarded */
+ ucfg = cfg = in_be32(u3_ht_base + U3_HT_LINK_CONFIG);
+ ufreq = freq = in_be32(u3_ht_base + U3_HT_LINK_FREQ);
+ dump_HT_speeds("U3 HyperTransport", cfg, freq);
+
+ pcix_node = of_find_compatible_node(NULL, "pci", "pci-x");
+ if (pcix_node == NULL) {
+ printk("No PCI-X bridge found\n");
+ return;
+ }
+ if (pci_device_from_OF_node(pcix_node, &px_bus, &px_devfn) != 0) {
+ printk("PCI-X bridge found but not matched to pci\n");
+ return;
+ }
+ px_hose = pci_find_hose_for_OF_device(pcix_node);
+ if (px_hose == NULL) {
+ printk("PCI-X bridge found but not matched to host\n");
+ return;
+ }
+ early_read_config_dword(px_hose, px_bus, px_devfn, 0xc4, &cfg); /* uplink: config at 0xc4, freq at 0xcc */
+ early_read_config_dword(px_hose, px_bus, px_devfn, 0xcc, &freq);
+ dump_HT_speeds("PCI-X HT Uplink", cfg, freq);
+ early_read_config_dword(px_hose, px_bus, px_devfn, 0xc8, &cfg); /* downlink: config at 0xc8, freq at 0xd0 */
+ early_read_config_dword(px_hose, px_bus, px_devfn, 0xd0, &freq);
+ dump_HT_speeds("PCI-X HT Downlink", cfg, freq);
+}
+#endif /* 0 */
+
+/*
+ * Early video resume hook
+ */
+
+static void (*pmac_early_vresume_proc)(void *data);
+static void *pmac_early_vresume_data;
+
+/*
+ * Install (or replace) the early video resume hook and its argument.
+ * Does nothing on machines that are not powermac. The two stores are
+ * done with preemption disabled.
+ */
+void pmac_set_early_video_resume(void (*proc)(void *data), void *data)
+{
+	if (machine_is(powermac)) {
+		preempt_disable();
+		pmac_early_vresume_proc = proc;
+		pmac_early_vresume_data = data;
+		preempt_enable();
+	}
+}
+EXPORT_SYMBOL(pmac_set_early_video_resume);
+
+/* Run the early video resume hook, if one has been installed */
+void pmac_call_early_video_resume(void)
+{
+	if (!pmac_early_vresume_proc)
+		return;
+
+	pmac_early_vresume_proc(pmac_early_vresume_data);
+}
+
+/*
+ * AGP related suspend/resume code
+ */
+
+static struct pci_dev *pmac_agp_bridge;
+static int (*pmac_agp_suspend)(struct pci_dev *bridge);
+static int (*pmac_agp_resume)(struct pci_dev *bridge);
+
+/*
+ * Register or unregister the AGP bridge suspend/resume callbacks.
+ *
+ * With at least one non-NULL callback, the bridge and both callbacks
+ * replace whatever was registered before. With both callbacks NULL, the
+ * callbacks are cleared, but only when @bridge is the registered bridge.
+ */
+void pmac_register_agp_pm(struct pci_dev *bridge,
+			  int (*suspend)(struct pci_dev *bridge),
+			  int (*resume)(struct pci_dev *bridge))
+{
+	if (!suspend && !resume) {
+		/* Unregistration request: ignore it for a foreign bridge */
+		if (bridge == pmac_agp_bridge) {
+			pmac_agp_resume = NULL;
+			pmac_agp_suspend = NULL;
+		}
+		return;
+	}
+
+	pmac_agp_bridge = bridge;
+	pmac_agp_suspend = suspend;
+	pmac_agp_resume = resume;
+}
+EXPORT_SYMBOL(pmac_register_agp_pm);
+
+/*
+ * Call the registered AGP suspend callback when @dev sits on the same
+ * bus as the registered bridge. No-op without a bridge or callback.
+ */
+void pmac_suspend_agp_for_card(struct pci_dev *dev)
+{
+	if (pmac_agp_bridge != NULL && pmac_agp_suspend != NULL &&
+	    pmac_agp_bridge->bus == dev->bus)
+		pmac_agp_suspend(pmac_agp_bridge);
+}
+EXPORT_SYMBOL(pmac_suspend_agp_for_card);
+
+/*
+ * Call the registered AGP resume callback when @dev sits on the same
+ * bus as the registered bridge. No-op without a bridge or callback.
+ */
+void pmac_resume_agp_for_card(struct pci_dev *dev)
+{
+	if (pmac_agp_bridge != NULL && pmac_agp_resume != NULL &&
+	    pmac_agp_bridge->bus == dev->bus)
+		pmac_agp_resume(pmac_agp_bridge);
+}
+EXPORT_SYMBOL(pmac_resume_agp_for_card);
+
+int pmac_get_uninorth_variant(void)
+{
+ return uninorth_maj; /* as set by probe_uninorth(): 0 = none found, 1 = UniNorth, 3 = U3, 4 = U4 */
+}
diff --git a/arch/powerpc/platforms/powermac/low_i2c.c b/arch/powerpc/platforms/powermac/low_i2c.c
new file mode 100644
index 000000000..40f3aa432
--- /dev/null
+++ b/arch/powerpc/platforms/powermac/low_i2c.c
@@ -0,0 +1,1514 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * arch/powerpc/platforms/powermac/low_i2c.c
+ *
+ * Copyright (C) 2003-2005 Ben. Herrenschmidt (benh@kernel.crashing.org)
+ *
+ * The linux i2c layer isn't completely suitable for our needs for various
+ * reasons ranging from too late initialisation to semantics not perfectly
+ * matching some requirements of the apple platform functions etc...
+ *
+ * This file thus provides a simple low level unified i2c interface for
+ * powermac that covers the various types of i2c busses used in Apple machines.
+ * For now, keywest, PMU and SMU, though we could add Cuda, or other bit
+ * banging busses found on older chipsets in earlier machines if we ever need
+ * one of them.
+ *
+ * The drivers in this file are synchronous/blocking. In addition, the
+ * keywest one is fairly slow due to the use of msleep instead of interrupts
+ * as the interrupt is currently used by i2c-keywest. In the long run, we
+ * might want to get rid of those high-level interfaces to linux i2c layer
+ * either completely (converting all drivers) or replacing them all with a
+ * single stub driver on top of this one. Once done, the interrupt will be
+ * available for our use.
+ */
+
+#undef DEBUG
+#undef DEBUG_LOW
+
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <linux/export.h>
+#include <linux/adb.h>
+#include <linux/pmu.h>
+#include <linux/delay.h>
+#include <linux/completion.h>
+#include <linux/platform_device.h>
+#include <linux/interrupt.h>
+#include <linux/timer.h>
+#include <linux/mutex.h>
+#include <linux/i2c.h>
+#include <linux/slab.h>
+#include <linux/of_irq.h>
+#include <asm/keylargo.h>
+#include <asm/uninorth.h>
+#include <asm/io.h>
+#include <asm/machdep.h>
+#include <asm/smu.h>
+#include <asm/pmac_pfunc.h>
+#include <asm/pmac_low_i2c.h>
+
+#ifdef DEBUG
+#define DBG(x...) do {\
+ printk(KERN_DEBUG "low_i2c:" x); \
+ } while(0)
+#else
+#define DBG(x...)
+#endif
+
+#ifdef DEBUG_LOW
+#define DBG_LOW(x...) do {\
+ printk(KERN_DEBUG "low_i2c:" x); \
+ } while(0)
+#else
+#define DBG_LOW(x...)
+#endif
+
+
+static int pmac_i2c_force_poll = 1;
+
+/*
+ * A bus structure. Each bus in the system has such a structure associated.
+ *
+ * For keywest busses the structure is filled in by kw_i2c_add(), which
+ * also links it on the pmac_i2c_busses list.
+ */
+struct pmac_i2c_bus
+{
+ struct list_head link; /* entry in pmac_i2c_busses */
+ struct device_node *controller; /* controller node (reference held) */
+ struct device_node *busnode; /* bus node (reference held); same as controller for multibus */
+ int type; /* e.g. pmac_i2c_bus_keywest */
+ int flags; /* e.g. pmac_i2c_multibus */
+ struct i2c_adapter adapter;
+ void *hostdata; /* host private data, a struct pmac_i2c_host_kw for keywest */
+ int channel; /* some hosts have multiple */
+ int mode; /* current mode */
+ struct mutex mutex;
+ int opened;
+ int polled; /* open mode */
+ struct platform_device *platform_dev;
+ struct lock_class_key lock_key; /* lockdep class of the mutex above */
+
+ /* ops */
+ int (*open)(struct pmac_i2c_bus *bus);
+ void (*close)(struct pmac_i2c_bus *bus);
+ int (*xfer)(struct pmac_i2c_bus *bus, u8 addrdir, int subsize,
+ u32 subaddr, u8 *data, int len);
+};
+
+static LIST_HEAD(pmac_i2c_busses);
+
+/*
+ * Keywest implementation
+ */
+
+struct pmac_i2c_host_kw
+{
+ struct mutex mutex; /* Access mutex for use by
+ * i2c-keywest */
+ void __iomem *base; /* register base address */
+ int bsteps; /* register stepping */
+ int speed; /* speed */
+ int irq; /* mapped interrupt, 0 when none is usable */
+ u8 *data; /* next byte of the transfer in progress */
+ unsigned len; /* bytes left in the transfer in progress */
+ int state; /* state machine state (state_idle, ...) */
+ int rw; /* bit 0 of the address byte: non-zero for a read */
+ int polled; /* copy of bus->polled for the current transfer */
+ int result; /* result of the transfer, returned by kw_i2c_xfer() */
+ struct completion complete; /* completed when a non-polled transfer ends */
+ spinlock_t lock; /* held around kw_i2c_handle_interrupt() */
+ struct timer_list timeout_timer; /* runs kw_i2c_timeout() after KW_POLL_TIMEOUT */
+};
+
+/* Register indices */
+typedef enum {
+ reg_mode = 0,
+ reg_control,
+ reg_status,
+ reg_isr,
+ reg_ier,
+ reg_addr,
+ reg_subaddr,
+ reg_data
+} reg_t;
+
+/* The Tumbler audio equalizer can be really slow sometimes */
+#define KW_POLL_TIMEOUT (2*HZ)
+
+/* Mode register */
+#define KW_I2C_MODE_100KHZ 0x00
+#define KW_I2C_MODE_50KHZ 0x01
+#define KW_I2C_MODE_25KHZ 0x02
+#define KW_I2C_MODE_DUMB 0x00
+#define KW_I2C_MODE_STANDARD 0x04
+#define KW_I2C_MODE_STANDARDSUB 0x08
+#define KW_I2C_MODE_COMBINED 0x0C
+#define KW_I2C_MODE_MODE_MASK 0x0C
+#define KW_I2C_MODE_CHAN_MASK 0xF0
+
+/* Control register */
+#define KW_I2C_CTL_AAK 0x01
+#define KW_I2C_CTL_XADDR 0x02
+#define KW_I2C_CTL_STOP 0x04
+#define KW_I2C_CTL_START 0x08
+
+/* Status register */
+#define KW_I2C_STAT_BUSY 0x01
+#define KW_I2C_STAT_LAST_AAK 0x02
+#define KW_I2C_STAT_LAST_RW 0x04
+#define KW_I2C_STAT_SDA 0x08
+#define KW_I2C_STAT_SCL 0x10
+
+/* IER & ISR registers */
+#define KW_I2C_IRQ_DATA 0x01
+#define KW_I2C_IRQ_ADDR 0x02
+#define KW_I2C_IRQ_STOP 0x04
+#define KW_I2C_IRQ_START 0x08
+#define KW_I2C_IRQ_MASK 0x0F
+
+/* State machine states */
+enum {
+ state_idle,
+ state_addr,
+ state_read,
+ state_write,
+ state_stop,
+ state_dead
+};
+
+#define WRONG_STATE(name) do {\
+ printk(KERN_DEBUG "KW: wrong state. Got %s, state: %s " \
+ "(isr: %02x)\n", \
+ name, __kw_state_names[host->state], isr); \
+ } while(0)
+
+static const char *__kw_state_names[] = {
+ "state_idle",
+ "state_addr",
+ "state_read",
+ "state_write",
+ "state_stop",
+ "state_dead"
+};
+
+/* Read one byte register; the register index is scaled by host->bsteps */
+static inline u8 __kw_read_reg(struct pmac_i2c_host_kw *host, reg_t reg)
+{
+	void __iomem *addr = host->base + (((unsigned int)reg) << host->bsteps);
+
+	return readb(addr);
+}
+
+/*
+ * Write one byte register, then read reg_subaddr back and discard the
+ * value (presumably to make sure the write has reached the chip -- to be
+ * confirmed).
+ */
+static inline void __kw_write_reg(struct pmac_i2c_host_kw *host,
+				  reg_t reg, u8 val)
+{
+	void __iomem *addr = host->base + (((unsigned)reg) << host->bsteps);
+
+	writeb(val, addr);
+	(void)__kw_read_reg(host, reg_subaddr);
+}
+
+#define kw_write_reg(reg, val) __kw_write_reg(host, reg, val)
+#define kw_read_reg(reg) __kw_read_reg(host, reg)
+
+/*
+ * Poll the ISR register until an interrupt bit shows up.
+ *
+ * Makes up to 1000 attempts and returns the pending bits (masked with
+ * KW_I2C_IRQ_MASK), or 0 when nothing showed up.
+ */
+static u8 kw_i2c_wait_interrupt(struct pmac_i2c_host_kw *host)
+{
+	int attempt, spin;
+	u8 pending = 0;
+
+	for (attempt = 0; attempt < 1000; attempt++) {
+		pending = kw_read_reg(reg_isr) & KW_I2C_IRQ_MASK;
+		if (pending)
+			break;
+
+		if (!host->polled) {
+			msleep(1);
+			continue;
+		}
+
+		/* This code is used with the timebase frozen, we cannot rely
+		 * on udelay nor schedule when in polled mode !
+		 * For now, just use a bogus loop....
+		 */
+		for (spin = 1; spin < 100000; spin++)
+			mb();
+	}
+	return pending;
+}
+
+/* Issue a STOP, move to state_stop and record the result of the transfer */
+static void kw_i2c_do_stop(struct pmac_i2c_host_kw *host, int result)
+{
+	kw_write_reg(reg_control, KW_I2C_CTL_STOP);
+
+	host->result = result;
+	host->state = state_stop;
+}
+
+/*
+ * Advance the keywest transfer state machine for one set of pending
+ * interrupt bits. An isr value of 0 is handled as a timeout. The callers
+ * in this file (irq handler, timeout timer, polling loop of kw_i2c_xfer)
+ * all hold host->lock around the call.
+ */
+static void kw_i2c_handle_interrupt(struct pmac_i2c_host_kw *host, u8 isr)
+{
+ u8 ack;
+
+ DBG_LOW("kw_handle_interrupt(%s, isr: %x)\n",
+ __kw_state_names[host->state], isr);
+
+ if (host->state == state_idle) {
+ printk(KERN_WARNING "low_i2c: Keywest got an out of state"
+ " interrupt, ignoring\n");
+ kw_write_reg(reg_isr, isr); /* write the bits back to the ISR register */
+ return;
+ }
+
+ if (isr == 0) {
+ printk(KERN_WARNING "low_i2c: Timeout in i2c transfer"
+ " on keywest !\n");
+ if (host->state != state_stop) { /* first timeout: request a STOP with -EIO */
+ kw_i2c_do_stop(host, -EIO);
+ return;
+ }
+ ack = kw_read_reg(reg_status); /* timed out while already stopping: force idle */
+ if (ack & KW_I2C_STAT_BUSY)
+ kw_write_reg(reg_status, 0);
+ host->state = state_idle;
+ kw_write_reg(reg_ier, 0x00);
+ if (!host->polled)
+ complete(&host->complete);
+ return;
+ }
+
+ if (isr & KW_I2C_IRQ_ADDR) {
+ ack = kw_read_reg(reg_status);
+ if (host->state != state_addr) {
+ WRONG_STATE("KW_I2C_IRQ_ADDR");
+ kw_i2c_do_stop(host, -EIO);
+ }
+ if ((ack & KW_I2C_STAT_LAST_AAK) == 0) { /* LAST_AAK clear: address was not acked */
+ host->result = -ENXIO;
+ host->state = state_stop;
+ DBG_LOW("KW: NAK on address\n");
+ } else {
+ if (host->len == 0)
+ kw_i2c_do_stop(host, 0);
+ else if (host->rw) {
+ host->state = state_read;
+ if (host->len > 1) /* AAK is only set when more than one byte is expected */
+ kw_write_reg(reg_control,
+ KW_I2C_CTL_AAK);
+ } else {
+ host->state = state_write;
+ kw_write_reg(reg_data, *(host->data++)); /* send the first data byte */
+ host->len--;
+ }
+ }
+ kw_write_reg(reg_isr, KW_I2C_IRQ_ADDR);
+ }
+
+ if (isr & KW_I2C_IRQ_DATA) {
+ if (host->state == state_read) {
+ *(host->data++) = kw_read_reg(reg_data);
+ host->len--;
+ kw_write_reg(reg_isr, KW_I2C_IRQ_DATA);
+ if (host->len == 0)
+ host->state = state_stop;
+ else if (host->len == 1) /* one byte left: control register is cleared */
+ kw_write_reg(reg_control, 0);
+ } else if (host->state == state_write) {
+ ack = kw_read_reg(reg_status);
+ if ((ack & KW_I2C_STAT_LAST_AAK) == 0) {
+ DBG_LOW("KW: nack on data write\n");
+ host->result = -EFBIG;
+ host->state = state_stop;
+ } else if (host->len) {
+ kw_write_reg(reg_data, *(host->data++));
+ host->len--;
+ } else /* every byte was written and acked */
+ kw_i2c_do_stop(host, 0);
+ } else {
+ WRONG_STATE("KW_I2C_IRQ_DATA");
+ if (host->state != state_stop)
+ kw_i2c_do_stop(host, -EIO);
+ }
+ kw_write_reg(reg_isr, KW_I2C_IRQ_DATA);
+ }
+
+ if (isr & KW_I2C_IRQ_STOP) {
+ kw_write_reg(reg_isr, KW_I2C_IRQ_STOP);
+ if (host->state != state_stop) {
+ WRONG_STATE("KW_I2C_IRQ_STOP");
+ host->result = -EIO;
+ }
+ host->state = state_idle; /* transfer is over, whatever the result */
+ if (!host->polled)
+ complete(&host->complete);
+ }
+
+ /* Below should only happen in manual mode which we don't use ... */
+ if (isr & KW_I2C_IRQ_START)
+ kw_write_reg(reg_isr, KW_I2C_IRQ_START);
+
+}
+
+/*
+ * Interrupt handler
+ *
+ * Under host->lock: stop the timeout timer, feed the current ISR value to
+ * the state machine, and arm the timer again unless the transfer is back
+ * to idle.
+ */
+static irqreturn_t kw_i2c_irq(int irq, void *dev_id)
+{
+	struct pmac_i2c_host_kw *host = dev_id;
+	unsigned long irqflags;
+	u8 pending;
+
+	spin_lock_irqsave(&host->lock, irqflags);
+
+	del_timer(&host->timeout_timer);
+	pending = kw_read_reg(reg_isr);
+	kw_i2c_handle_interrupt(host, pending);
+
+	if (host->state != state_idle) {
+		host->timeout_timer.expires = jiffies + KW_POLL_TIMEOUT;
+		add_timer(&host->timeout_timer);
+	}
+
+	spin_unlock_irqrestore(&host->lock, irqflags);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Timeout timer callback: run the state machine on the current ISR value
+ * and arm the timer again while the transfer has not returned to idle.
+ */
+static void kw_i2c_timeout(struct timer_list *t)
+{
+	struct pmac_i2c_host_kw *host = from_timer(host, t, timeout_timer);
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&host->lock, irqflags);
+
+	/*
+	 * If the timer is pending, that means we raced with the
+	 * irq, in which case there is nothing to do here
+	 */
+	if (!timer_pending(&host->timeout_timer)) {
+		kw_i2c_handle_interrupt(host, kw_read_reg(reg_isr));
+		if (host->state != state_idle) {
+			host->timeout_timer.expires = jiffies + KW_POLL_TIMEOUT;
+			add_timer(&host->timeout_timer);
+		}
+	}
+
+	spin_unlock_irqrestore(&host->lock, irqflags);
+}
+
+/* "open" op of keywest busses: take the host access mutex. Always returns 0 */
+static int kw_i2c_open(struct pmac_i2c_bus *bus)
+{
+	struct pmac_i2c_host_kw *kw = bus->hostdata;
+
+	mutex_lock(&kw->mutex);
+
+	return 0;
+}
+
+/* "close" op of keywest busses: release the host access mutex */
+static void kw_i2c_close(struct pmac_i2c_bus *bus)
+{
+	struct pmac_i2c_host_kw *kw = bus->hostdata;
+
+	mutex_unlock(&kw->mutex);
+}
+/*
+ * "xfer" op of keywest busses: program the controller for one transfer
+ * and wait until the state machine is back to idle.
+ *
+ * addrdir is written to the address register and its bit 0 selects a read
+ * (1) or a write (0). subsize has to be 0 in std mode and 1 in stdsub and
+ * combined modes; subaddr is only written in the latter two.
+ *
+ * Returns host->result, or -EINVAL for dumb mode or a subsize that does
+ * not fit the bus mode.
+ */
+static int kw_i2c_xfer(struct pmac_i2c_bus *bus, u8 addrdir, int subsize,
+ u32 subaddr, u8 *data, int len)
+{
+ struct pmac_i2c_host_kw *host = bus->hostdata;
+ u8 mode_reg = host->speed;
+ int use_irq = host->irq && !bus->polled; /* interrupts need a mapped irq and a non-polled open */
+
+ /* Setup mode & subaddress if any */
+ switch(bus->mode) {
+ case pmac_i2c_mode_dumb:
+ return -EINVAL;
+ case pmac_i2c_mode_std:
+ mode_reg |= KW_I2C_MODE_STANDARD;
+ if (subsize != 0)
+ return -EINVAL;
+ break;
+ case pmac_i2c_mode_stdsub:
+ mode_reg |= KW_I2C_MODE_STANDARDSUB;
+ if (subsize != 1)
+ return -EINVAL;
+ break;
+ case pmac_i2c_mode_combined:
+ mode_reg |= KW_I2C_MODE_COMBINED;
+ if (subsize != 1)
+ return -EINVAL;
+ break;
+ }
+
+ /* Setup channel & clear pending irqs */
+ kw_write_reg(reg_isr, kw_read_reg(reg_isr));
+ kw_write_reg(reg_mode, mode_reg | (bus->channel << 4)); /* channel goes in the upper nibble */
+ kw_write_reg(reg_status, 0);
+
+ /* Set up address and r/w bit, strip possible stale bus number from
+ * address top bits
+ */
+ kw_write_reg(reg_addr, addrdir & 0xff);
+
+ /* Set up the sub address */
+ if ((mode_reg & KW_I2C_MODE_MODE_MASK) == KW_I2C_MODE_STANDARDSUB
+ || (mode_reg & KW_I2C_MODE_MODE_MASK) == KW_I2C_MODE_COMBINED)
+ kw_write_reg(reg_subaddr, subaddr);
+
+ /* Prepare for async operations */
+ host->data = data;
+ host->len = len;
+ host->state = state_addr;
+ host->result = 0;
+ host->rw = (addrdir & 1);
+ host->polled = bus->polled;
+
+ /* Enable interrupt if not using polled mode and interrupt is
+ * available
+ */
+ if (use_irq) {
+ /* Clear completion */
+ reinit_completion(&host->complete);
+ /* Ack stale interrupts */
+ kw_write_reg(reg_isr, kw_read_reg(reg_isr));
+ /* Arm timeout */
+ host->timeout_timer.expires = jiffies + KW_POLL_TIMEOUT;
+ add_timer(&host->timeout_timer);
+ /* Enable emission */
+ kw_write_reg(reg_ier, KW_I2C_IRQ_MASK);
+ }
+
+ /* Start sending address */
+ kw_write_reg(reg_control, KW_I2C_CTL_XADDR);
+
+ /* Wait for completion */
+ if (use_irq)
+ wait_for_completion(&host->complete);
+ else {
+ while(host->state != state_idle) { /* drive the state machine by polling the ISR */
+ unsigned long flags;
+
+ u8 isr = kw_i2c_wait_interrupt(host);
+ spin_lock_irqsave(&host->lock, flags);
+ kw_i2c_handle_interrupt(host, isr);
+ spin_unlock_irqrestore(&host->lock, flags);
+ }
+ }
+
+ /* Disable emission */
+ kw_write_reg(reg_ier, 0);
+
+ return host->result;
+}
+
+/*
+ * Allocate and set up the host structure of one keywest i2c controller.
+ *
+ * The register base comes from "AAPL,address", the register stepping from
+ * "AAPL,address-step" (0x10 when absent or zero) and the bus speed from
+ * "AAPL,i2c-rate" (25KHz when absent or not one of 100/50/25). When the
+ * interrupt cannot be mapped or requested, host->irq is left at 0, which
+ * makes kw_i2c_xfer() poll.
+ *
+ * Returns the new host, or NULL on allocation failure, missing address
+ * property or failure to map the registers.
+ */
+static struct pmac_i2c_host_kw *__init kw_i2c_host_init(struct device_node *np)
+{
+	struct pmac_i2c_host_kw *host;
+	const u32 *psteps, *prate, *addrp;
+	u32 steps;
+
+	host = kzalloc(sizeof(*host), GFP_KERNEL);
+	if (host == NULL) {
+		printk(KERN_ERR "low_i2c: Can't allocate host for %pOF\n",
+		       np);
+		return NULL;
+	}
+
+	/* Apple is kind enough to provide a valid AAPL,address property
+	 * on all i2c keywest nodes so far ... we would have to fallback
+	 * to macio parsing if that wasn't the case
+	 */
+	addrp = of_get_property(np, "AAPL,address", NULL);
+	if (addrp == NULL) {
+		printk(KERN_ERR "low_i2c: Can't find address for %pOF\n",
+		       np);
+		kfree(host);
+		return NULL;
+	}
+	mutex_init(&host->mutex);
+	init_completion(&host->complete);
+	spin_lock_init(&host->lock);
+	timer_setup(&host->timeout_timer, kw_i2c_timeout, 0);
+
+	psteps = of_get_property(np, "AAPL,address-step", NULL);
+	steps = psteps ? (*psteps) : 0x10;
+	/* A zero step has no bit set: the shift loop below would never
+	 * terminate, so use the default stepping instead
+	 */
+	if (steps == 0)
+		steps = 0x10;
+	/* bsteps is the position of the lowest set bit of the step */
+	for (host->bsteps = 0; (steps & 0x01) == 0; host->bsteps++)
+		steps >>= 1;
+	/* Select interface rate */
+	host->speed = KW_I2C_MODE_25KHZ;
+	prate = of_get_property(np, "AAPL,i2c-rate", NULL);
+	if (prate) switch(*prate) {
+	case 100:
+		host->speed = KW_I2C_MODE_100KHZ;
+		break;
+	case 50:
+		host->speed = KW_I2C_MODE_50KHZ;
+		break;
+	case 25:
+		host->speed = KW_I2C_MODE_25KHZ;
+		break;
+	}
+	host->irq = irq_of_parse_and_map(np, 0);
+	if (!host->irq)
+		printk(KERN_WARNING
+		       "low_i2c: Failed to map interrupt for %pOF\n",
+		       np);
+
+	host->base = ioremap((*addrp), 0x1000);
+	if (host->base == NULL) {
+		printk(KERN_ERR "low_i2c: Can't map registers for %pOF\n",
+		       np);
+		kfree(host);
+		return NULL;
+	}
+
+	/* Make sure IRQ is disabled */
+	kw_write_reg(reg_ier, 0);
+
+	/* Request chip interrupt. We set IRQF_NO_SUSPEND because we don't
+	 * want that interrupt disabled between the 2 passes of driver
+	 * suspend or we'll have issues running the pfuncs
+	 */
+	if (request_irq(host->irq, kw_i2c_irq, IRQF_NO_SUSPEND,
+			"keywest i2c", host))
+		host->irq = 0;
+
+	printk(KERN_INFO "KeyWest i2c @0x%08x irq %d %pOF\n",
+	       *addrp, host->irq, np);
+
+	return host;
+}
+
+
+/*
+ * Create the pmac_i2c_bus for one channel of a KeyWest host and put it
+ * on the global bus list. When @controller and @busnode are the same
+ * node the bus is flagged as multibus. An allocation failure is
+ * silently ignored.
+ */
+static void __init kw_i2c_add(struct pmac_i2c_host_kw *host,
+			      struct device_node *controller,
+			      struct device_node *busnode,
+			      int channel)
+{
+	const int is_multibus = (controller == busnode);
+	struct pmac_i2c_bus *new_bus;
+
+	new_bus = kzalloc(sizeof(struct pmac_i2c_bus), GFP_KERNEL);
+	if (!new_bus)
+		return;
+
+	/* Both node pointers are stored through of_node_get() */
+	new_bus->controller = of_node_get(controller);
+	new_bus->busnode = of_node_get(busnode);
+	new_bus->type = pmac_i2c_bus_keywest;
+	new_bus->hostdata = host;
+	new_bus->channel = channel;
+	new_bus->mode = pmac_i2c_mode_std;
+
+	/* KeyWest specific operations */
+	new_bus->open = kw_i2c_open;
+	new_bus->close = kw_i2c_close;
+	new_bus->xfer = kw_i2c_xfer;
+
+	/* Every bus mutex gets a lockdep class of its own */
+	mutex_init(&new_bus->mutex);
+	lockdep_register_key(&new_bus->lock_key);
+	lockdep_set_class(&new_bus->mutex, &new_bus->lock_key);
+
+	if (is_multibus)
+		new_bus->flags = pmac_i2c_multibus;
+	list_add(&new_bus->link, &pmac_i2c_busses);
+
+	printk(KERN_INFO " channel %d bus %s\n", channel,
+	       is_multibus ? "<multibus>" : busnode->full_name);
+}
+
+/*
+ * Find every "keywest-i2c" compatible node, set up a host for it and
+ * register its busses: either one bus per channel of the controller
+ * itself (multibus) or one bus per child node carrying a "reg" property.
+ */
+static void __init kw_i2c_probe(void)
+{
+ struct device_node *np, *child, *parent;
+
+ /* Probe keywest-i2c busses */
+ for_each_compatible_node(np, "i2c","keywest-i2c") {
+ struct pmac_i2c_host_kw *host;
+ int multibus;
+
+ /* Found one, init a host structure */
+ host = kw_i2c_host_init(np);
+ if (host == NULL)
+ continue;
+
+ /* Now check if we have a multibus setup (old style) or if we
+ * have proper bus nodes. Note that the "new" way (proper bus
+ * nodes) might cause us to not create some busses that are
+ * kept hidden in the device-tree. In the future, we might
+ * want to work around that by creating busses without a node
+ * but not for now
+ */
+ /* Only the first child is inspected to make that decision */
+ child = of_get_next_child(np, NULL);
+ multibus = !of_node_name_eq(child, "i2c-bus");
+ of_node_put(child);
+
+ /* For a multibus setup, we get the bus count based on the
+ * parent type
+ */
+ if (multibus) {
+ int chans, i;
+
+ parent = of_get_parent(np);
+ if (parent == NULL)
+ continue;
+ /* Two channels when the parent's name starts with 'u',
+ * one otherwise
+ */
+ chans = parent->name[0] == 'u' ? 2 : 1;
+ of_node_put(parent);
+ for (i = 0; i < chans; i++)
+ kw_i2c_add(host, np, np, i);
+ } else {
+ for_each_child_of_node(np, child) {
+ const u32 *reg = of_get_property(child,
+ "reg", NULL);
+ /* Children without "reg" get no bus */
+ if (reg == NULL)
+ continue;
+ kw_i2c_add(host, np, child, *reg);
+ }
+ }
+ }
+}
+
+
+/*
+ *
+ * PMU implementation
+ *
+ */
+
+#ifdef CONFIG_ADB_PMU
+
+/*
+ * i2c command block to the PMU
+ *
+ * pmu_i2c_xfer() overlays this structure on req->data[1], right after
+ * the PMU_I2C_CMD command byte.
+ */
+struct pmu_i2c_hdr {
+ u8 bus; /* bus channel, or PMU_I2C_BUS_STATUS for a status poll */
+ u8 mode; /* one of the PMU_I2C_MODE_* values */
+ u8 bus2; /* never written by pmu_i2c_xfer(), stays 0 */
+ u8 address; /* device address (direction bit cleared in sub modes) */
+ u8 sub_addr; /* sub-address, stdsub/combined modes only */
+ u8 comb_addr; /* address with direction bit, stdsub/combined modes only */
+ u8 count; /* number of data bytes to transfer */
+ u8 data[]; /* payload, filled in for writes */
+};
+
+/* Request done callback: signal the completion stored in req->arg */
+static void pmu_i2c_complete(struct adb_request *req)
+{
+	struct completion *waiter = req->arg;
+
+	complete(waiter);
+}
+
+/*
+ * Perform one i2c transfer through the PMU.
+ *
+ * The transfer is done in two phases, each retried up to 16 times:
+ * first the command is queued until the PMU answers PMU_I2C_STATUS_OK,
+ * then the bus status is polled until the transfer has completed (and,
+ * for reads, until the data is returned).
+ *
+ * Bit 0 of @addrdir selects a read. @subsize must be 0 in std mode and
+ * 1 in stdsub/combined modes. @len is limited to 16 bytes.
+ *
+ * Returns 0 on success, -EINVAL on bad arguments or bus mode, -EIO when
+ * the PMU never reports success or returns an unexpected length, or the
+ * error returned by pmu_queue_request().
+ */
+static int pmu_i2c_xfer(struct pmac_i2c_bus *bus, u8 addrdir, int subsize,
+ u32 subaddr, u8 *data, int len)
+{
+ struct adb_request *req = bus->hostdata;
+ /* The i2c header starts right after the command byte */
+ struct pmu_i2c_hdr *hdr = (struct pmu_i2c_hdr *)&req->data[1];
+ struct completion comp;
+ int read = addrdir & 1;
+ int retry;
+ int rc = 0;
+
+ /* For now, limit ourselves to 16 bytes transfers */
+ if (len > 16)
+ return -EINVAL;
+
+ init_completion(&comp);
+
+ /* Phase 1: submit the transfer command */
+ for (retry = 0; retry < 16; retry++) {
+ memset(req, 0, sizeof(struct adb_request));
+ hdr->bus = bus->channel;
+ hdr->count = len;
+
+ switch(bus->mode) {
+ case pmac_i2c_mode_std:
+ if (subsize != 0)
+ return -EINVAL;
+ hdr->address = addrdir;
+ hdr->mode = PMU_I2C_MODE_SIMPLE;
+ break;
+ case pmac_i2c_mode_stdsub:
+ case pmac_i2c_mode_combined:
+ if (subsize != 1)
+ return -EINVAL;
+ hdr->address = addrdir & 0xfe;
+ hdr->comb_addr = addrdir;
+ hdr->sub_addr = subaddr;
+ if (bus->mode == pmac_i2c_mode_stdsub)
+ hdr->mode = PMU_I2C_MODE_STDSUB;
+ else
+ hdr->mode = PMU_I2C_MODE_COMBINED;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ reinit_completion(&comp);
+ req->data[0] = PMU_I2C_CMD;
+ /* Preset the reply so that a missing answer is not read as OK */
+ req->reply[0] = 0xff;
+ req->nbytes = sizeof(struct pmu_i2c_hdr) + 1;
+ req->done = pmu_i2c_complete;
+ req->arg = &comp;
+ /* Writes carry their payload right behind the header */
+ if (!read && len) {
+ memcpy(hdr->data, data, len);
+ req->nbytes += len;
+ }
+ rc = pmu_queue_request(req);
+ if (rc)
+ return rc;
+ wait_for_completion(&comp);
+ if (req->reply[0] == PMU_I2C_STATUS_OK)
+ break;
+ msleep(15);
+ }
+ if (req->reply[0] != PMU_I2C_STATUS_OK)
+ return -EIO;
+
+ /* Phase 2: poll the bus status until the transfer is done */
+ for (retry = 0; retry < 16; retry++) {
+ memset(req, 0, sizeof(struct adb_request));
+
+ /* I know that looks like a lot, slow as hell, but darwin
+ * does it so let's be on the safe side for now
+ */
+ msleep(15);
+
+ hdr->bus = PMU_I2C_BUS_STATUS;
+
+ reinit_completion(&comp);
+ req->data[0] = PMU_I2C_CMD;
+ req->reply[0] = 0xff;
+ req->nbytes = 2;
+ req->done = pmu_i2c_complete;
+ req->arg = &comp;
+ rc = pmu_queue_request(req);
+ if (rc)
+ return rc;
+ wait_for_completion(&comp);
+
+ if (req->reply[0] == PMU_I2C_STATUS_OK && !read)
+ return 0;
+ if (req->reply[0] == PMU_I2C_STATUS_DATAREAD && read) {
+ /* The first reply byte is the status, the rest is data */
+ int rlen = req->reply_len - 1;
+
+ if (rlen != len) {
+ printk(KERN_WARNING "low_i2c: PMU returned %d"
+ " bytes, expected %d !\n", rlen, len);
+ return -EIO;
+ }
+ if (len)
+ memcpy(data, &req->reply[1], len);
+ return 0;
+ }
+ }
+ return -EIO;
+}
+
+/*
+ * Register the PMU i2c busses (channels 1 and 2) when a PMU is present
+ * and a "pmu-i2c" or "via-pmu" node exists. Each bus is allocated
+ * together with the adb_request used for its transfers.
+ */
+static void __init pmu_i2c_probe(void)
+{
+ struct pmac_i2c_bus *bus;
+ struct device_node *busnode;
+ int channel, sz;
+
+ if (!pmu_present())
+ return;
+
+ /* There might or might not be a "pmu-i2c" node, we use that
+ * or via-pmu itself, whatever we find. I haven't seen a machine
+ * with separate bus nodes, so we assume a multibus setup
+ */
+ busnode = of_find_node_by_name(NULL, "pmu-i2c");
+ if (busnode == NULL)
+ busnode = of_find_node_by_name(NULL, "via-pmu");
+ if (busnode == NULL)
+ return;
+
+ printk(KERN_INFO "PMU i2c %pOF\n", busnode);
+
+ /*
+ * We add bus 1 and 2 only for now, bus 0 is "special"
+ */
+ for (channel = 1; channel <= 2; channel++) {
+ /* One allocation holds the bus and its adb_request */
+ sz = sizeof(struct pmac_i2c_bus) + sizeof(struct adb_request);
+ bus = kzalloc(sz, GFP_KERNEL);
+ if (bus == NULL)
+ return;
+
+ /* The node found above is stored as is, no extra
+ * of_node_get() is done per bus
+ */
+ bus->controller = busnode;
+ bus->busnode = busnode;
+ bus->type = pmac_i2c_bus_pmu;
+ bus->channel = channel;
+ bus->mode = pmac_i2c_mode_std;
+ /* The adb_request lives right behind the bus structure */
+ bus->hostdata = bus + 1;
+ bus->xfer = pmu_i2c_xfer;
+ mutex_init(&bus->mutex);
+ lockdep_register_key(&bus->lock_key);
+ lockdep_set_class(&bus->mutex, &bus->lock_key);
+ bus->flags = pmac_i2c_multibus;
+ list_add(&bus->link, &pmac_i2c_busses);
+
+ printk(KERN_INFO " channel %d bus <multibus>\n", channel);
+ }
+}
+
+#endif /* CONFIG_ADB_PMU */
+
+
+/*
+ *
+ * SMU implementation
+ *
+ */
+
+#ifdef CONFIG_PMAC_SMU
+
+/* SMU i2c done callback: @misc is the completion the submitter waits on */
+static void smu_i2c_complete(struct smu_i2c_cmd *cmd, void *misc)
+{
+	struct completion *waiter = misc;
+
+	complete(waiter);
+}
+
+/*
+ * Perform one i2c transfer through the SMU.
+ *
+ * Bit 0 of @addrdir selects a read. @subsize must be 0 in std mode and
+ * 1 to 3 in stdsub/combined modes. @len is limited to SMU_I2C_READ_MAX
+ * for reads and SMU_I2C_WRITE_MAX for writes.
+ *
+ * Returns 0 on success, -EINVAL on bad arguments or bus mode, or the
+ * negative value reported by smu_queue_i2c() or by the command status.
+ */
+static int smu_i2c_xfer(struct pmac_i2c_bus *bus, u8 addrdir, int subsize,
+ u32 subaddr, u8 *data, int len)
+{
+ struct smu_i2c_cmd *cmd = bus->hostdata;
+ struct completion comp;
+ int read = addrdir & 1;
+ int rc = 0;
+
+ if ((read && len > SMU_I2C_READ_MAX) ||
+ ((!read) && len > SMU_I2C_WRITE_MAX))
+ return -EINVAL;
+
+ memset(cmd, 0, sizeof(struct smu_i2c_cmd));
+ cmd->info.bus = bus->channel;
+ cmd->info.devaddr = addrdir;
+ cmd->info.datalen = len;
+
+ switch(bus->mode) {
+ case pmac_i2c_mode_std:
+ if (subsize != 0)
+ return -EINVAL;
+ cmd->info.type = SMU_I2C_TRANSFER_SIMPLE;
+ break;
+ case pmac_i2c_mode_stdsub:
+ case pmac_i2c_mode_combined:
+ if (subsize > 3 || subsize < 1)
+ return -EINVAL;
+ cmd->info.sublen = subsize;
+ /* that's big-endian only but heh ! */
+ /* Copies the last subsize bytes of the u32 subaddr */
+ memcpy(&cmd->info.subaddr, ((char *)&subaddr) + (4 - subsize),
+ subsize);
+ if (bus->mode == pmac_i2c_mode_stdsub)
+ cmd->info.type = SMU_I2C_TRANSFER_STDSUB;
+ else
+ cmd->info.type = SMU_I2C_TRANSFER_COMBINED;
+ break;
+ default:
+ return -EINVAL;
+ }
+ if (!read && len)
+ memcpy(cmd->info.data, data, len);
+
+ init_completion(&comp);
+ cmd->done = smu_i2c_complete;
+ cmd->misc = &comp;
+ rc = smu_queue_i2c(cmd);
+ if (rc < 0)
+ return rc;
+ wait_for_completion(&comp);
+ rc = cmd->status;
+
+ /* The data is copied back even when the status reports an error */
+ if (read && len)
+ memcpy(data, cmd->info.data, len);
+ return rc < 0 ? rc : 0;
+}
+
+/*
+ * Register one pmac_i2c_bus for every i2c bus node found below the SMU
+ * i2c controller node ("smu-i2c-control", or "smu" as a fallback).
+ * Children that are not of type "i2c"/"i2c-bus" or have no "reg"
+ * property are skipped. Each bus is allocated together with the
+ * smu_i2c_cmd used for its transfers.
+ */
+static void __init smu_i2c_probe(void)
+{
+	struct device_node *controller, *busnode;
+	struct pmac_i2c_bus *bus;
+	const u32 *reg;
+	int sz;
+
+	if (!smu_present())
+		return;
+
+	controller = of_find_node_by_name(NULL, "smu-i2c-control");
+	if (controller == NULL)
+		controller = of_find_node_by_name(NULL, "smu");
+	if (controller == NULL)
+		return;
+
+	printk(KERN_INFO "SMU i2c %pOF\n", controller);
+
+	/* Look for children, note that they might not be of the right
+	 * type as older device trees mix i2c busses and other things
+	 * at the same level
+	 */
+	for_each_child_of_node(controller, busnode) {
+		if (!of_node_is_type(busnode, "i2c") &&
+		    !of_node_is_type(busnode, "i2c-bus"))
+			continue;
+		reg = of_get_property(busnode, "reg", NULL);
+		if (reg == NULL)
+			continue;
+
+		/* One allocation holds the bus and its smu_i2c_cmd */
+		sz = sizeof(struct pmac_i2c_bus) + sizeof(struct smu_i2c_cmd);
+		bus = kzalloc(sz, GFP_KERNEL);
+		if (bus == NULL) {
+			/* for_each_child_of_node() holds a reference on
+			 * the current child, drop it when leaving the
+			 * loop early
+			 */
+			of_node_put(busnode);
+			return;
+		}
+
+		bus->controller = controller;
+		bus->busnode = of_node_get(busnode);
+		bus->type = pmac_i2c_bus_smu;
+		bus->channel = *reg;
+		bus->mode = pmac_i2c_mode_std;
+		/* The smu_i2c_cmd lives right behind the bus structure */
+		bus->hostdata = bus + 1;
+		bus->xfer = smu_i2c_xfer;
+		mutex_init(&bus->mutex);
+		lockdep_register_key(&bus->lock_key);
+		lockdep_set_class(&bus->mutex, &bus->lock_key);
+		bus->flags = 0;
+		list_add(&bus->link, &pmac_i2c_busses);
+
+		printk(KERN_INFO " channel %x bus %pOF\n",
+		       bus->channel, busnode);
+	}
+}
+
+#endif /* CONFIG_PMAC_SMU */
+
+/*
+ *
+ * Core code
+ *
+ */
+
+
+/*
+ * Find the i2c bus a device node sits on.
+ *
+ * Walks from @node up through its parents until a node matches the
+ * busnode of a registered bus. For a multibus controller, the channel
+ * is taken from bits 8 and up of the "reg" property of the node visited
+ * just before the bus node, and must match the bus channel.
+ *
+ * Returns the bus, or NULL when no registered bus matches. All node
+ * references taken during the walk are dropped before returning.
+ */
+struct pmac_i2c_bus *pmac_i2c_find_bus(struct device_node *node)
+{
+	struct device_node *p = of_node_get(node);
+	struct device_node *prev = NULL;
+	struct pmac_i2c_bus *bus;
+
+	while(p) {
+		list_for_each_entry(bus, &pmac_i2c_busses, link) {
+			if (p == bus->busnode) {
+				if (prev && bus->flags & pmac_i2c_multibus) {
+					const u32 *reg;
+					reg = of_get_property(prev, "reg",
+							      NULL);
+					if (!reg)
+						continue;
+					if (((*reg) >> 8) != bus->channel)
+						continue;
+				}
+				of_node_put(p);
+				of_node_put(prev);
+				return bus;
+			}
+		}
+		of_node_put(prev);
+		prev = p;
+		p = of_get_parent(p);
+	}
+	/* The walk ran past the root: prev still holds the reference of
+	 * the last node visited (of_node_put() accepts NULL)
+	 */
+	of_node_put(prev);
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(pmac_i2c_find_bus);
+
+/*
+ * Return the i2c address of @device, i.e. the low 8 bits of its "reg"
+ * property, or 0 when the property is missing.
+ */
+u8 pmac_i2c_get_dev_addr(struct device_node *device)
+{
+	const u32 *regp;
+
+	regp = of_get_property(device, "reg", NULL);
+	if (!regp)
+		return 0;
+	return (*regp) & 0xff;
+}
+EXPORT_SYMBOL_GPL(pmac_i2c_get_dev_addr);
+
+/* Return the controller node stored in @bus (no reference is taken here) */
+struct device_node *pmac_i2c_get_controller(struct pmac_i2c_bus *bus)
+{
+ return bus->controller;
+}
+EXPORT_SYMBOL_GPL(pmac_i2c_get_controller);
+
+/* Return the bus node stored in @bus (no reference is taken here) */
+struct device_node *pmac_i2c_get_bus_node(struct pmac_i2c_bus *bus)
+{
+ return bus->busnode;
+}
+EXPORT_SYMBOL_GPL(pmac_i2c_get_bus_node);
+
+/* Return the bus type (the probe code sets a pmac_i2c_bus_* value) */
+int pmac_i2c_get_type(struct pmac_i2c_bus *bus)
+{
+ return bus->type;
+}
+EXPORT_SYMBOL_GPL(pmac_i2c_get_type);
+
+/* Return the bus flags (pmac_i2c_multibus or 0 as set by the probe code) */
+int pmac_i2c_get_flags(struct pmac_i2c_bus *bus)
+{
+ return bus->flags;
+}
+EXPORT_SYMBOL_GPL(pmac_i2c_get_flags);
+
+/* Return the channel number of @bus on its controller */
+int pmac_i2c_get_channel(struct pmac_i2c_bus *bus)
+{
+ return bus->channel;
+}
+EXPORT_SYMBOL_GPL(pmac_i2c_get_channel);
+
+
+/* Return the i2c_adapter embedded in @bus */
+struct i2c_adapter *pmac_i2c_get_adapter(struct pmac_i2c_bus *bus)
+{
+ return &bus->adapter;
+}
+EXPORT_SYMBOL_GPL(pmac_i2c_get_adapter);
+
+/*
+ * Reverse lookup of pmac_i2c_get_adapter(): return the registered bus
+ * that embeds @adapter, or NULL when there is none.
+ */
+struct pmac_i2c_bus *pmac_i2c_adapter_to_bus(struct i2c_adapter *adapter)
+{
+	struct pmac_i2c_bus *cur;
+
+	list_for_each_entry(cur, &pmac_i2c_busses, link) {
+		if (adapter == &cur->adapter)
+			return cur;
+	}
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(pmac_i2c_adapter_to_bus);
+
+/*
+ * Return 1 when @adapter is the adapter of the bus @dev sits on, 0
+ * otherwise (including when no bus is found for @dev).
+ */
+int pmac_i2c_match_adapter(struct device_node *dev, struct i2c_adapter *adapter)
+{
+	struct pmac_i2c_bus *owner = pmac_i2c_find_bus(dev);
+
+	return owner != NULL && adapter == &owner->adapter;
+}
+EXPORT_SYMBOL_GPL(pmac_i2c_match_adapter);
+
+/*
+ * Open (non polled) the first registered bus whose controller is @np.
+ * Returns the result of pmac_i2c_open(), or -ENODEV when no bus uses
+ * that controller.
+ */
+int pmac_low_i2c_lock(struct device_node *np)
+{
+	struct pmac_i2c_bus *cur;
+
+	list_for_each_entry(cur, &pmac_i2c_busses, link) {
+		if (cur->controller == np)
+			return pmac_i2c_open(cur, 0);
+	}
+	return -ENODEV;
+}
+EXPORT_SYMBOL_GPL(pmac_low_i2c_lock);
+
+/*
+ * Close the first registered bus whose controller is @np. Returns 0, or
+ * -ENODEV when no bus uses that controller.
+ */
+int pmac_low_i2c_unlock(struct device_node *np)
+{
+	struct pmac_i2c_bus *cur;
+
+	list_for_each_entry(cur, &pmac_i2c_busses, link) {
+		if (cur->controller == np) {
+			pmac_i2c_close(cur);
+			return 0;
+		}
+	}
+	return -ENODEV;
+}
+EXPORT_SYMBOL_GPL(pmac_low_i2c_unlock);
+
+
+/*
+ * Take exclusive ownership of @bus and reset it to std mode.
+ *
+ * On success (0) the bus mutex is left held until pmac_i2c_close(). If
+ * the bus specific open hook fails, the mutex is released and the
+ * hook's error code is returned.
+ */
+int pmac_i2c_open(struct pmac_i2c_bus *bus, int polled)
+{
+	int rc = 0;
+
+	mutex_lock(&bus->mutex);
+	bus->polled = polled || pmac_i2c_force_poll;
+	bus->opened = 1;
+	bus->mode = pmac_i2c_mode_std;
+
+	if (bus->open)
+		rc = bus->open(bus);
+	if (rc != 0) {
+		/* Undo the open, the caller does not own the bus */
+		bus->opened = 0;
+		mutex_unlock(&bus->mutex);
+	}
+	return rc;
+}
+EXPORT_SYMBOL_GPL(pmac_i2c_open);
+
+/*
+ * Release a bus opened with pmac_i2c_open(): run the bus specific close
+ * hook if there is one, then drop the bus mutex. Warns when the bus is
+ * not marked opened.
+ */
+void pmac_i2c_close(struct pmac_i2c_bus *bus)
+{
+ WARN_ON(!bus->opened);
+ if (bus->close)
+ bus->close(bus);
+ bus->opened = 0;
+ mutex_unlock(&bus->mutex);
+}
+EXPORT_SYMBOL_GPL(pmac_i2c_close);
+
+/*
+ * Select the transfer mode of an opened bus. Returns 0, or -EINVAL when
+ * @mode is outside pmac_i2c_mode_dumb..pmac_i2c_mode_combined.
+ */
+int pmac_i2c_setmode(struct pmac_i2c_bus *bus, int mode)
+{
+	const int valid = mode >= pmac_i2c_mode_dumb &&
+			  mode <= pmac_i2c_mode_combined;
+
+	WARN_ON(!bus->opened);
+
+	/* Report me if you see the error below as there might be a new
+	 * "combined4" mode that I need to implement for the SMU bus
+	 */
+	if (!valid) {
+		printk(KERN_ERR "low_i2c: Invalid mode %d requested on"
+		       " bus %pOF !\n", mode, bus->busnode);
+		return -EINVAL;
+	}
+
+	bus->mode = mode;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(pmac_i2c_setmode);
+
+/*
+ * Run one transfer on an opened bus by calling the bus specific xfer
+ * hook, and return its result. Warns when the bus is not marked opened.
+ */
+int pmac_i2c_xfer(struct pmac_i2c_bus *bus, u8 addrdir, int subsize,
+ u32 subaddr, u8 *data, int len)
+{
+ int rc;
+
+ WARN_ON(!bus->opened);
+
+ DBG("xfer() chan=%d, addrdir=0x%x, mode=%d, subsize=%d, subaddr=0x%x,"
+ " %d bytes, bus %pOF\n", bus->channel, addrdir, bus->mode, subsize,
+ subaddr, len, bus->busnode);
+
+ rc = bus->xfer(bus, addrdir, subsize, subaddr, data, len);
+
+#ifdef DEBUG
+ if (rc)
+ DBG("xfer error %d\n", rc);
+#endif
+ return rc;
+}
+EXPORT_SYMBOL_GPL(pmac_i2c_xfer);
+
+/* some quirks for platform function decoding */
+enum {
+ /* read-modify-write uses (old & mask) | val, see pmac_i2c_do_apply_rmw() */
+ pmac_i2c_quirk_invmask = 0x00000001u,
+ /* pmac_i2c_devscan() does not call the callback for such devices */
+ pmac_i2c_quirk_skip = 0x00000002u,
+};
+
+/*
+ * Call @callback(node, quirks) for every child of every registered bus
+ * node that matches an entry of the whitelist below. Only the first
+ * matching entry is considered for a node; entries flagged with
+ * pmac_i2c_quirk_skip end the search without calling the callback.
+ */
+static void pmac_i2c_devscan(void (*callback)(struct device_node *dev,
+ int quirks))
+{
+ struct pmac_i2c_bus *bus;
+ struct device_node *np;
+ static struct whitelist_ent {
+ char *name;
+ char *compatible;
+ int quirks;
+ } whitelist[] = {
+ /* XXX Study device-trees & Apple drivers to get the quirks
+ * right !
+ */
+ /* Workaround: It seems that running the clockspreading
+ * properties on the eMac will cause lockups during boot.
+ * The machine seems to work fine without that. So for now,
+ * let's make sure i2c-hwclock doesn't match about "imic"
+ * clocks and we'll figure out if we really need to do
+ * something special about those later.
+ */
+ { "i2c-hwclock", "imic5002", pmac_i2c_quirk_skip },
+ { "i2c-hwclock", "imic5003", pmac_i2c_quirk_skip },
+ { "i2c-hwclock", NULL, pmac_i2c_quirk_invmask },
+ { "i2c-cpu-voltage", NULL, 0},
+ { "temp-monitor", NULL, 0 },
+ { "supply-monitor", NULL, 0 },
+ { NULL, NULL, 0 },
+ };
+
+ /* Only some devices need to have platform functions instantiated
+ * here. For now, we have a table. Others, like 9554 i2c GPIOs used
+ * on Xserve, if we ever do a driver for them, will use their own
+ * platform function instance
+ */
+ list_for_each_entry(bus, &pmac_i2c_busses, link) {
+ for_each_child_of_node(bus->busnode, np) {
+ struct whitelist_ent *p;
+ /* If multibus, check if device is on that bus */
+ if (bus->flags & pmac_i2c_multibus)
+ if (bus != pmac_i2c_find_bus(np))
+ continue;
+ /* The order of the table matters: first match wins */
+ for (p = whitelist; p->name != NULL; p++) {
+ if (!of_node_name_eq(np, p->name))
+ continue;
+ if (p->compatible &&
+ !of_device_is_compatible(np, p->compatible))
+ continue;
+ if (p->quirks & pmac_i2c_quirk_skip)
+ break;
+ callback(np, p->quirks);
+ break;
+ }
+ }
+ }
+}
+
+#define MAX_I2C_DATA 64
+
+/*
+ * State of one running platform function, created by
+ * pmac_i2c_do_begin() and freed by pmac_i2c_do_end().
+ */
+struct pmac_i2c_pf_inst
+{
+ struct pmac_i2c_bus *bus; /* bus kept open while the function runs */
+ u8 addr; /* device address from pmac_i2c_get_dev_addr() */
+ u8 buffer[MAX_I2C_DATA]; /* data of the last read */
+ u8 scratch[MAX_I2C_DATA]; /* read-modify-write result to write back */
+ int bytes; /* length of the last read */
+ int quirks; /* pmac_i2c_quirk_* flags from driver_data */
+};
+
+/*
+ * "begin" handler: find and open the bus of the function's node and
+ * allocate the instance data handed to the other handlers. Returns NULL
+ * when the bus can't be found or opened, or when the allocation fails
+ * (the bus is closed again in that last case).
+ */
+static void* pmac_i2c_do_begin(struct pmf_function *func, struct pmf_args *args)
+{
+ struct pmac_i2c_pf_inst *inst;
+ struct pmac_i2c_bus *bus;
+
+ bus = pmac_i2c_find_bus(func->node);
+ if (bus == NULL) {
+ printk(KERN_ERR "low_i2c: Can't find bus for %pOF (pfunc)\n",
+ func->node);
+ return NULL;
+ }
+ if (pmac_i2c_open(bus, 0)) {
+ printk(KERN_ERR "low_i2c: Can't open i2c bus for %pOF (pfunc)\n",
+ func->node);
+ return NULL;
+ }
+
+ /* XXX might need GFP_ATOMIC when called during the suspend process,
+ * but then, there are already lots of issues with suspending when
+ * near OOM that need to be resolved, the allocator itself should
+ * probably make GFP_NOIO implicit during suspend
+ */
+ inst = kzalloc(sizeof(struct pmac_i2c_pf_inst), GFP_KERNEL);
+ if (inst == NULL) {
+ pmac_i2c_close(bus);
+ return NULL;
+ }
+ inst->bus = bus;
+ inst->addr = pmac_i2c_get_dev_addr(func->node);
+ /* driver_data carries the quirks, see pmac_i2c_dev_create() */
+ inst->quirks = (int)(long)func->driver_data;
+ return inst;
+}
+
+/*
+ * "end" handler: close the bus and free the instance data created by
+ * pmac_i2c_do_begin(). A NULL instance is ignored.
+ */
+static void pmac_i2c_do_end(struct pmf_function *func, void *instdata)
+{
+	struct pmac_i2c_pf_inst *inst = instdata;
+
+	if (inst != NULL) {
+		pmac_i2c_close(inst->bus);
+		kfree(inst);
+	}
+}
+
+/*
+ * "read_i2c" handler: read @len bytes from the device into the instance
+ * buffer and remember the length for later rmw/compare commands.
+ * Returns the result of the transfer, or -EINVAL when @len does not fit
+ * in the instance buffer.
+ */
+static int pmac_i2c_do_read(PMF_STD_ARGS, u32 len)
+{
+	struct pmac_i2c_pf_inst *inst = instdata;
+
+	/* The data lands in inst->buffer, refuse what cannot fit */
+	if (len > sizeof(inst->buffer))
+		return -EINVAL;
+
+	inst->bytes = len;
+	return pmac_i2c_xfer(inst->bus, inst->addr | pmac_i2c_read, 0, 0,
+			     inst->buffer, len);
+}
+
+/*
+ * "write_i2c" handler: write @len bytes from @data to the device, with
+ * no sub-address. Returns the result of the transfer.
+ */
+static int pmac_i2c_do_write(PMF_STD_ARGS, u32 len, const u8 *data)
+{
+ struct pmac_i2c_pf_inst *inst = instdata;
+
+ /* The xfer interface takes a non-const buffer, hence the cast */
+ return pmac_i2c_xfer(inst->bus, inst->addr | pmac_i2c_write, 0, 0,
+ (u8 *)data, len);
+}
+
+/* Helper of the "rmw" type callbacks: combine the bytes of the last
+ * read (inst->buffer) with the mask and value, and store the result in
+ * inst->scratch for writing back. It seems that various darwin drivers
+ * implement the mask/or differently, so the quirks select the formula:
+ *
+ *   invmask quirk:  scratch = (buffer & mask) | val
+ *   otherwise:      scratch = (buffer & ~mask) | (val & mask)
+ */
+static void pmac_i2c_do_apply_rmw(struct pmac_i2c_pf_inst *inst,
+				  u32 len, const u8 *mask, const u8 *val)
+{
+	const int invmask = inst->quirks & pmac_i2c_quirk_invmask;
+	u32 idx;
+
+	for (idx = 0; idx < len; idx++) {
+		u8 old = inst->buffer[idx];
+
+		if (invmask)
+			inst->scratch[idx] = (old & mask[idx]) | val[idx];
+		else
+			inst->scratch[idx] = (old & ~mask[idx])
+				| (val[idx] & mask[idx]);
+	}
+}
+
+/*
+ * "rmw_i2c" handler: apply mask/value to the data of the previous read
+ * and write @totallen bytes of the result back, with no sub-address.
+ * Returns -EINVAL when a length exceeds the previous read or when the
+ * value is longer than the mask, else the result of the transfer.
+ */
+static int pmac_i2c_do_rmw(PMF_STD_ARGS, u32 masklen, u32 valuelen,
+ u32 totallen, const u8 *maskdata,
+ const u8 *valuedata)
+{
+ struct pmac_i2c_pf_inst *inst = instdata;
+
+ if (masklen > inst->bytes || valuelen > inst->bytes ||
+ totallen > inst->bytes || valuelen > masklen)
+ return -EINVAL;
+
+ /* Only the first masklen bytes of the scratch buffer are computed */
+ pmac_i2c_do_apply_rmw(inst, masklen, maskdata, valuedata);
+
+ return pmac_i2c_xfer(inst->bus, inst->addr | pmac_i2c_write, 0, 0,
+ inst->scratch, totallen);
+}
+
+/*
+ * "read_i2c_sub" handler: read @len bytes at the one byte sub-address
+ * @subaddr into the instance buffer and remember the length for later
+ * rmw/compare commands. Returns the result of the transfer, or -EINVAL
+ * when @len does not fit in the instance buffer.
+ */
+static int pmac_i2c_do_read_sub(PMF_STD_ARGS, u8 subaddr, u32 len)
+{
+	struct pmac_i2c_pf_inst *inst = instdata;
+
+	/* The data lands in inst->buffer, refuse what cannot fit */
+	if (len > sizeof(inst->buffer))
+		return -EINVAL;
+
+	inst->bytes = len;
+	return pmac_i2c_xfer(inst->bus, inst->addr | pmac_i2c_read, 1, subaddr,
+			     inst->buffer, len);
+}
+
+/*
+ * "write_i2c_sub" handler: write @len bytes from @data at the one byte
+ * sub-address @subaddr. Returns the result of the transfer.
+ */
+static int pmac_i2c_do_write_sub(PMF_STD_ARGS, u8 subaddr, u32 len,
+ const u8 *data)
+{
+ struct pmac_i2c_pf_inst *inst = instdata;
+
+ /* The xfer interface takes a non-const buffer, hence the cast */
+ return pmac_i2c_xfer(inst->bus, inst->addr | pmac_i2c_write, 1,
+ subaddr, (u8 *)data, len);
+}
+
+/* "set_i2c_mode" handler: forward @mode to pmac_i2c_setmode() */
+static int pmac_i2c_do_set_mode(PMF_STD_ARGS, int mode)
+{
+ struct pmac_i2c_pf_inst *inst = instdata;
+
+ return pmac_i2c_setmode(inst->bus, mode);
+}
+
+/*
+ * "rmw_i2c_sub" handler: same as pmac_i2c_do_rmw() but the result is
+ * written back at the one byte sub-address @subaddr.
+ */
+static int pmac_i2c_do_rmw_sub(PMF_STD_ARGS, u8 subaddr, u32 masklen,
+ u32 valuelen, u32 totallen, const u8 *maskdata,
+ const u8 *valuedata)
+{
+ struct pmac_i2c_pf_inst *inst = instdata;
+
+ if (masklen > inst->bytes || valuelen > inst->bytes ||
+ totallen > inst->bytes || valuelen > masklen)
+ return -EINVAL;
+
+ /* Only the first masklen bytes of the scratch buffer are computed */
+ pmac_i2c_do_apply_rmw(inst, masklen, maskdata, valuedata);
+
+ return pmac_i2c_xfer(inst->bus, inst->addr | pmac_i2c_write, 1,
+ subaddr, inst->scratch, totallen);
+}
+
+/*
+ * "mask_and_compare" handler: check that the first @len bytes of the
+ * last read, once masked, equal @valuedata. The result (1 for a match,
+ * 0 otherwise) is stored through the first argument of @args. Returns
+ * -EINVAL when that argument is missing or @len exceeds the last read.
+ */
+static int pmac_i2c_do_mask_and_comp(PMF_STD_ARGS, u32 len,
+				     const u8 *maskdata,
+				     const u8 *valuedata)
+{
+	struct pmac_i2c_pf_inst *inst = instdata;
+	int match = 1;
+	u32 idx;
+
+	/* Get return value pointer, it's assumed to be a u32 */
+	if (!args || !args->count || !args->u[0].p)
+		return -EINVAL;
+
+	/* Check buffer */
+	if (len > inst->bytes)
+		return -EINVAL;
+
+	/* Stop at the first byte that differs */
+	for (idx = 0; idx < len; idx++) {
+		if ((inst->buffer[idx] & maskdata[idx]) != valuedata[idx]) {
+			match = 0;
+			break;
+		}
+	}
+	*args->u[0].p = match;
+	return 0;
+}
+
+/*
+ * "delay" handler: sleep for @duration divided by 1000, rounded up, in
+ * milliseconds (so @duration is presumably in microseconds).
+ */
+static int pmac_i2c_do_delay(PMF_STD_ARGS, u32 duration)
+{
+ msleep((duration + 999) / 1000);
+ return 0;
+}
+
+
+/*
+ * Platform function handlers for i2c devices, registered for each
+ * matching device by pmac_i2c_dev_create().
+ */
+static struct pmf_handlers pmac_i2c_pfunc_handlers = {
+ .begin = pmac_i2c_do_begin,
+ .end = pmac_i2c_do_end,
+ .read_i2c = pmac_i2c_do_read,
+ .write_i2c = pmac_i2c_do_write,
+ .rmw_i2c = pmac_i2c_do_rmw,
+ .read_i2c_sub = pmac_i2c_do_read_sub,
+ .write_i2c_sub = pmac_i2c_do_write_sub,
+ .rmw_i2c_sub = pmac_i2c_do_rmw_sub,
+ .set_i2c_mode = pmac_i2c_do_set_mode,
+ .mask_and_compare = pmac_i2c_do_mask_and_comp,
+ .delay = pmac_i2c_do_delay,
+};
+
+/*
+ * devscan callback: register the i2c platform function handlers for
+ * @np. The quirks travel as the driver data and are read back by
+ * pmac_i2c_do_begin().
+ */
+static void __init pmac_i2c_dev_create(struct device_node *np, int quirks)
+{
+ DBG("dev_create(%pOF)\n", np);
+
+ pmf_register_driver(np, &pmac_i2c_pfunc_handlers,
+ (void *)(long)quirks);
+}
+
+/*
+ * devscan callback: run the platform functions of @np that are flagged
+ * PMF_FLAGS_ON_INIT. @quirks is unused here.
+ */
+static void __init pmac_i2c_dev_init(struct device_node *np, int quirks)
+{
+	DBG("dev_init(%pOF)\n", np);
+
+	pmf_do_functions(np, NULL, 0, PMF_FLAGS_ON_INIT, NULL);
+}
+
+/*
+ * devscan callback: run the platform functions of @np that are flagged
+ * PMF_FLAGS_ON_SLEEP. @quirks is unused here.
+ */
+static void pmac_i2c_dev_suspend(struct device_node *np, int quirks)
+{
+ DBG("dev_suspend(%pOF)\n", np);
+ pmf_do_functions(np, NULL, 0, PMF_FLAGS_ON_SLEEP, NULL);
+}
+
+/*
+ * devscan callback: run the platform functions of @np that are flagged
+ * PMF_FLAGS_ON_WAKE. @quirks is unused here.
+ */
+static void pmac_i2c_dev_resume(struct device_node *np, int quirks)
+{
+ DBG("dev_resume(%pOF)\n", np);
+ pmf_do_functions(np, NULL, 0, PMF_FLAGS_ON_WAKE, NULL);
+}
+
+/* Run the "on sleep" platform functions of all whitelisted i2c devices */
+void pmac_pfunc_i2c_suspend(void)
+{
+ pmac_i2c_devscan(pmac_i2c_dev_suspend);
+}
+
+/* Run the "on wake" platform functions of all whitelisted i2c devices */
+void pmac_pfunc_i2c_resume(void)
+{
+ pmac_i2c_devscan(pmac_i2c_dev_resume);
+}
+
+/*
+ * Initialize us: probe all i2c busses on the machine, instantiate
+ * busses and platform functions as needed.
+ */
+/* This is non-static as it might be called early by smp code */
+int __init pmac_i2c_init(void)
+{
+ /* Guards against running the probes twice: this function is both
+ * an initcall and callable directly (see the comment above)
+ */
+ static int i2c_inited;
+
+ if (i2c_inited)
+ return 0;
+ i2c_inited = 1;
+
+ /* Probe keywest-i2c busses */
+ kw_i2c_probe();
+
+#ifdef CONFIG_ADB_PMU
+ /* Probe PMU i2c busses */
+ pmu_i2c_probe();
+#endif
+
+#ifdef CONFIG_PMAC_SMU
+ /* Probe SMU i2c busses */
+ smu_i2c_probe();
+#endif
+
+ /* Now add platform functions for some known devices */
+ pmac_i2c_devscan(pmac_i2c_dev_create);
+
+ return 0;
+}
+machine_arch_initcall(powermac, pmac_i2c_init);
+
+/* Since pmac_i2c_init can be called too early for the platform device
+ * registration, we need to do it at a later time. In our case, subsys
+ * happens to fit well, though I agree it's a bit of a hack...
+ */
+static int __init pmac_i2c_create_platform_devices(void)
+{
+ struct pmac_i2c_bus *bus;
+ int i = 0;
+
+ /* In the case where we are initialized from smp_init(), we must
+ * not use the timer (and thus the irq). It's safe from now on
+ * though
+ */
+ pmac_i2c_force_poll = 0;
+
+ /* Create platform devices */
+ /* One "i2c-powermac" device per registered bus, numbered in list
+ * order. An allocation failure aborts the whole function, the
+ * "init" platform functions below are then not run.
+ */
+ list_for_each_entry(bus, &pmac_i2c_busses, link) {
+ bus->platform_dev =
+ platform_device_alloc("i2c-powermac", i++);
+ if (bus->platform_dev == NULL)
+ return -ENOMEM;
+ bus->platform_dev->dev.platform_data = bus;
+ bus->platform_dev->dev.of_node = bus->busnode;
+ /* NOTE(review): the result of platform_device_add() is ignored */
+ platform_device_add(bus->platform_dev);
+ }
+
+ /* Now call platform "init" functions */
+ pmac_i2c_devscan(pmac_i2c_dev_init);
+
+ return 0;
+}
+machine_subsys_initcall(powermac, pmac_i2c_create_platform_devices);
diff --git a/arch/powerpc/platforms/powermac/nvram.c b/arch/powerpc/platforms/powermac/nvram.c
new file mode 100644
index 000000000..fe2e0249c
--- /dev/null
+++ b/arch/powerpc/platforms/powermac/nvram.c
@@ -0,0 +1,656 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2002 Benjamin Herrenschmidt (benh@kernel.crashing.org)
+ *
+ * Todo: - add support for the OF persistent properties
+ */
+#include <linux/export.h>
+#include <linux/kernel.h>
+#include <linux/stddef.h>
+#include <linux/string.h>
+#include <linux/nvram.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/errno.h>
+#include <linux/adb.h>
+#include <linux/pmu.h>
+#include <linux/memblock.h>
+#include <linux/completion.h>
+#include <linux/spinlock.h>
+#include <linux/of_address.h>
+#include <asm/sections.h>
+#include <asm/io.h>
+#include <asm/machdep.h>
+#include <asm/nvram.h>
+
+#include "pmac.h"
+
+#define DEBUG
+
+#ifdef DEBUG
+#define DBG(x...) printk(x)
+#else
+#define DBG(x...)
+#endif
+
+#define NVRAM_SIZE 0x2000 /* 8kB of non-volatile RAM */
+
+#define CORE99_SIGNATURE 0x5a
+#define CORE99_ADLER_START 0x14
+
+/* On Core99, nvram is either a sharp, a micron or an AMD flash */
+#define SM_FLASH_STATUS_DONE 0x80
+#define SM_FLASH_STATUS_ERR 0x38
+
+#define SM_FLASH_CMD_ERASE_CONFIRM 0xd0
+#define SM_FLASH_CMD_ERASE_SETUP 0x20
+#define SM_FLASH_CMD_RESET 0xff
+#define SM_FLASH_CMD_WRITE_SETUP 0x40
+#define SM_FLASH_CMD_CLEAR_STATUS 0x50
+#define SM_FLASH_CMD_READ_STATUS 0x70
+
+/* CHRP NVRAM header */
+struct chrp_header {
+ u8 signature; /* CORE99_SIGNATURE is expected by core99_check() */
+ u8 cksum; /* must equal chrp_checksum() of this header */
+ u16 len; /* partition length, lookup_partitions() counts it in 0x10 byte units */
+ char name[12]; /* partition name, e.g. "common" */
+ u8 data[]; /* partition content follows the header */
+};
+
+/* Header at the start of a Core99 nvram bank, as checked by core99_check() */
+struct core99_header {
+ struct chrp_header hdr;
+ u32 adler; /* must equal core99_calc_adler() of the bank */
+ u32 generation; /* returned by core99_check() for a valid bank */
+ u32 reserved[2];
+};
+
+/*
+ * Read and write the non-volatile RAM on PowerMacs and CHRP machines.
+ */
+static int nvram_naddrs;
+/* Base of the mapped nvram: data window or flash banks */
+static volatile unsigned char __iomem *nvram_data;
+static int is_core_99;
+/* Flash bank the erase/write routines below operate on */
+static int core99_bank;
+/* Partition offsets, indexed by pmac_nvram_OF/XPRAM/NR */
+static int nvram_partitions[3];
+// XXX Turn that into a sem
+/* Serializes the address/data register pair of the indirect accessors */
+static DEFINE_RAW_SPINLOCK(nv_lock);
+
+/* Flash specific bank operations (Sharp/Micron or AMD routines below) */
+static int (*core99_write_bank)(int bank, u8* datas);
+static int (*core99_erase_bank)(int bank);
+
+/* RAM copy of the nvram used by the core99_nvram_* accessors */
+static char *nvram_image;
+
+
+/* Read one byte from the RAM image, 0xff when there is no image */
+static unsigned char core99_nvram_read_byte(int addr)
+{
+	if (!nvram_image)
+		return 0xff;
+
+	return nvram_image[addr];
+}
+
+/* Store one byte in the RAM image, a no-op when there is no image */
+static void core99_nvram_write_byte(int addr, unsigned char val)
+{
+	if (nvram_image != NULL)
+		nvram_image[addr] = val;
+}
+
+/*
+ * Copy up to @count bytes of the RAM image, starting at *@index, into
+ * @buf and advance *@index. The copy is clamped to the end of the
+ * nvram. Returns the number of bytes copied, 0 when *@index is beyond
+ * the nvram, or -ENODEV when there is no image.
+ */
+static ssize_t core99_nvram_read(char *buf, size_t count, loff_t *index)
+{
+	int pos;
+
+	if (!nvram_image)
+		return -ENODEV;
+	if (*index > NVRAM_SIZE)
+		return 0;
+
+	pos = *index;
+	/* Don't run past the end of the image */
+	if (pos + count > NVRAM_SIZE)
+		count = NVRAM_SIZE - pos;
+
+	memcpy(buf, nvram_image + pos, count);
+	*index = pos + count;
+	return count;
+}
+
+/*
+ * Copy up to @count bytes from @buf into the RAM image, starting at
+ * *@index, and advance *@index. The copy is clamped to the end of the
+ * nvram. Returns the number of bytes copied, 0 when *@index is beyond
+ * the nvram, or -ENODEV when there is no image.
+ */
+static ssize_t core99_nvram_write(char *buf, size_t count, loff_t *index)
+{
+	int pos;
+
+	if (!nvram_image)
+		return -ENODEV;
+	if (*index > NVRAM_SIZE)
+		return 0;
+
+	pos = *index;
+	/* Don't run past the end of the image */
+	if (pos + count > NVRAM_SIZE)
+		count = NVRAM_SIZE - pos;
+
+	memcpy(nvram_image + pos, buf, count);
+	*index = pos + count;
+	return count;
+}
+
+/* Size of the nvram in bytes, or -ENODEV when there is no RAM image */
+static ssize_t core99_nvram_size(void)
+{
+	if (!nvram_image)
+		return -ENODEV;
+
+	return NVRAM_SIZE;
+}
+
+#ifdef CONFIG_PPC32
+static volatile unsigned char __iomem *nvram_addr;
+static int nvram_mult;
+
+/* Size of the nvram in bytes, always NVRAM_SIZE here */
+static ssize_t ppc32_nvram_size(void)
+{
+ return NVRAM_SIZE;
+}
+
+/*
+ * Read one byte of directly mapped nvram. The address wraps at
+ * NVRAM_SIZE and consecutive bytes are nvram_mult apart in the mapping.
+ */
+static unsigned char direct_nvram_read_byte(int addr)
+{
+	int offset = (addr & (NVRAM_SIZE - 1)) * nvram_mult;
+
+	return in_8(&nvram_data[offset]);
+}
+
+/*
+ * Write one byte of directly mapped nvram. The address wraps at
+ * NVRAM_SIZE and consecutive bytes are nvram_mult apart in the mapping.
+ */
+static void direct_nvram_write_byte(int addr, unsigned char val)
+{
+	int offset = (addr & (NVRAM_SIZE - 1)) * nvram_mult;
+
+	out_8(&nvram_data[offset], val);
+}
+
+
+/*
+ * Read one byte of indirectly addressed nvram: the upper address bits
+ * go to the address register, the low 5 bits select a slot (0x10 bytes
+ * apart) in the data window. nv_lock keeps the two accesses together.
+ */
+static unsigned char indirect_nvram_read_byte(int addr)
+{
+ unsigned char val;
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&nv_lock, flags);
+ out_8(nvram_addr, addr >> 5);
+ val = in_8(&nvram_data[(addr & 0x1f) << 4]);
+ raw_spin_unlock_irqrestore(&nv_lock, flags);
+
+ return val;
+}
+
+/*
+ * Write one byte of indirectly addressed nvram, using the same
+ * address register / data window scheme as indirect_nvram_read_byte().
+ */
+static void indirect_nvram_write_byte(int addr, unsigned char val)
+{
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&nv_lock, flags);
+ out_8(nvram_addr, addr >> 5);
+ out_8(&nvram_data[(addr & 0x1f) << 4], val);
+ raw_spin_unlock_irqrestore(&nv_lock, flags);
+}
+
+
+#ifdef CONFIG_ADB_PMU
+
+/*
+ * Request done callback: signal the completion stored in req->arg, if
+ * the submitter provided one.
+ */
+static void pmu_nvram_complete(struct adb_request *req)
+{
+	struct completion *waiter = (struct completion *)req->arg;
+
+	if (waiter)
+		complete(waiter);
+}
+
+/*
+ * Read one nvram byte through the PMU. Returns 0xff when the request
+ * can't be submitted.
+ */
+static unsigned char pmu_nvram_read_byte(int addr)
+{
+ struct adb_request req;
+ DECLARE_COMPLETION_ONSTACK(req_complete);
+
+ /* Only sleep on the completion once the system is running,
+ * otherwise rely on the polling loop below
+ */
+ req.arg = system_state == SYSTEM_RUNNING ? &req_complete : NULL;
+ if (pmu_request(&req, pmu_nvram_complete, 3, PMU_READ_NVRAM,
+ (addr >> 8) & 0xff, addr & 0xff))
+ return 0xff;
+ if (system_state == SYSTEM_RUNNING)
+ wait_for_completion(&req_complete);
+ while (!req.complete)
+ pmu_poll();
+ return req.reply[0];
+}
+
+/*
+ * Write one nvram byte through the PMU. Nothing is reported when the
+ * request can't be submitted.
+ */
+static void pmu_nvram_write_byte(int addr, unsigned char val)
+{
+ struct adb_request req;
+ DECLARE_COMPLETION_ONSTACK(req_complete);
+
+ /* Only sleep on the completion once the system is running,
+ * otherwise rely on the polling loop below
+ */
+ req.arg = system_state == SYSTEM_RUNNING ? &req_complete : NULL;
+ if (pmu_request(&req, pmu_nvram_complete, 4, PMU_WRITE_NVRAM,
+ (addr >> 8) & 0xff, addr & 0xff, val))
+ return;
+ if (system_state == SYSTEM_RUNNING)
+ wait_for_completion(&req_complete);
+ while (!req.complete)
+ pmu_poll();
+}
+
+#endif /* CONFIG_ADB_PMU */
+#endif /* CONFIG_PPC32 */
+
+/*
+ * Compute the checksum of a CHRP header: the signature plus every byte
+ * from the len field up to the start of the data, with the carries
+ * folded back into the low 8 bits. The cksum field itself is left out.
+ */
+static u8 chrp_checksum(struct chrp_header* hdr)
+{
+	const u8 *cur = (u8 *)&hdr->len;
+	const u8 *end = hdr->data;
+	u16 sum = hdr->signature;
+
+	while (cur < end)
+		sum += *cur++;
+
+	/* Fold the high byte back in until the sum fits in 8 bits */
+	while (sum > 0xFF)
+		sum = (sum & 0xFF) + (sum>>8);
+	return sum;
+}
+
+/*
+ * Compute the Adler-32 checksum of a Core99 nvram bank, covering the
+ * bytes from CORE99_ADLER_START up to the end of the bank.
+ */
+static u32 core99_calc_adler(u8 *buffer)
+{
+	int cnt;
+	u32 low, high;
+
+	buffer += CORE99_ADLER_START;
+	low = 1;
+	high = 0;
+	for (cnt=0; cnt<(NVRAM_SIZE-CORE99_ADLER_START); cnt++) {
+		/* Reduce both sums every 5000 bytes. Leaving low
+		 * unreduced lets high wrap around 32 bits on a bank
+		 * full of large byte values, which corrupts the result
+		 */
+		if ((cnt % 5000) == 0) {
+			low %= 65521UL;
+			high %= 65521UL;
+		}
+		low += buffer[cnt];
+		high += low;
+	}
+	low %= 65521UL;
+	high %= 65521UL;
+
+	return (high << 16) | low;
+}
+
+/*
+ * Validate a Core99 nvram bank: signature, header checksum and adler
+ * checksum must all match. Returns the generation count of a valid
+ * bank, 0 otherwise.
+ */
+static u32 __init core99_check(u8 *datas)
+{
+	struct core99_header* hdr99 = (struct core99_header*)datas;
+
+	if (hdr99->hdr.signature != CORE99_SIGNATURE) {
+		DBG("Invalid signature\n");
+	} else if (hdr99->hdr.cksum != chrp_checksum(&hdr99->hdr)) {
+		DBG("Invalid checksum\n");
+	} else if (hdr99->adler != core99_calc_adler(datas)) {
+		DBG("Invalid adler\n");
+	} else {
+		return hdr99->generation;
+	}
+	return 0;
+}
+
+/*
+ * Erase one nvram bank of a Sharp/Micron flash. Returns 0 when the bank
+ * reads back as all 0xff, -ENXIO otherwise.
+ *
+ * NOTE(review): @bank only shows up in the debug message, the bank
+ * address is computed from the core99_bank global.
+ */
+static int sm_erase_bank(int bank)
+{
+ int stat;
+ unsigned long timeout;
+
+ u8 __iomem *base = (u8 __iomem *)nvram_data + core99_bank*NVRAM_SIZE;
+
+ DBG("nvram: Sharp/Micron Erasing bank %d...\n", bank);
+
+ out_8(base, SM_FLASH_CMD_ERASE_SETUP);
+ out_8(base, SM_FLASH_CMD_ERASE_CONFIRM);
+ /* Poll the status until done, giving up after 1000000 reads */
+ timeout = 0;
+ do {
+ if (++timeout > 1000000) {
+ printk(KERN_ERR "nvram: Sharp/Micron flash erase timeout !\n");
+ break;
+ }
+ out_8(base, SM_FLASH_CMD_READ_STATUS);
+ stat = in_8(base);
+ } while (!(stat & SM_FLASH_STATUS_DONE));
+
+ out_8(base, SM_FLASH_CMD_CLEAR_STATUS);
+ out_8(base, SM_FLASH_CMD_RESET);
+
+ /* The read back decides on success, even after a timeout */
+ if (memchr_inv(base, 0xff, NVRAM_SIZE)) {
+ printk(KERN_ERR "nvram: Sharp/Micron flash erase failed !\n");
+ return -ENXIO;
+ }
+ return 0;
+}
+
+/*
+ * Program one nvram bank of a Sharp/Micron flash with the NVRAM_SIZE
+ * bytes at @datas. Returns 0 when the bank reads back equal to @datas,
+ * -ENXIO otherwise.
+ *
+ * NOTE(review): @bank only shows up in the debug message, the bank
+ * address is computed from the core99_bank global.
+ */
+static int sm_write_bank(int bank, u8* datas)
+{
+ int i, stat = 0;
+ unsigned long timeout;
+
+ u8 __iomem *base = (u8 __iomem *)nvram_data + core99_bank*NVRAM_SIZE;
+
+ DBG("nvram: Sharp/Micron Writing bank %d...\n", bank);
+
+ /* Bytes are programmed one at a time */
+ for (i=0; i<NVRAM_SIZE; i++) {
+ out_8(base+i, SM_FLASH_CMD_WRITE_SETUP);
+ udelay(1);
+ out_8(base+i, datas[i]);
+ /* Poll the status until done, giving up after 1000000 reads */
+ timeout = 0;
+ do {
+ if (++timeout > 1000000) {
+ printk(KERN_ERR "nvram: Sharp/Micron flash write timeout !\n");
+ break;
+ }
+ out_8(base, SM_FLASH_CMD_READ_STATUS);
+ stat = in_8(base);
+ } while (!(stat & SM_FLASH_STATUS_DONE));
+ /* A timed out byte aborts the remaining writes */
+ if (!(stat & SM_FLASH_STATUS_DONE))
+ break;
+ }
+ out_8(base, SM_FLASH_CMD_CLEAR_STATUS);
+ out_8(base, SM_FLASH_CMD_RESET);
+ /* The read back decides on success */
+ if (memcmp(base, datas, NVRAM_SIZE)) {
+ printk(KERN_ERR "nvram: Sharp/Micron flash write failed !\n");
+ return -ENXIO;
+ }
+ return 0;
+}
+
+/*
+ * Erase one nvram bank of an AMD flash. Returns 0 when the bank reads
+ * back as all 0xff, -ENXIO otherwise.
+ *
+ * NOTE(review): @bank only shows up in the debug message, the bank
+ * address is computed from the core99_bank global.
+ */
+static int amd_erase_bank(int bank)
+{
+ int stat = 0;
+ unsigned long timeout;
+
+ u8 __iomem *base = (u8 __iomem *)nvram_data + core99_bank*NVRAM_SIZE;
+
+ DBG("nvram: AMD Erasing bank %d...\n", bank);
+
+ /* Unlock 1 */
+ out_8(base+0x555, 0xaa);
+ udelay(1);
+ /* Unlock 2 */
+ out_8(base+0x2aa, 0x55);
+ udelay(1);
+
+ /* Sector-Erase */
+ out_8(base+0x555, 0x80);
+ udelay(1);
+ out_8(base+0x555, 0xaa);
+ udelay(1);
+ out_8(base+0x2aa, 0x55);
+ udelay(1);
+ out_8(base, 0x30);
+ udelay(1);
+
+ /* Wait until two consecutive reads return the same value, giving
+ * up after 1000000 attempts
+ */
+ timeout = 0;
+ do {
+ if (++timeout > 1000000) {
+ printk(KERN_ERR "nvram: AMD flash erase timeout !\n");
+ break;
+ }
+ stat = in_8(base) ^ in_8(base);
+ } while (stat != 0);
+
+ /* Reset */
+ out_8(base, 0xf0);
+ udelay(1);
+
+ /* The read back decides on success, even after a timeout */
+ if (memchr_inv(base, 0xff, NVRAM_SIZE)) {
+ printk(KERN_ERR "nvram: AMD flash erase failed !\n");
+ return -ENXIO;
+ }
+ return 0;
+}
+
+/*
+ * Program one NVRAM bank of an AMD flash, one byte at a time.
+ *
+ * @bank:  bank index; selects the NVRAM_SIZE window inside nvram_data
+ * @datas: NVRAM_SIZE bytes to program
+ *
+ * Returns 0 if the bank reads back equal to @datas, -ENXIO otherwise.
+ */
+static int amd_write_bank(int bank, u8* datas)
+{
+	int i, stat = 0;
+	unsigned long timeout;
+
+	/* Address the bank that was requested, not the global active bank */
+	u8 __iomem *base = (u8 __iomem *)nvram_data + bank*NVRAM_SIZE;
+
+	DBG("nvram: AMD Writing bank %d...\n", bank);
+
+	for (i=0; i<NVRAM_SIZE; i++) {
+		/* Unlock 1 */
+		out_8(base+0x555, 0xaa);
+		udelay(1);
+		/* Unlock 2 */
+		out_8(base+0x2aa, 0x55);
+		udelay(1);
+
+		/* Write single word */
+		out_8(base+0x555, 0xa0);
+		udelay(1);
+		out_8(base+i, datas[i]);
+
+		/* Wait until two consecutive reads of the same byte agree */
+		timeout = 0;
+		do {
+			if (++timeout > 1000000) {
+				printk(KERN_ERR "nvram: AMD flash write timeout !\n");
+				break;
+			}
+			stat = in_8(base) ^ in_8(base);
+		} while (stat != 0);
+		/* Give up on the remaining bytes after a timeout */
+		if (stat != 0)
+			break;
+	}
+
+	/* Reset */
+	out_8(base, 0xf0);
+	udelay(1);
+
+	/* Read the bank back to verify the result */
+	if (memcmp(base, datas, NVRAM_SIZE)) {
+		printk(KERN_ERR "nvram: AMD flash write failed !\n");
+		return -ENXIO;
+	}
+	return 0;
+}
+
+/*
+ * Fill nvram_partitions[] with the offsets of the OF, XPRAM and NR areas.
+ *
+ * On pmac_newworld machines the NVRAM is scanned as a chain of 16-byte
+ * partition headers read through ppc_md.nvram_read_val(); partitions that
+ * are not found keep the value -1. Otherwise fixed offsets are used.
+ */
+static void __init lookup_partitions(void)
+{
+	u8 buffer[17];
+	int i, offset;
+	struct chrp_header* hdr;
+
+	if (pmac_newworld) {
+		nvram_partitions[pmac_nvram_OF] = -1;
+		nvram_partitions[pmac_nvram_XPRAM] = -1;
+		nvram_partitions[pmac_nvram_NR] = -1;
+		hdr = (struct chrp_header *)buffer;
+
+		offset = 0;
+		/* Extra byte keeps hdr->name terminated for strcmp() */
+		buffer[16] = 0;
+		do {
+			for (i=0;i<16;i++)
+				buffer[i] = ppc_md.nvram_read_val(offset+i);
+			/*
+			 * A zero length would never advance offset and the
+			 * scan would spin forever; treat it as corrupt data
+			 * and stop here.
+			 */
+			if (hdr->len == 0) {
+				printk(KERN_WARNING "nvram: zero-length partition"
+				       " header at 0x%x, stopping scan\n", offset);
+				break;
+			}
+			if (!strcmp(hdr->name, "common"))
+				nvram_partitions[pmac_nvram_OF] = offset + 0x10;
+			if (!strcmp(hdr->name, "APL,MacOS75")) {
+				nvram_partitions[pmac_nvram_XPRAM] = offset + 0x10;
+				nvram_partitions[pmac_nvram_NR] = offset + 0x110;
+			}
+			/* hdr->len counts 16-byte units */
+			offset += (hdr->len * 0x10);
+		} while(offset < NVRAM_SIZE);
+	} else {
+		nvram_partitions[pmac_nvram_OF] = 0x1800;
+		nvram_partitions[pmac_nvram_XPRAM] = 0x1300;
+		nvram_partitions[pmac_nvram_NR] = 0x1400;
+	}
+	DBG("nvram: OF partition at 0x%x\n", nvram_partitions[pmac_nvram_OF]);
+	DBG("nvram: XP partition at 0x%x\n", nvram_partitions[pmac_nvram_XPRAM]);
+	DBG("nvram: NR partition at 0x%x\n", nvram_partitions[pmac_nvram_NR]);
+}
+
+/*
+ * Write the in-memory NVRAM image back to flash if it differs from the
+ * currently active bank.
+ *
+ * Does nothing unless is_core_99, nvram_data and nvram_image are all set.
+ * The image header is refreshed (generation, signature, checksum, adler),
+ * then the other bank is erased and written through the core99_erase_bank
+ * and core99_write_bank hooks. Everything runs with nv_lock held.
+ */
+static void core99_nvram_sync(void)
+{
+ struct core99_header* hdr99;
+ unsigned long flags;
+
+ if (!is_core_99 || !nvram_data || !nvram_image)
+ return;
+
+ raw_spin_lock_irqsave(&nv_lock, flags);
+ /* Nothing to do when the image already matches the active bank */
+ if (!memcmp(nvram_image, (u8*)nvram_data + core99_bank*NVRAM_SIZE,
+ NVRAM_SIZE))
+ goto bail;
+
+ DBG("Updating nvram...\n");
+
+ hdr99 = (struct core99_header*)nvram_image;
+ hdr99->generation++;
+ hdr99->hdr.signature = CORE99_SIGNATURE;
+ hdr99->hdr.cksum = chrp_checksum(&hdr99->hdr);
+ hdr99->adler = core99_calc_adler(nvram_image);
+ /*
+ * Switch to the other bank before touching the flash. Note that
+ * core99_bank is not switched back if the erase or write below fails.
+ */
+ core99_bank = core99_bank ? 0 : 1;
+ if (core99_erase_bank)
+ if (core99_erase_bank(core99_bank)) {
+ printk("nvram: Error erasing bank %d\n", core99_bank);
+ goto bail;
+ }
+ if (core99_write_bank)
+ if (core99_write_bank(core99_bank, nvram_image))
+ printk("nvram: Error writing bank %d\n", core99_bank);
+ bail:
+ raw_spin_unlock_irqrestore(&nv_lock, flags);
+
+#ifdef DEBUG
+ mdelay(2000);
+#endif
+}
+
+/*
+ * Set up flash-backed NVRAM: map both banks, pick the active one, copy it
+ * into a RAM image and install the core99 accessors in ppc_md.
+ *
+ * @dp:   the NVRAM device node, used to select the flash programming hooks
+ * @addr: physical address of the flash; two banks of NVRAM_SIZE are mapped
+ *
+ * Returns 0 on success, -EINVAL when no address is known, or -ENOMEM when
+ * the flash cannot be mapped. Panics if the RAM image cannot be allocated.
+ */
+static int __init core99_nvram_setup(struct device_node *dp, unsigned long addr)
+{
+	int i;
+	u32 gen_bank0, gen_bank1;
+
+	if (nvram_naddrs < 1) {
+		printk(KERN_ERR "nvram: no address\n");
+		return -EINVAL;
+	}
+	/*
+	 * Map the flash before allocating the image, so a mapping failure
+	 * returns cleanly instead of dereferencing a NULL mapping below.
+	 */
+	nvram_data = ioremap(addr, NVRAM_SIZE*2);
+	if (!nvram_data) {
+		printk(KERN_ERR "nvram: can't map flash\n");
+		return -ENOMEM;
+	}
+	nvram_image = memblock_alloc(NVRAM_SIZE, SMP_CACHE_BYTES);
+	if (!nvram_image)
+		panic("%s: Failed to allocate %u bytes\n", __func__,
+		      NVRAM_SIZE);
+	nvram_naddrs = 1; /* Make sure we get the correct case */
+
+	DBG("nvram: Checking bank 0...\n");
+
+	/* The bank with the larger value from core99_check() becomes active */
+	gen_bank0 = core99_check((u8 *)nvram_data);
+	gen_bank1 = core99_check((u8 *)nvram_data + NVRAM_SIZE);
+	core99_bank = (gen_bank0 < gen_bank1) ? 1 : 0;
+
+	DBG("nvram: gen0=%d, gen1=%d\n", gen_bank0, gen_bank1);
+	DBG("nvram: Active bank is: %d\n", core99_bank);
+
+	/* Copy the active bank into the RAM image */
+	for (i=0; i<NVRAM_SIZE; i++)
+		nvram_image[i] = nvram_data[i + core99_bank*NVRAM_SIZE];
+
+	ppc_md.nvram_read_val	= core99_nvram_read_byte;
+	ppc_md.nvram_write_val	= core99_nvram_write_byte;
+	ppc_md.nvram_read	= core99_nvram_read;
+	ppc_md.nvram_write	= core99_nvram_write;
+	ppc_md.nvram_size	= core99_nvram_size;
+	ppc_md.nvram_sync	= core99_nvram_sync;
+	ppc_md.machine_shutdown	= core99_nvram_sync;
+	/*
+	 * Maybe we could be smarter here though making an exclusive list
+	 * of known flash chips is a bit nasty as older OF didn't provide us
+	 * with a useful "compatible" entry. A solution would be to really
+	 * identify the chip using flash id commands and base ourselves on
+	 * a list of known chips IDs
+	 */
+	if (of_device_is_compatible(dp, "amd-0137")) {
+		core99_erase_bank = amd_erase_bank;
+		core99_write_bank = amd_write_bank;
+	} else {
+		core99_erase_bank = sm_erase_bank;
+		core99_write_bank = sm_write_bank;
+	}
+	return 0;
+}
+
+/*
+ * Locate the "nvram" device node and install the matching NVRAM accessors.
+ *
+ * Up to two addresses are read from the node. A node compatible with
+ * "nvram,flash" is handed to core99_nvram_setup(); otherwise, on
+ * CONFIG_PPC32, the access method is chosen from the number of addresses
+ * found. On success lookup_partitions() is run.
+ *
+ * Returns 0 on success, -ENODEV if there is no "nvram" node, -ENXIO for an
+ * unsupported layout, or the error from core99_nvram_setup().
+ */
+int __init pmac_nvram_init(void)
+{
+ struct device_node *dp;
+ struct resource r1, r2;
+ unsigned int s1 = 0, s2 = 0;
+ int err = 0;
+
+ nvram_naddrs = 0;
+
+ dp = of_find_node_by_name(NULL, "nvram");
+ if (dp == NULL) {
+ printk(KERN_ERR "Can't find NVRAM device\n");
+ return -ENODEV;
+ }
+
+ /* Try to obtain an address */
+ if (of_address_to_resource(dp, 0, &r1) == 0) {
+ nvram_naddrs = 1;
+ s1 = resource_size(&r1);
+ if (of_address_to_resource(dp, 1, &r2) == 0) {
+ nvram_naddrs = 2;
+ s2 = resource_size(&r2);
+ }
+ }
+
+ is_core_99 = of_device_is_compatible(dp, "nvram,flash");
+ if (is_core_99) {
+ /*
+ * NOTE(review): r1 is uninitialised here when no address was found;
+ * core99_nvram_setup() rejects nvram_naddrs < 1 before using addr.
+ */
+ err = core99_nvram_setup(dp, r1.start);
+ goto bail;
+ }
+
+#ifdef CONFIG_PPC32
+ if (machine_is(chrp) && nvram_naddrs == 1) {
+ /* One address on CHRP: direct access, multiplier 1 */
+ nvram_data = ioremap(r1.start, s1);
+ nvram_mult = 1;
+ ppc_md.nvram_read_val = direct_nvram_read_byte;
+ ppc_md.nvram_write_val = direct_nvram_write_byte;
+ ppc_md.nvram_size = ppc32_nvram_size;
+ } else if (nvram_naddrs == 1) {
+ /* One address: direct access, multiplier is s1 / NVRAM_SIZE rounded up */
+ nvram_data = ioremap(r1.start, s1);
+ nvram_mult = (s1 + NVRAM_SIZE - 1) / NVRAM_SIZE;
+ ppc_md.nvram_read_val = direct_nvram_read_byte;
+ ppc_md.nvram_write_val = direct_nvram_write_byte;
+ ppc_md.nvram_size = ppc32_nvram_size;
+ } else if (nvram_naddrs == 2) {
+ /* Two addresses: first is mapped as nvram_addr, second as nvram_data */
+ nvram_addr = ioremap(r1.start, s1);
+ nvram_data = ioremap(r2.start, s2);
+ ppc_md.nvram_read_val = indirect_nvram_read_byte;
+ ppc_md.nvram_write_val = indirect_nvram_write_byte;
+ ppc_md.nvram_size = ppc32_nvram_size;
+ } else if (nvram_naddrs == 0 && sys_ctrler == SYS_CTRLER_PMU) {
+ /* No address and a PMU system controller: use the PMU accessors */
+#ifdef CONFIG_ADB_PMU
+ nvram_naddrs = -1;
+ ppc_md.nvram_read_val = pmu_nvram_read_byte;
+ ppc_md.nvram_write_val = pmu_nvram_write_byte;
+ ppc_md.nvram_size = ppc32_nvram_size;
+#endif /* CONFIG_ADB_PMU */
+ } else {
+ printk(KERN_ERR "Incompatible type of NVRAM\n");
+ err = -ENXIO;
+ }
+#endif /* CONFIG_PPC32 */
+bail:
+ of_node_put(dp);
+ if (err == 0)
+ lookup_partitions();
+ return err;
+}
+
+/*
+ * Return the NVRAM offset recorded for @partition in nvram_partitions[].
+ * @partition is used as an index without any range check.
+ */
+int pmac_get_partition(int partition)
+{
+ return nvram_partitions[partition];
+}
+
+/*
+ * Read one byte from the XPRAM partition.
+ *
+ * @xpaddr: byte offset inside XPRAM, valid range 0..0xff
+ *
+ * Returns the byte, or 0xff when the XPRAM partition is unknown or
+ * @xpaddr is out of range.
+ */
+u8 pmac_xpram_read(int xpaddr)
+{
+	int offset = pmac_get_partition(pmac_nvram_XPRAM);
+
+	/*
+	 * XPRAM is 0x100 bytes long: the NR partition starts at XPRAM + 0x100,
+	 * so 0x100 itself must be rejected.
+	 */
+	if (offset < 0 || xpaddr < 0 || xpaddr >= 0x100)
+		return 0xff;
+
+	return ppc_md.nvram_read_val(xpaddr + offset);
+}
+
+/*
+ * Write one byte to the XPRAM partition.
+ *
+ * @xpaddr: byte offset inside XPRAM, valid range 0..0xff
+ * @data:   value to store
+ *
+ * The write is silently dropped when the XPRAM partition is unknown or
+ * @xpaddr is out of range.
+ */
+void pmac_xpram_write(int xpaddr, u8 data)
+{
+	int offset = pmac_get_partition(pmac_nvram_XPRAM);
+
+	/*
+	 * XPRAM is 0x100 bytes long: the NR partition starts at XPRAM + 0x100,
+	 * so 0x100 itself must be rejected.
+	 */
+	if (offset < 0 || xpaddr < 0 || xpaddr >= 0x100)
+		return;
+
+	ppc_md.nvram_write_val(xpaddr + offset, data);
+}
+
+EXPORT_SYMBOL(pmac_get_partition);
+EXPORT_SYMBOL(pmac_xpram_read);
+EXPORT_SYMBOL(pmac_xpram_write);
diff --git a/arch/powerpc/platforms/powermac/pci.c b/arch/powerpc/platforms/powermac/pci.c
new file mode 100644
index 000000000..d71359b53
--- /dev/null
+++ b/arch/powerpc/platforms/powermac/pci.c
@@ -0,0 +1,1261 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Support for PCI bridges found on Power Macintoshes.
+ *
+ * Copyright (C) 2003-2005 Benjamin Herrenschmidt (benh@kernel.crashing.org)
+ * Copyright (C) 1997 Paul Mackerras (paulus@samba.org)
+ */
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/delay.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/irq.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/of_pci.h>
+
+#include <asm/sections.h>
+#include <asm/io.h>
+#include <asm/pci-bridge.h>
+#include <asm/machdep.h>
+#include <asm/pmac_feature.h>
+#include <asm/grackle.h>
+#include <asm/ppc-pci.h>
+
+#include "pmac.h"
+
+/* Debug output is compiled out: DEBUG is undefined, so DBG() expands to nothing */
+#undef DEBUG
+
+#ifdef DEBUG
+#define DBG(x...) printk(x)
+#else
+#define DBG(x...)
+#endif
+
+/* XXX Could be per-controller, but I don't think we risk anything by
+ * assuming we won't have both UniNorth and Bandit */
+/* Set to 1 by setup_uninorth() and setup_u3_agp() */
+static int has_uninorth;
+#ifdef CONFIG_PPC64
+/* The U3 AGP hose, recorded by setup_u3_agp() */
+static struct pci_controller *u3_agp;
+#else
+/* Set to 1 by init_second_ohare() when a "pci106b,7" node exists */
+static int has_second_ohare;
+#endif /* CONFIG_PPC64 */
+
+extern int pcibios_assign_bus_offset;
+
+/* Device nodes for which u3_ht_skip_device() fakes config accesses */
+struct device_node *k2_skiplist[2];
+
+/*
+ * Magic constants for enabling cache coherency in the bandit/PSX bridge.
+ * All of them are consumed by init_bandit().
+ */
+#define BANDIT_DEVID_2 8 /* second device id accepted besides PCI_DEVICE_ID_APPLE_BANDIT */
+#define BANDIT_REVID 3 /* revision that does not trigger the "Unknown revision" warning */
+
+#define BANDIT_DEVNUM 11 /* device number; config address uses 1UL << BANDIT_DEVNUM */
+#define BANDIT_MAGIC 0x50 /* config offset of the word holding the coherency bit */
+#define BANDIT_COHERENT 0x40 /* coherency bit inside that word */
+
+/*
+ * Scan @node and its siblings, descending into PCI<->PCI and CardBus
+ * bridges, for the highest "last bus" value in their "bus-range"
+ * properties.
+ *
+ * @node:   first node of the level to scan, may be NULL
+ * @higher: highest bus number known so far
+ *
+ * Returns the larger of @higher and every bus_range[1] found below.
+ */
+static int __init fixup_one_level_bus_range(struct device_node *node, int higher)
+{
+	for (; node; node = node->sibling) {
+		const int * bus_range;
+		const unsigned int *class_code;
+		int len;
+
+		/* For PCI<->PCI bridges or CardBus bridges, we go down */
+		class_code = of_get_property(node, "class-code", NULL);
+		if (!class_code || ((*class_code >> 8) != PCI_CLASS_BRIDGE_PCI &&
+			(*class_code >> 8) != PCI_CLASS_BRIDGE_CARDBUS))
+			continue;
+		bus_range = of_get_property(node, "bus-range", &len);
+		/*
+		 * Two cells are enough to read bus_range[1]. This matches the
+		 * "< 2 * sizeof(int)" rejection applied to the host bridge's
+		 * own property; a strict '>' ignored every two-cell property.
+		 */
+		if (bus_range != NULL && len >= 2 * sizeof(int)) {
+			if (bus_range[1] > higher)
+				higher = bus_range[1];
+		}
+		higher = fixup_one_level_bus_range(node->child, higher);
+	}
+	return higher;
+}
+
+/*
+ * Bridges on macs tend to report a wrong "last" bus in "bus-range".
+ * Raise the second cell of @bridge's property to the highest value found
+ * among the bridges below it.
+ *
+ * The numbers handled here are OF bus numbers, not Linux bus numbers.
+ */
+static void __init fixup_bus_range(struct device_node *bridge)
+{
+	struct property *range_prop;
+	int *range;
+
+	/* Nothing to fix without a "bus-range" holding at least two cells */
+	range_prop = of_find_property(bridge, "bus-range", NULL);
+	if (!range_prop || range_prop->length < 2 * sizeof(int))
+		return;
+
+	/* Patch the property value in place */
+	range = range_prop->value;
+	range[1] = fixup_one_level_bus_range(bridge->child, range[1]);
+}
+
+/*
+ * Apple MacRISC (U3, UniNorth, Bandit, Chaos) PCI controllers.
+ *
+ * The "Bandit" version is present in all early PCI PowerMacs,
+ * and up to the first ones using Grackle. Some machines may
+ * have 2 bandit controllers (2 PCI busses).
+ *
+ * "Chaos" is used in some "Bandit"-type machines as a bridge
+ * for the separate display bus. It is accessed the same
+ * way as bandit, but cannot be probed for devices. It therefore
+ * has its own config access functions.
+ *
+ * The "UniNorth" version is present in all Core99 machines
+ * (iBook, G4, new IMacs, and all the recent Apple machines).
+ * It contains 3 controllers in one ASIC.
+ *
+ * The U3 is the bridge used on G5 machines. It contains an
+ * AGP bus which is dealt with the old UniNorth access routines
+ * and a HyperTransport bus which uses its own set of access
+ * functions.
+ */
+
+/*
+ * Config address for a device on the hose's own first bus: a one-hot bit
+ * selects the slot, the function goes in bits 8 and up, and the register
+ * offset is rounded down to a multiple of 4.
+ * The macro uses its own devfn parameter rather than a caller variable,
+ * and shifts an unsigned 1 so that slot 31 is well defined.
+ */
+#define MACRISC_CFA0(devfn, off)	\
+	((1U << (unsigned int)PCI_SLOT(devfn)) \
+	| (((unsigned int)PCI_FUNC(devfn)) << 8) \
+	| (((unsigned int)(off)) & 0xFCUL))
+
+/*
+ * Config address for a device on any other bus: bus number in bits 16 and
+ * up, devfn in bits 8 and up, aligned register offset, and bit 0 set.
+ */
+#define MACRISC_CFA1(bus, devfn, off)	\
+	((((unsigned int)(bus)) << 16) \
+	|(((unsigned int)(devfn)) << 8) \
+	|(((unsigned int)(off)) & 0xFCUL) \
+	|1UL)
+
+/*
+ * map_bus hook for MacRISC bridges: program the config address register
+ * for (bus, dev_fn, offset) and return where the data can be accessed.
+ *
+ * Returns NULL when the bus has no hose, or when dev_fn is below device 11
+ * on the hose's first bus.
+ */
+static void __iomem *macrisc_cfg_map_bus(struct pci_bus *bus,
+ unsigned int dev_fn,
+ int offset)
+{
+ unsigned int caddr;
+ struct pci_controller *hose;
+
+ hose = pci_bus_to_host(bus);
+ if (hose == NULL)
+ return NULL;
+
+ if (bus->number == hose->first_busno) {
+ /* Devices numbered below 11 are not addressable on the first bus */
+ if (dev_fn < (11 << 3))
+ return NULL;
+ caddr = MACRISC_CFA0(dev_fn, offset);
+ } else
+ caddr = MACRISC_CFA1(bus->number, dev_fn, offset);
+
+ /* Uninorth will return garbage if we don't read back the value ! */
+ do {
+ out_le32(hose->cfg_addr, caddr);
+ } while (in_le32(hose->cfg_addr) != caddr);
+
+ /* Keep only the low offset bits: 3 bits with UniNorth, 2 otherwise */
+ offset &= has_uninorth ? 0x07 : 0x03;
+ return hose->cfg_data + offset;
+}
+
+/* Config ops for MacRISC bridges: generic accessors on top of macrisc_cfg_map_bus() */
+static struct pci_ops macrisc_pci_ops =
+{
+ .map_bus = macrisc_cfg_map_bus,
+ .read = pci_generic_config_read,
+ .write = pci_generic_config_write,
+};
+
+#ifdef CONFIG_PPC32
+/*
+ * map_bus hook for chaos: a (bus, devfn) pair is only mapped when the
+ * device tree has a matching child node carrying both "vendor-id" and
+ * "device-id". Everything else yields NULL.
+ */
+static void __iomem *chaos_map_bus(struct pci_bus *bus, unsigned int devfn,
+				   int offset)
+{
+	struct device_node *child;
+	const u32 *vid, *did;
+
+	if (offset >= 0x100)
+		return NULL;
+
+	child = of_pci_find_child_device(bus->dev.of_node, devfn);
+	if (!child)
+		return NULL;
+
+	vid = of_get_property(child, "vendor-id", NULL);
+	did = of_get_property(child, "device-id", NULL);
+	if (!vid || !did)
+		return NULL;
+
+	/*
+	 * Vendor 0x106b, device 3: offsets 0x10..0x24 are refused, except
+	 * for 0x14 and 0x18.
+	 */
+	if (*vid == 0x106b && *did == 3 &&
+	    offset >= 0x10 && offset <= 0x24 &&
+	    offset != 0x14 && offset != 0x18)
+		return NULL;
+
+	return macrisc_cfg_map_bus(bus, devfn, offset);
+}
+
+/* Config ops for chaos: generic accessors on top of chaos_map_bus() */
+static struct pci_ops chaos_pci_ops =
+{
+ .map_bus = chaos_map_bus,
+ .read = pci_generic_config_read,
+ .write = pci_generic_config_write,
+};
+
+/*
+ * Install the chaos config ops on @hose and map its config address and
+ * data windows at fixed offsets from @addr->start.
+ */
+static void __init setup_chaos(struct pci_controller *hose,
+ struct resource *addr)
+{
+ /* assume a `chaos' bridge */
+ hose->ops = &chaos_pci_ops;
+ hose->cfg_addr = ioremap(addr->start + 0x800000, 0x1000);
+ hose->cfg_data = ioremap(addr->start + 0xc00000, 0x1000);
+}
+#endif /* CONFIG_PPC32 */
+
+#ifdef CONFIG_PPC64
+/*
+ * These versions of U3 HyperTransport config space access ops do not
+ * implement self-view of the HT host yet
+ */
+
+/*
+ * This function deals with some "special cases" devices.
+ *
+ * 0 -> No special case
+ * 1 -> Skip the device but act as if the access was successful
+ * (return 0xff's on reads, eventually, cache config space
+ * accesses in a later version)
+ * -1 -> Hide the device (unsuccessful access)
+ */
+static int u3_ht_skip_device(struct pci_controller *hose,
+			     struct pci_bus *bus, unsigned int devfn)
+{
+	struct device_node *parent, *node;
+	int idx;
+
+	/*
+	 * Config cycles are only allowed to devices present in the OF
+	 * device-tree, because some revs of K2 on recent G5s misbehave.
+	 * The host bridge itself (devfn 0 on the root bus) is missing from
+	 * the tree but is known to be probeable.
+	 */
+	if (bus->self) {
+		parent = pci_device_to_OF_node(bus->self);
+	} else {
+		if (devfn == 0)
+			return 0;
+		parent = hose->dn;
+	}
+
+	/* Look for a child of the bus node with a matching devfn */
+	node = parent->child;
+	while (node) {
+		if (PCI_DN(node) && PCI_DN(node)->devfn == devfn)
+			break;
+		node = node->sibling;
+	}
+	if (!node)
+		return -1;
+
+	/*
+	 * Config cycles to a powered-down K2 device kill the machine, so
+	 * devices on the skip list get faked accesses instead.
+	 */
+	for (idx = 0; idx < 2; idx++) {
+		if (k2_skiplist[idx] == node)
+			return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Offset into the HT config window for a device on the first bus: devfn
+ * in bits 8 and up, register offset in the low bits. The macro uses its
+ * own "off" parameter rather than a caller variable named "offset".
+ */
+#define U3_HT_CFA0(devfn, off)		\
+	((((unsigned int)(devfn)) << 8) | (off))
+/* Same for any other bus: adds the bus number in bits 16 and up, plus 0x01000000 */
+#define U3_HT_CFA1(bus, devfn, off)	\
+	(U3_HT_CFA0(devfn, off) \
+	+ (((unsigned int)(bus)) << 16) \
+	+ 0x01000000UL)
+
+/*
+ * Compute the MMIO address of a U3 HT config register.
+ *
+ * *swap is set to 0 for devfn 0 on the hose's first bus, which is reached
+ * through hose->cfg_addr, and to 1 for every other device, reached through
+ * hose->cfg_data. The callers use it to choose between big-endian and
+ * little-endian accessors.
+ */
+static void __iomem *u3_ht_cfg_access(struct pci_controller *hose, u8 bus,
+				      u8 devfn, u8 offset, int *swap)
+{
+	/* Any bus other than the hose's first one */
+	if (bus != hose->first_busno) {
+		*swap = 1;
+		return hose->cfg_data + U3_HT_CFA1(bus, devfn, offset);
+	}
+
+	/* A device other than devfn 0 on the first bus */
+	if (devfn != 0) {
+		*swap = 1;
+		return hose->cfg_data + U3_HT_CFA0(devfn, offset);
+	}
+
+	/* devfn 0 on the first bus: registers sit at offset * 4 in cfg_addr */
+	*swap = 0;
+	return ((void __iomem *)hose->cfg_addr) + (offset << 2);
+}
+
+/*
+ * Config-space read for the U3 HT bus.
+ *
+ * Returns PCIBIOS_DEVICE_NOT_FOUND when the bus has no hose or the device
+ * is hidden by u3_ht_skip_device(), PCIBIOS_BAD_REGISTER_NUMBER for
+ * offsets of 0x100 and above, and PCIBIOS_SUCCESSFUL otherwise. Devices on
+ * the skip list read as all ones without touching the hardware.
+ */
+static int u3_ht_read_config(struct pci_bus *bus, unsigned int devfn,
+ int offset, int len, u32 *val)
+{
+ struct pci_controller *hose;
+ void __iomem *addr;
+ int swap;
+
+ hose = pci_bus_to_host(bus);
+ if (hose == NULL)
+ return PCIBIOS_DEVICE_NOT_FOUND;
+ if (offset >= 0x100)
+ return PCIBIOS_BAD_REGISTER_NUMBER;
+ addr = u3_ht_cfg_access(hose, bus->number, devfn, offset, &swap);
+ if (!addr)
+ return PCIBIOS_DEVICE_NOT_FOUND;
+
+ switch (u3_ht_skip_device(hose, bus, devfn)) {
+ case 0:
+ break;
+ case 1:
+ /* Skipped device: pretend success and return all ones */
+ switch (len) {
+ case 1:
+ *val = 0xff; break;
+ case 2:
+ *val = 0xffff; break;
+ default:
+ *val = 0xfffffffful; break;
+ }
+ return PCIBIOS_SUCCESSFUL;
+ default:
+ return PCIBIOS_DEVICE_NOT_FOUND;
+ }
+
+ /*
+ * Note: the caller has already checked that offset is
+ * suitably aligned and that len is 1, 2 or 4.
+ */
+ switch (len) {
+ case 1:
+ *val = in_8(addr);
+ break;
+ case 2:
+ /* swap selects the little-endian accessor */
+ *val = swap ? in_le16(addr) : in_be16(addr);
+ break;
+ default:
+ *val = swap ? in_le32(addr) : in_be32(addr);
+ break;
+ }
+ return PCIBIOS_SUCCESSFUL;
+}
+
+/*
+ * Config-space write for the U3 HT bus.
+ *
+ * Returns PCIBIOS_DEVICE_NOT_FOUND when the bus has no hose or the device
+ * is hidden by u3_ht_skip_device(), PCIBIOS_BAD_REGISTER_NUMBER for
+ * offsets of 0x100 and above, and PCIBIOS_SUCCESSFUL otherwise. Writes to
+ * devices on the skip list are dropped but still reported as successful.
+ */
+static int u3_ht_write_config(struct pci_bus *bus, unsigned int devfn,
+ int offset, int len, u32 val)
+{
+ struct pci_controller *hose;
+ void __iomem *addr;
+ int swap;
+
+ hose = pci_bus_to_host(bus);
+ if (hose == NULL)
+ return PCIBIOS_DEVICE_NOT_FOUND;
+ if (offset >= 0x100)
+ return PCIBIOS_BAD_REGISTER_NUMBER;
+ addr = u3_ht_cfg_access(hose, bus->number, devfn, offset, &swap);
+ if (!addr)
+ return PCIBIOS_DEVICE_NOT_FOUND;
+
+ switch (u3_ht_skip_device(hose, bus, devfn)) {
+ case 0:
+ break;
+ case 1:
+ /* Skipped device: drop the write but report success */
+ return PCIBIOS_SUCCESSFUL;
+ default:
+ return PCIBIOS_DEVICE_NOT_FOUND;
+ }
+
+ /*
+ * Note: the caller has already checked that offset is
+ * suitably aligned and that len is 1, 2 or 4.
+ */
+ switch (len) {
+ case 1:
+ out_8(addr, val);
+ break;
+ case 2:
+ /* swap selects the little-endian accessor */
+ swap ? out_le16(addr, val) : out_be16(addr, val);
+ break;
+ default:
+ swap ? out_le32(addr, val) : out_be32(addr, val);
+ break;
+ }
+ return PCIBIOS_SUCCESSFUL;
+}
+
+/* Config ops for the U3 HT bus: dedicated read/write routines, no map_bus hook */
+static struct pci_ops u3_ht_pci_ops =
+{
+ .read = u3_ht_read_config,
+ .write = u3_ht_write_config,
+};
+
+/*
+ * Config address for a device on the hose's own first bus: one-hot slot
+ * select, function in bits 8 and up, offset bits 8 and up moved to bits 28
+ * and up, and the low offset byte rounded down to a multiple of 4.
+ * The macro uses its own devfn parameter rather than a caller variable,
+ * and shifts an unsigned 1 so that slot 31 is well defined.
+ */
+#define U4_PCIE_CFA0(devfn, off)	\
+	((1U << ((unsigned int)PCI_SLOT(devfn)))	\
+	| (((unsigned int)PCI_FUNC(devfn)) << 8)	\
+	| ((((unsigned int)(off)) >> 8) << 28) \
+	| (((unsigned int)(off)) & 0xfcU))
+
+/*
+ * Config address for a device on any other bus: bus number in bits 16 and
+ * up, devfn in bits 8 and up, the same offset split, and bit 0 set.
+ */
+#define U4_PCIE_CFA1(bus, devfn, off)	\
+	((((unsigned int)(bus)) << 16) \
+	|(((unsigned int)(devfn)) << 8)	\
+	| ((((unsigned int)(off)) >> 8) << 28) \
+	|(((unsigned int)(off)) & 0xfcU)	\
+	|1UL)
+
+/*
+ * map_bus hook for U4 PCIe: program the config address register for
+ * (bus, dev_fn, offset) and return where the data can be accessed.
+ *
+ * Returns NULL for offsets of 0x1000 and above or when the bus has no hose.
+ */
+static void __iomem *u4_pcie_cfg_map_bus(struct pci_bus *bus,
+ unsigned int dev_fn,
+ int offset)
+{
+ struct pci_controller *hose;
+ unsigned int caddr;
+
+ if (offset >= 0x1000)
+ return NULL;
+
+ hose = pci_bus_to_host(bus);
+ if (!hose)
+ return NULL;
+
+ if (bus->number == hose->first_busno) {
+ caddr = U4_PCIE_CFA0(dev_fn, offset);
+ } else
+ caddr = U4_PCIE_CFA1(bus->number, dev_fn, offset);
+
+ /* Uninorth will return garbage if we don't read back the value ! */
+ do {
+ out_le32(hose->cfg_addr, caddr);
+ } while (in_le32(hose->cfg_addr) != caddr);
+
+ /* Only the two low offset bits select a byte in the data window */
+ offset &= 0x03;
+ return hose->cfg_data + offset;
+}
+
+/* Config ops for U4 PCIe: generic accessors on top of u4_pcie_cfg_map_bus() */
+static struct pci_ops u4_pcie_pci_ops =
+{
+ .map_bus = u4_pcie_cfg_map_bus,
+ .read = pci_generic_config_read,
+ .write = pci_generic_config_write,
+};
+
+/*
+ * Header fixup for Apple device 0x5b: give the device an OF node when it
+ * has none.
+ */
+static void pmac_pci_fixup_u4_of_node(struct pci_dev *dev)
+{
+	/* Leave devices that already have a node alone */
+	if (dev->dev.of_node)
+		return;
+
+	/*
+	 * Apple's device-tree "hides" the root complex virtual P2P bridge
+	 * on U4, yet Linux sees it, so the PCI <-> OF matching code cannot
+	 * match the devices below it. Pointing the bridge at the PHB node
+	 * is not entirely correct, but it fixes the matching, breaks
+	 * nothing else and is the simplest possible fix.
+	 */
+	dev->dev.of_node = pcibios_get_phb_of_node(dev->bus);
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_APPLE, 0x5b, pmac_pci_fixup_u4_of_node);
+
+#endif /* CONFIG_PPC64 */
+
+#ifdef CONFIG_PPC32
+/*
+ * For a bandit bridge, turn on cache coherency if necessary.
+ * N.B. we could clean this up using the hose ops directly.
+ *
+ * @bp: the hose; its cfg_addr and cfg_data must already be mapped.
+ *
+ * Nothing is changed when the device at BANDIT_DEVNUM is not one of the
+ * two accepted bandit ids, or when the coherency bit is already set.
+ */
+static void __init init_bandit(struct pci_controller *bp)
+{
+ unsigned int vendev, magic;
+ int rev;
+
+ /* read the word at offset 0 in config space for device 11 */
+ out_le32(bp->cfg_addr, (1UL << BANDIT_DEVNUM) + PCI_VENDOR_ID);
+ udelay(2);
+ vendev = in_le32(bp->cfg_data);
+ if (vendev == (PCI_DEVICE_ID_APPLE_BANDIT << 16) +
+ PCI_VENDOR_ID_APPLE) {
+ /* read the revision id */
+ out_le32(bp->cfg_addr,
+ (1UL << BANDIT_DEVNUM) + PCI_REVISION_ID);
+ udelay(2);
+ rev = in_8(bp->cfg_data);
+ /* An unexpected revision is only warned about, not rejected */
+ if (rev != BANDIT_REVID)
+ printk(KERN_WARNING
+ "Unknown revision %d for bandit\n", rev);
+ } else if (vendev != (BANDIT_DEVID_2 << 16) + PCI_VENDOR_ID_APPLE) {
+ printk(KERN_WARNING "bandit isn't? (%x)\n", vendev);
+ return;
+ }
+
+ /* read the word at offset 0x50 */
+ out_le32(bp->cfg_addr, (1UL << BANDIT_DEVNUM) + BANDIT_MAGIC);
+ udelay(2);
+ magic = in_le32(bp->cfg_data);
+ if ((magic & BANDIT_COHERENT) != 0)
+ return;
+ /* Set the coherency bit and write the word back */
+ magic |= BANDIT_COHERENT;
+ udelay(2);
+ out_le32(bp->cfg_data, magic);
+ printk(KERN_INFO "Cache coherency enabled for bandit/PSX\n");
+}
+
+/*
+ * Tweak the PCI-PCI bridge chip on the blue & white G3s.
+ *
+ * Clears PCI_BRIDGE_CTL_MASTER_ABORT in the bridge control register of the
+ * "pci-bridge" node whose parent is named "pci". Does nothing if no such
+ * node, PCI address or hose can be found.
+ */
+static void __init init_p2pbridge(void)
+{
+ struct device_node *p2pbridge;
+ struct pci_controller* hose;
+ u8 bus, devfn;
+ u16 val;
+
+ /* XXX it would be better here to identify the specific
+ PCI-PCI bridge chip we have. */
+ p2pbridge = of_find_node_by_name(NULL, "pci-bridge");
+ if (p2pbridge == NULL || !of_node_name_eq(p2pbridge->parent, "pci"))
+ goto done;
+ if (pci_device_from_OF_node(p2pbridge, &bus, &devfn) < 0) {
+ DBG("Can't find PCI infos for PCI<->PCI bridge\n");
+ goto done;
+ }
+ /* Warning: At this point, we have not yet renumbered all busses.
+ * So we must use OF walking to find out hose
+ */
+ hose = pci_find_hose_for_OF_device(p2pbridge);
+ if (!hose) {
+ DBG("Can't find hose for PCI<->PCI bridge\n");
+ goto done;
+ }
+ if (early_read_config_word(hose, bus, devfn,
+ PCI_BRIDGE_CONTROL, &val) < 0) {
+ printk(KERN_ERR "init_p2pbridge: couldn't read bridge"
+ " control\n");
+ goto done;
+ }
+ val &= ~PCI_BRIDGE_CTL_MASTER_ABORT;
+ early_write_config_word(hose, bus, devfn, PCI_BRIDGE_CONTROL, val);
+done:
+ /* Every exit path drops the reference taken by the lookup */
+ of_node_put(p2pbridge);
+}
+
+/*
+ * Enable memory decoding and bus mastering, and disable I/O decoding, on
+ * the "pci106b,7" device if one exists, then record its presence in
+ * has_second_ohare.
+ */
+static void __init init_second_ohare(void)
+{
+ struct device_node *np = of_find_node_by_name(NULL, "pci106b,7");
+ unsigned char bus, devfn;
+ unsigned short cmd;
+
+ if (np == NULL)
+ return;
+
+ /* This must run before we initialize the PICs since the second
+ * ohare hosts a PIC that will be accessed there.
+ */
+ if (pci_device_from_OF_node(np, &bus, &devfn) == 0) {
+ struct pci_controller* hose =
+ pci_find_hose_for_OF_device(np);
+ if (!hose) {
+ printk(KERN_ERR "Can't find PCI hose for OHare2 !\n");
+ of_node_put(np);
+ return;
+ }
+ /* NOTE(review): the read's return value is not checked before cmd is used */
+ early_read_config_word(hose, bus, devfn, PCI_COMMAND, &cmd);
+ cmd |= PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER;
+ cmd &= ~PCI_COMMAND_IO;
+ early_write_config_word(hose, bus, devfn, PCI_COMMAND, cmd);
+ }
+ /* The flag is set even when no PCI address was found for the node */
+ has_second_ohare = 1;
+ of_node_put(np);
+}
+
+/*
+ * Some Apple desktop machines have a NEC PD720100A USB2 controller
+ * on the motherboard. Open Firmware, on these, will disable the
+ * EHCI part of it so it behaves like a pair of OHCI's. This fixup
+ * code re-enables it ;)
+ *
+ * Every "usb" node with vendor-id 0x1033 and device-id 0x0035 is checked;
+ * only function 0 is touched, by clearing bit 0 of config register 0xe4.
+ */
+static void __init fixup_nec_usb2(void)
+{
+ struct device_node *nec;
+
+ for_each_node_by_name(nec, "usb") {
+ struct pci_controller *hose;
+ u32 data;
+ const u32 *prop;
+ u8 bus, devfn;
+
+ prop = of_get_property(nec, "vendor-id", NULL);
+ if (prop == NULL)
+ continue;
+ if (0x1033 != *prop)
+ continue;
+ prop = of_get_property(nec, "device-id", NULL);
+ if (prop == NULL)
+ continue;
+ if (0x0035 != *prop)
+ continue;
+ prop = of_get_property(nec, "reg", NULL);
+ if (prop == NULL)
+ continue;
+ /* The first "reg" cell carries devfn in bits 8-15 and bus in bits 16-23 */
+ devfn = (prop[0] >> 8) & 0xff;
+ bus = (prop[0] >> 16) & 0xff;
+ if (PCI_FUNC(devfn) != 0)
+ continue;
+ hose = pci_find_hose_for_OF_device(nec);
+ if (!hose)
+ continue;
+ early_read_config_dword(hose, bus, devfn, 0xe4, &data);
+ /* Bit 0 set means the EHCI part is disabled */
+ if (data & 1UL) {
+ printk("Found NEC PD720100A USB2 chip with disabled"
+ " EHCI, fixing up...\n");
+ data &= ~1UL;
+ early_write_config_dword(hose, bus, devfn, 0xe4, data);
+ }
+ }
+}
+
+/*
+ * Install the MacRISC config ops on @hose, map its config address and data
+ * windows at fixed offsets from @addr->start, then run init_bandit().
+ */
+static void __init setup_bandit(struct pci_controller *hose,
+ struct resource *addr)
+{
+ hose->ops = &macrisc_pci_ops;
+ hose->cfg_addr = ioremap(addr->start + 0x800000, 0x1000);
+ hose->cfg_data = ioremap(addr->start + 0xc00000, 0x1000);
+ init_bandit(hose);
+}
+
+/*
+ * Set up a UniNorth hose: request bus renumbering, flag has_uninorth,
+ * install the MacRISC config ops and map the config windows.
+ *
+ * Returns nonzero only for the bridge at 0xf2000000; pmac_add_bridge()
+ * uses the result as its "primary" flag.
+ */
+static int __init setup_uninorth(struct pci_controller *hose,
+ struct resource *addr)
+{
+ pci_add_flags(PCI_REASSIGN_ALL_BUS);
+ has_uninorth = 1;
+ hose->ops = &macrisc_pci_ops;
+ hose->cfg_addr = ioremap(addr->start + 0x800000, 0x1000);
+ hose->cfg_data = ioremap(addr->start + 0xc00000, 0x1000);
+ /* We "know" that the bridge at f2000000 has the PCI slots. */
+ return addr->start == 0xf2000000;
+}
+#endif /* CONFIG_PPC32 */
+
+#ifdef CONFIG_PPC64
+/*
+ * Set up the U3 AGP hose: bus numbers 0xf0..0xff, MacRISC config ops,
+ * config windows at hard-coded addresses. The hose is remembered in u3_agp.
+ */
+static void __init setup_u3_agp(struct pci_controller* hose)
+{
+ /* On G5, we move AGP up to high bus number so we don't need
+ * to reassign bus numbers for HT. If we ever have P2P bridges
+ * on AGP, we'll have to move pci_assign_all_busses to the
+ * pci_controller structure so we enable it for AGP and not for
+ * HT children.
+ * We hard code the address because of the different size of
+ * the reg address cell, we shall fix that by killing struct
+ * reg_property and using some accessor functions instead
+ */
+ hose->first_busno = 0xf0;
+ hose->last_busno = 0xff;
+ has_uninorth = 1;
+ hose->ops = &macrisc_pci_ops;
+ hose->cfg_addr = ioremap(0xf0000000 + 0x800000, 0x1000);
+ hose->cfg_data = ioremap(0xf0000000 + 0xc00000, 0x1000);
+ u3_agp = hose;
+}
+
+/*
+ * Set up the U4 PCIe hose: U4 config ops, config windows at hard-coded
+ * addresses, and a bus range forced to 0x00..0xff.
+ */
+static void __init setup_u4_pcie(struct pci_controller* hose)
+{
+ /* We currently only implement the "non-atomic" config space, to
+ * be optimised later.
+ */
+ hose->ops = &u4_pcie_pci_ops;
+ hose->cfg_addr = ioremap(0xf0000000 + 0x800000, 0x1000);
+ hose->cfg_data = ioremap(0xf0000000 + 0xc00000, 0x1000);
+
+ /* The bus contains a bridge from root -> device, we need to
+ * make it visible on bus 0 so that we pick the right type
+ * of config cycles. If we didn't, we would have to force all
+ * config cycles to be type 1. So we override the "bus-range"
+ * property here
+ */
+ hose->first_busno = 0x00;
+ hose->last_busno = 0xff;
+}
+
+/*
+ * Turn the bits of @decode into memory resources of @hose.
+ *
+ * Bits are scanned from the most significant one. Each of the first 16
+ * bits stands for a 16MB region starting at 0xf0000000, each later bit for
+ * a 256MB region starting at 0. Adjacent regions are merged into one
+ * resource; at most 3 resources are created.
+ */
+static void __init parse_region_decode(struct pci_controller *hose,
+ u32 decode)
+{
+ unsigned long base, end, next = -1;
+ int i, cur = -1;
+
+ /* Iterate through all bits. We ignore the last bit as this region is
+ * reserved for the ROM among other niceties
+ */
+ for (i = 0; i < 31; i++) {
+ if ((decode & (0x80000000 >> i)) == 0)
+ continue;
+ if (i < 16) {
+ base = 0xf0000000 | (((u32)i) << 24);
+ end = base + 0x00ffffff;
+ } else {
+ base = ((u32)i-16) << 28;
+ end = base + 0x0fffffff;
+ }
+ /* Start a new resource unless this region continues the previous one */
+ if (base != next) {
+ if (++cur >= 3) {
+ printk(KERN_WARNING "PCI: Too many ranges !\n");
+ break;
+ }
+ hose->mem_resources[cur].flags = IORESOURCE_MEM;
+ hose->mem_resources[cur].name = hose->dn->full_name;
+ hose->mem_resources[cur].start = base;
+ hose->mem_resources[cur].end = end;
+ hose->mem_offset[cur] = 0;
+ DBG(" %d: 0x%08lx-0x%08lx\n", cur, base, end);
+ } else {
+ /* Contiguous with the current resource: just extend it */
+ DBG(" : -0x%08lx\n", end);
+ hose->mem_resources[cur].end = end;
+ }
+ next = end + 1;
+ }
+}
+
+/*
+ * Set up the U3/U4 HyperTransport hose: HT config ops, config and self
+ * register windows taken from the device node, a hard-coded IO region,
+ * bus numbers 0..0xef, and memory resources built from the bridge's decode
+ * register.
+ *
+ * If either address cannot be read from the node, only hose->ops is set.
+ */
+static void __init setup_u3_ht(struct pci_controller* hose)
+{
+ struct device_node *np = hose->dn;
+ struct resource cfg_res, self_res;
+ u32 decode;
+
+ hose->ops = &u3_ht_pci_ops;
+
+ /* Get base addresses from OF tree
+ */
+ if (of_address_to_resource(np, 0, &cfg_res) ||
+ of_address_to_resource(np, 1, &self_res)) {
+ printk(KERN_ERR "PCI: Failed to get U3/U4 HT resources !\n");
+ return;
+ }
+
+ /* Map external cfg space access into cfg_data and self registers
+ * into cfg_addr
+ */
+ hose->cfg_data = ioremap(cfg_res.start, 0x02000000);
+ hose->cfg_addr = ioremap(self_res.start, resource_size(&self_res));
+
+ /*
+ * /ht node doesn't expose a "ranges" property, we read the register
+ * that controls the decoding logic and use that for memory regions.
+ * The IO region is hard coded since it is fixed in HW as well.
+ */
+ hose->io_base_phys = 0xf4000000;
+ hose->pci_io_size = 0x00400000;
+ hose->io_resource.name = np->full_name;
+ hose->io_resource.start = 0;
+ hose->io_resource.end = 0x003fffff;
+ hose->io_resource.flags = IORESOURCE_IO;
+ hose->first_busno = 0;
+ hose->last_busno = 0xef;
+
+ /* Note: fix offset when cfg_addr becomes a void * */
+ decode = in_be32(hose->cfg_addr + 0x80);
+
+ DBG("PCI: Apple HT bridge decode register: 0x%08x\n", decode);
+
+ /* NOTE: The decode register setup is a bit weird... region
+ * 0xf8000000 for example is marked as enabled in there while it's
+ * actually the memory controller registers.
+ * That means that we are incorrectly attributing it to HT.
+ *
+ * In a similar vein, region 0xf4000000 is actually the HT IO space but
+ * also marked as enabled in here and 0xf9000000 is used by some other
+ * internal bits of the northbridge.
+ *
+ * Unfortunately, we can't just mask out those bits as we would end
+ * up with more regions than we can cope (linux can only cope with
+ * 3 memory regions for a PHB at this stage).
+ *
+ * So for now, we just do a little hack. We happen to -know- that
+ * Apple firmware doesn't assign things below 0xfa000000 for that
+ * bridge anyway so we mask out all bits we don't want.
+ */
+ decode &= 0x003fffff;
+
+ /* Now parse the resulting bits and build resources */
+ parse_region_decode(hose, decode);
+}
+#endif /* CONFIG_PPC64 */
+
+/*
+ * We assume that if we have a G3 powermac, we have one bridge called
+ * "pci" (a MPC106) and no bandit or chaos bridges, and contrariwise,
+ * if we have one or more bandit or chaos bridges, we don't have a MPC106.
+ */
+/*
+ * Register one PCI host bridge described by device node @dev.
+ *
+ * Allocates a pci_controller, takes its bus range from the "bus-range"
+ * property (falling back to 0..0xff), runs the setup routine matching the
+ * bridge type, then processes the "ranges" property and fixes up the OF
+ * "bus-range".
+ *
+ * Returns 0 on success or -ENOMEM if no controller could be allocated.
+ */
+static int __init pmac_add_bridge(struct device_node *dev)
+{
+	int len;
+	struct pci_controller *hose;
+	struct resource rsrc;
+	char *disp_name;
+	const int *bus_range;
+	int primary = 1;
+
+	DBG("Adding PCI host bridge %pOF\n", dev);
+
+	/* Fetch host bridge registers address */
+	of_address_to_resource(dev, 0, &rsrc);
+
+	/* Get bus range if any */
+	bus_range = of_get_property(dev, "bus-range", &len);
+	if (bus_range == NULL || len < 2 * sizeof(int)) {
+		printk(KERN_WARNING "Can't get bus-range for %pOF, assume"
+		       " bus 0\n", dev);
+		/*
+		 * Drop a property that is too short, so that the defaults
+		 * below are used instead of reading past its end.
+		 */
+		bus_range = NULL;
+	}
+
+	hose = pcibios_alloc_controller(dev);
+	if (!hose)
+		return -ENOMEM;
+	hose->first_busno = bus_range ? bus_range[0] : 0;
+	hose->last_busno = bus_range ? bus_range[1] : 0xff;
+	hose->controller_ops = pmac_pci_controller_ops;
+
+	disp_name = NULL;
+
+	/* 64 bits only bridges */
+#ifdef CONFIG_PPC64
+	if (of_device_is_compatible(dev, "u3-agp")) {
+		setup_u3_agp(hose);
+		disp_name = "U3-AGP";
+		primary = 0;
+	} else if (of_device_is_compatible(dev, "u3-ht")) {
+		setup_u3_ht(hose);
+		disp_name = "U3-HT";
+		primary = 1;
+	} else if (of_device_is_compatible(dev, "u4-pcie")) {
+		setup_u4_pcie(hose);
+		disp_name = "U4-PCIE";
+		primary = 0;
+	}
+	printk(KERN_INFO "Found %s PCI host bridge.  Firmware bus number:"
+	       " %d->%d\n", disp_name, hose->first_busno, hose->last_busno);
+#endif /* CONFIG_PPC64 */
+
+	/* 32 bits only bridges */
+#ifdef CONFIG_PPC32
+	if (of_device_is_compatible(dev, "uni-north")) {
+		primary = setup_uninorth(hose, &rsrc);
+		disp_name = "UniNorth";
+	} else if (of_node_name_eq(dev, "pci")) {
+		/* XXX assume this is a mpc106 (grackle) */
+		setup_grackle(hose);
+		disp_name = "Grackle (MPC106)";
+	} else if (of_node_name_eq(dev, "bandit")) {
+		setup_bandit(hose, &rsrc);
+		disp_name = "Bandit";
+	} else if (of_node_name_eq(dev, "chaos")) {
+		setup_chaos(hose, &rsrc);
+		disp_name = "Chaos";
+		primary = 0;
+	}
+	printk(KERN_INFO "Found %s PCI host bridge at 0x%016llx. "
+	       "Firmware bus number: %d->%d\n",
+		disp_name, (unsigned long long)rsrc.start, hose->first_busno,
+		hose->last_busno);
+#endif /* CONFIG_PPC32 */
+
+	DBG(" ->Hose at 0x%p, cfg_addr=0x%p,cfg_data=0x%p\n",
+		hose, hose->cfg_addr, hose->cfg_data);
+
+	/* Interpret the "ranges" property */
+	/* This also maps the I/O region and sets isa_io/mem_base */
+	pci_process_bridge_OF_ranges(hose, dev, primary);
+
+	/* Fixup "bus-range" OF property */
+	fixup_bus_range(dev);
+
+	/* create pci_dn's for DT nodes under this PHB */
+	if (IS_ENABLED(CONFIG_PPC64))
+		pci_devs_phb_init_dynamic(hose);
+
+	return 0;
+}
+
+/*
+ * Per-device interrupt fixup. Only acts on CONFIG_PPC32 machines with a
+ * second ohare chip, and only for the DEC Tulip Plus device.
+ */
+void pmac_pci_irq_fixup(struct pci_dev *dev)
+{
+#ifdef CONFIG_PPC32
+	if (!has_second_ohare)
+		return;
+	if (dev->vendor != PCI_VENDOR_ID_DEC ||
+	    dev->device != PCI_DEVICE_ID_DEC_TULIP_PLUS)
+		return;
+
+	/*
+	 * Modem/ethernet combo controller: the device tree says interrupt
+	 * 27, which is right for the ethernet-only board but bogus for the
+	 * combo board. The real interrupt is 28 on the second controller,
+	 * i.e. 28+32 = 60.
+	 */
+	dev->irq = irq_create_mapping(NULL, 60);
+	irq_set_irq_type(dev->irq, IRQ_TYPE_LEVEL_LOW);
+#endif /* CONFIG_PPC32 */
+}
+
+#ifdef CONFIG_PPC64
+/*
+ * Root bridge preparation hook.  Only the U3 AGP host bridge needs any
+ * work; every other bridge is left alone.  Always returns 0.
+ */
+static int pmac_pci_root_bridge_prepare(struct pci_host_bridge *bridge)
+{
+	struct pci_controller *phb = pci_bus_to_host(bridge->bus);
+	struct device_node *kid;
+
+	if (phb == u3_agp) {
+		/*
+		 * Fix up the PCI<->OF mapping for U3 AGP, needed because of
+		 * bus renumbering.  This assumes there is no P2P bridge on
+		 * the AGP bus, which should be a safe assumption for now;
+		 * something better is wanted in the future.
+		 */
+		PCI_DN(phb->dn)->busno = 0xf0;
+		for_each_child_of_node(phb->dn, kid)
+			PCI_DN(kid)->busno = 0xf0;
+	}
+
+	return 0;
+}
+#endif /* CONFIG_PPC64 */
+
+/*
+ * Walk the children of the device-tree root, register every PowerMac PCI
+ * host bridge found there, then run the 64-bit or 32-bit specific
+ * follow-up setup.
+ */
+void __init pmac_pci_init(void)
+{
+	struct device_node *root, *child;
+	struct device_node *ht_node __maybe_unused = NULL;
+
+	pci_set_flags(PCI_CAN_SKIP_ISA_ALIGN);
+
+	root = of_find_node_by_path("/");
+	if (!root) {
+		printk(KERN_CRIT "pmac_pci_init: can't find root "
+		       "of device tree\n");
+		return;
+	}
+
+	for_each_child_of_node(root, child) {
+		bool is_bridge = of_node_name_eq(child, "bandit") ||
+				 of_node_name_eq(child, "chaos") ||
+				 of_node_name_eq(child, "pci");
+
+		/* Keep a reference on nodes that were added as bridges. */
+		if (is_bridge && pmac_add_bridge(child) == 0)
+			of_node_get(child);
+
+		/* Remember the HT node (with a reference) for later. */
+		if (of_node_name_eq(child, "ht")) {
+			of_node_get(child);
+			ht_node = child;
+		}
+	}
+	of_node_put(root);
+
+#ifdef CONFIG_PPC64
+	/*
+	 * Probe HT last as it relies on the AGP resources being set up
+	 * already.  The reference is dropped if the bridge was not added.
+	 */
+	if (ht_node && pmac_add_bridge(ht_node) != 0)
+		of_node_put(ht_node);
+
+	ppc_md.pcibios_root_bridge_prepare = pmac_pci_root_bridge_prepare;
+	/* pmac_check_ht_link(); */
+
+#else /* CONFIG_PPC64 */
+	init_p2pbridge();
+	init_second_ohare();
+	fixup_nec_usb2();
+
+	/*
+	 * There are still some issues with the Xserve G4; enabling an
+	 * offset between bus numbers and domains when all busses are
+	 * reassigned should help for now.
+	 */
+	if (pci_has_flag(PCI_REASSIGN_ALL_BUS))
+		pcibios_assign_bus_offset = 0x10;
+#endif
+}
+
+#ifdef CONFIG_PPC32
+/*
+ * Device-enable hook (32-bit only).
+ *
+ * Returns false to refuse enabling the device, true otherwise.  For
+ * FireWire and GMAC cells below UniNorth it also powers the cell back
+ * on and rewrites a few config header fields.
+ */
+static bool pmac_pci_enable_device_hook(struct pci_dev *dev)
+{
+	struct device_node *np = pci_device_to_OF_node(dev);
+	int under_uninorth;
+	int fix_header = 0;
+	u16 cmd;
+
+	if (!np) {
+		/*
+		 * We don't want to enable USB controllers absent from the
+		 * OF tree (iBook second controller).
+		 */
+		if (dev->vendor == PCI_VENDOR_ID_APPLE &&
+		    dev->class == PCI_CLASS_SERIAL_USB_OHCI) {
+			printk(KERN_INFO "Apple USB OHCI %s disabled by firmware\n",
+			       pci_name(dev));
+			return false;
+		}
+		return true;
+	}
+
+	under_uninorth = np->parent &&
+		of_device_is_compatible(np->parent, "uni-north");
+
+	if (under_uninorth) {
+		/*
+		 * Firewire & GMAC were disabled after PCI probe; a driver
+		 * is now claiming them, so they must be re-enabled.
+		 */
+		if (of_node_name_eq(np, "firewire") &&
+		    (of_device_is_compatible(np, "pci106b,18") ||
+		     of_device_is_compatible(np, "pci106b,30") ||
+		     of_device_is_compatible(np, "pci11c1,5811"))) {
+			pmac_call_feature(PMAC_FTR_1394_CABLE_POWER, np, 0, 1);
+			pmac_call_feature(PMAC_FTR_1394_ENABLE, np, 0, 1);
+			fix_header = 1;
+		}
+		if (of_node_name_eq(np, "ethernet") &&
+		    of_device_is_compatible(np, "gmac")) {
+			pmac_call_feature(PMAC_FTR_GMAC_ENABLE, np, 0, 1);
+			fix_header = 1;
+		}
+	}
+
+	if (!fix_header)
+		return true;
+
+	/*
+	 * Fix up various header fields on 32 bits.  This is not done on
+	 * 64 bits, as some of these have strange values behind the HT
+	 * bridge and we must not, for example, enable MWI or set the
+	 * cache line size on them.
+	 */
+	pci_read_config_word(dev, PCI_COMMAND, &cmd);
+	cmd |= PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER
+		| PCI_COMMAND_INVALIDATE;
+	pci_write_config_word(dev, PCI_COMMAND, cmd);
+	pci_write_config_byte(dev, PCI_LATENCY_TIMER, 16);
+
+	pci_write_config_byte(dev, PCI_CACHE_LINE_SIZE,
+			      L1_CACHE_BYTES >> 2);
+
+	return true;
+}
+
+/*
+ * Header fixup: clear the flags of resource 0 on OHCI controllers that
+ * have no device-tree node, so that no resources get assigned to USB
+ * controllers absent from the OF tree (iBook second controller).
+ */
+static void pmac_pci_fixup_ohci(struct pci_dev *dev)
+{
+	struct device_node *np = pci_device_to_OF_node(dev);
+
+	if (dev->class != PCI_CLASS_SERIAL_USB_OHCI)
+		return;
+	if (np)
+		return;
+
+	dev->resource[0].flags = 0;
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_APPLE, PCI_ANY_ID, pmac_pci_fixup_ohci);
+
+/*
+ * Some devices are powered down once they have been probed; they get
+ * powered back on later.  This covers the FireWire and GMAC cells whose
+ * parent node is compatible with "uni-north".
+ */
+void __init pmac_pcibios_after_init(void)
+{
+	struct device_node *np;
+
+	for_each_node_by_name(np, "firewire") {
+		if (!np->parent)
+			continue;
+		if (!of_device_is_compatible(np, "pci106b,18") &&
+		    !of_device_is_compatible(np, "pci106b,30") &&
+		    !of_device_is_compatible(np, "pci11c1,5811"))
+			continue;
+		if (!of_device_is_compatible(np->parent, "uni-north"))
+			continue;
+
+		pmac_call_feature(PMAC_FTR_1394_ENABLE, np, 0, 0);
+		pmac_call_feature(PMAC_FTR_1394_CABLE_POWER, np, 0, 0);
+	}
+
+	for_each_node_by_name(np, "ethernet") {
+		if (!np->parent)
+			continue;
+		if (!of_device_is_compatible(np, "gmac"))
+			continue;
+		if (!of_device_is_compatible(np->parent, "uni-north"))
+			continue;
+
+		pmac_call_feature(PMAC_FTR_GMAC_ENABLE, np, 0, 0);
+	}
+}
+
+/*
+ * Fix the interrupt routing on the various TI cardbus bridges used on
+ * powerbooks.  Does nothing on other machines, other vendors or other
+ * TI device IDs.
+ */
+static void pmac_pci_fixup_cardbus(struct pci_dev *dev)
+{
+	u8 reg;
+
+	if (!machine_is(powermac))
+		return;
+	if (dev->vendor != PCI_VENDOR_ID_TI)
+		return;
+
+	switch (dev->device) {
+	case PCI_DEVICE_ID_TI_1130:
+	case PCI_DEVICE_ID_TI_1131:
+		/* Enable PCI interrupt */
+		if (pci_read_config_byte(dev, 0x91, &reg) == 0)
+			pci_write_config_byte(dev, 0x91, reg | 0x30);
+		break;
+	case PCI_DEVICE_ID_TI_1210:
+	case PCI_DEVICE_ID_TI_1211:
+	case PCI_DEVICE_ID_TI_1410:
+	case PCI_DEVICE_ID_TI_1510:
+		/*
+		 * 0x8c == TI122X_IRQMUX, 2 says to route the INTA
+		 * signal out the MFUNC0 pin
+		 */
+		if (pci_read_config_byte(dev, 0x8c, &reg) == 0)
+			pci_write_config_byte(dev, 0x8c, (reg & ~0x0f) | 2);
+		break;
+	default:
+		return;
+	}
+
+	/* Both families: disable ISA interrupt mode */
+	if (pci_read_config_byte(dev, 0x92, &reg) == 0)
+		pci_write_config_byte(dev, 0x92, reg & ~0x06);
+}
+
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_TI, PCI_ANY_ID, pmac_pci_fixup_cardbus);
+
+/*
+ * On PowerMacs, try to switch any PCI ATA controller to fully native
+ * mode by setting bits 0 and 2 of the programming interface byte.  When
+ * the rewrite sticks, the first four BARs are cleared so that they get
+ * reassigned.
+ */
+static void pmac_pci_fixup_pciata(struct pci_dev *dev)
+{
+	bool known_promise = false;
+	u8 progif = 0;
+	int bar;
+
+	if (!machine_is(powermac))
+		return;
+
+	/* Some controllers don't have the class IDE */
+	if (dev->vendor == PCI_VENDOR_ID_PROMISE) {
+		switch (dev->device) {
+		case PCI_DEVICE_ID_PROMISE_20246:
+		case PCI_DEVICE_ID_PROMISE_20262:
+		case PCI_DEVICE_ID_PROMISE_20263:
+		case PCI_DEVICE_ID_PROMISE_20265:
+		case PCI_DEVICE_ID_PROMISE_20267:
+		case PCI_DEVICE_ID_PROMISE_20268:
+		case PCI_DEVICE_ID_PROMISE_20269:
+		case PCI_DEVICE_ID_PROMISE_20270:
+		case PCI_DEVICE_ID_PROMISE_20271:
+		case PCI_DEVICE_ID_PROMISE_20275:
+		case PCI_DEVICE_ID_PROMISE_20276:
+		case PCI_DEVICE_ID_PROMISE_20277:
+			known_promise = true;
+			break;
+		}
+	}
+
+	/* Others, check PCI class */
+	if (!known_promise && (dev->class >> 8) != PCI_CLASS_STORAGE_IDE)
+		return;
+
+	pci_read_config_byte(dev, PCI_CLASS_PROG, &progif);
+	if ((progif & 5) == 5)
+		return;		/* already in native mode */
+
+	printk(KERN_INFO "PCI: %s Forcing PCI IDE into native mode\n",
+	       pci_name(dev));
+	(void) pci_write_config_byte(dev, PCI_CLASS_PROG, progif|5);
+
+	/* Read back to see whether the controller accepted the change. */
+	if (pci_read_config_byte(dev, PCI_CLASS_PROG, &progif) ||
+	    (progif & 5) != 5) {
+		printk(KERN_ERR "Rewrite of PROGIF failed !\n");
+		return;
+	}
+
+	/* Clear IO BARs (BAR0..BAR3), they will be reassigned */
+	for (bar = 0; bar < 4; bar++)
+		pci_write_config_dword(dev, PCI_BASE_ADDRESS_0 + 4 * bar, 0);
+}
+DECLARE_PCI_FIXUP_EARLY(PCI_ANY_ID, PCI_ANY_ID, pmac_pci_fixup_pciata);
+#endif /* CONFIG_PPC32 */
+
+/*
+ * K2-SATA: the second function is broken, so it is disabled entirely
+ * (IO and memory decoding off, all six BARs cleared).  On the first
+ * function only IO decoding is turned off and the first five BARs are
+ * cleared.
+ */
+static void fixup_k2_sata(struct pci_dev* dev)
+{
+	u16 disable_bits;
+	u16 cmd;
+	int nr_bars;
+	int bar;
+
+	if (PCI_FUNC(dev->devfn) > 0) {
+		disable_bits = PCI_COMMAND_IO | PCI_COMMAND_MEMORY;
+		nr_bars = 6;
+	} else {
+		disable_bits = PCI_COMMAND_IO;
+		nr_bars = 5;
+	}
+
+	pci_read_config_word(dev, PCI_COMMAND, &cmd);
+	cmd &= ~disable_bits;
+	pci_write_config_word(dev, PCI_COMMAND, cmd);
+
+	/* Wipe both the kernel's view of each BAR and the register. */
+	for (bar = 0; bar < nr_bars; bar++) {
+		dev->resource[bar].start = dev->resource[bar].end = 0;
+		dev->resource[bar].flags = 0;
+		pci_write_config_dword(dev, PCI_BASE_ADDRESS_0 + 4 * bar,
+				       0);
+	}
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SERVERWORKS, 0x0240, fixup_k2_sata);
+
+/*
+ * On U4 (aka CPC945) the PCIe root complex "P2P" bridge resource ranges aren't
+ * configured by the firmware. The bridge itself seems to ignore them but it
+ * causes problems with Linux which then re-assigns devices below the bridge,
+ * thus changing addresses of those devices from what was in the device-tree,
+ * which sucks when those are video cards using offb
+ *
+ * We could just mark it transparent but I prefer fixing up the resources to
+ * properly show what's going on here, as I have some doubts about having them
+ * badly configured potentially being an issue for DMA.
+ *
+ * We leave PIO alone, it seems to be fine
+ *
+ * Oh and there's another funny bug. The OF properties advertize the region
+ * 0xf1000000..0xf1ffffff as being forwarded as memory space. But that's
+ * actually not true, this region is the memory mapped config space. So we
+ * also need to filter it out or we'll map things in the wrong place.
+ */
+static void fixup_u4_pcie(struct pci_dev* dev)
+{
+	struct pci_controller *phb = pci_bus_to_host(dev->bus);
+	struct resource *best = NULL;
+	u32 window;
+	int idx;
+
+	/* Only do that on PowerMac */
+	if (!machine_is(powermac))
+		return;
+
+	/* Pick the largest usable MMIO region of the host bridge */
+	for (idx = 0; idx < 3; idx++) {
+		struct resource *res = &phb->mem_resources[idx];
+		bool hw_reserved = res->start >= 0xf0000000 &&
+				   res->start < 0xf3000000;
+
+		/*
+		 * Ignore non-memory resources, and the 0xf0xxxxxx..f2xxxxxx
+		 * regions which are known to be reserved by HW for other
+		 * things.
+		 */
+		if (!(res->flags & IORESOURCE_MEM) || hw_reserved)
+			continue;
+
+		if (best == NULL || resource_size(res) > resource_size(best))
+			best = res;
+	}
+
+	/* Nothing found, bail */
+	if (best == NULL)
+		return;
+
+	/* Print things out */
+	printk(KERN_INFO "PCI: Fixup U4 PCIe bridge range: %pR\n", best);
+
+	/*
+	 * Fix up the bridge config space.  This is a Mac, so resources are
+	 * not offset and can be written as-is; they are also known to fit
+	 * in 32 bits.  Base goes in the low half-word, limit in the high.
+	 */
+	window = ((best->start >> 16) & 0xfff0) | (best->end & 0xfff00000);
+	pci_write_config_dword(dev, PCI_MEMORY_BASE, window);
+	pci_write_config_dword(dev, PCI_PREF_BASE_UPPER32, 0);
+	pci_write_config_dword(dev, PCI_PREF_LIMIT_UPPER32, 0);
+	pci_write_config_dword(dev, PCI_PREF_MEMORY_BASE, 0);
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_APPLE, PCI_DEVICE_ID_APPLE_U4_PCIE, fixup_u4_pcie);
+
+#ifdef CONFIG_PPC64
+/*
+ * Choose how a bus gets probed.
+ *
+ * Normal PCI probing is needed for the AGP bus, since the device for the
+ * AGP bridge isn't in the tree; the same goes for the PCIe host on U4
+ * and the HT host bridge.  Everything else is probed from the device
+ * tree.
+ */
+static int pmac_pci_probe_mode(struct pci_bus *bus)
+{
+	struct device_node *np = pci_bus_to_OF_node(bus);
+
+	/* Only busses without an upstream bridge device qualify. */
+	if (bus->self != NULL)
+		return PCI_PROBE_DEVTREE;
+
+	if (of_device_is_compatible(np, "u3-agp") ||
+	    of_device_is_compatible(np, "u4-pcie") ||
+	    of_device_is_compatible(np, "u3-ht"))
+		return PCI_PROBE_NORMAL;
+
+	return PCI_PROBE_DEVTREE;
+}
+#endif /* CONFIG_PPC64 */
+
+/*
+ * PCI controller callbacks for PowerMac.  Which hooks are populated
+ * depends on the build: probe_mode on 64-bit, enable_device_hook on
+ * 32-bit.
+ */
+struct pci_controller_ops pmac_pci_controller_ops = {
+#ifdef CONFIG_PPC64
+ .probe_mode = pmac_pci_probe_mode,
+#endif
+#ifdef CONFIG_PPC32
+ .enable_device_hook = pmac_pci_enable_device_hook,
+#endif
+};
diff --git a/arch/powerpc/platforms/powermac/pfunc_base.c b/arch/powerpc/platforms/powermac/pfunc_base.c
new file mode 100644
index 000000000..085e0ad20
--- /dev/null
+++ b/arch/powerpc/platforms/powermac/pfunc_base.c
@@ -0,0 +1,412 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/of_irq.h>
+
+#include <asm/pmac_feature.h>
+#include <asm/pmac_pfunc.h>
+
+/*
+ * Debug tracing.  DEBUG is explicitly undefined just below, so DBG()
+ * expands to nothing unless that #undef is replaced by a #define.
+ */
+#undef DEBUG
+#ifdef DEBUG
+#define DBG(fmt...) printk(fmt)
+#else
+#define DBG(fmt...)
+#endif
+
+/*
+ * Interrupt handler installed by macio_do_gpio_irq_enable().  The cookie
+ * is the pmf_function that was passed to request_irq(); it is handed to
+ * pmf_do_irq() and the interrupt is always reported as handled.
+ */
+static irqreturn_t macio_gpio_irq(int irq, void *data)
+{
+	struct pmf_function *func = data;
+
+	pmf_do_irq(func);
+	return IRQ_HANDLED;
+}
+
+/*
+ * Map interrupt 0 of the function's device node and install
+ * macio_gpio_irq() on it.  Returns -EINVAL when the interrupt cannot be
+ * mapped, otherwise whatever request_irq() returns.
+ */
+static int macio_do_gpio_irq_enable(struct pmf_function *func)
+{
+	unsigned int virq;
+
+	virq = irq_of_parse_and_map(func->node, 0);
+	if (virq == 0)
+		return -EINVAL;
+
+	return request_irq(virq, macio_gpio_irq, 0, func->node->name, func);
+}
+
+/*
+ * Counterpart of macio_do_gpio_irq_enable(): look the interrupt up again
+ * from the device node and release it.  Returns -EINVAL when the
+ * interrupt cannot be mapped, 0 otherwise.
+ */
+static int macio_do_gpio_irq_disable(struct pmf_function *func)
+{
+	unsigned int virq;
+
+	virq = irq_of_parse_and_map(func->node, 0);
+	if (virq == 0)
+		return -EINVAL;
+
+	free_irq(virq, func);
+	return 0;
+}
+
+/*
+ * write_gpio handler: read-modify-write of the GPIO register whose
+ * address is stored in func->driver_data.  Only the bits selected by
+ * @mask are replaced by the corresponding bits of @value.  Always
+ * returns 0.
+ */
+static int macio_do_gpio_write(PMF_STD_ARGS, u8 value, u8 mask)
+{
+	u8 __iomem *reg = (u8 __iomem *)func->driver_data;
+	unsigned long flags;
+	u8 bits = value;
+	u8 cur;
+
+	/* Polarity: a zero first argument inverts the value written */
+	if (args && args->count && !args->u[0].v)
+		bits = ~value;
+
+	/* Update the GPIO under the feature lock */
+	raw_spin_lock_irqsave(&feature_lock, flags);
+	cur = readb(reg);
+	cur &= ~mask;
+	cur |= bits & mask;
+	DBG("Do write 0x%02x to GPIO %pOF (%p)\n",
+	    cur, func->node, reg);
+	writeb(cur, reg);
+	raw_spin_unlock_irqrestore(&feature_lock, flags);
+
+	return 0;
+}
+
+/*
+ * read_gpio handler: read the GPIO register at func->driver_data and
+ * store ((reg & mask) >> rshift) ^ xor through the pointer in the first
+ * argument.  Returns -EINVAL when the caller supplied no place to put
+ * the result, 0 otherwise.
+ */
+static int macio_do_gpio_read(PMF_STD_ARGS, u8 mask, int rshift, u8 xor)
+{
+	u8 __iomem *reg = (u8 __iomem *)func->driver_data;
+	u32 raw;
+
+	/* Check if we have room for reply */
+	if (!args || !args->count || !args->u[0].p)
+		return -EINVAL;
+
+	raw = readb(reg);
+	raw &= mask;
+	*args->u[0].p = (raw >> rshift) ^ xor;
+
+	return 0;
+}
+
+/*
+ * delay handler: sleep for @duration, rounded up to whole milliseconds.
+ * @duration is presumably in microseconds (the parser logs it as "us").
+ * Always returns 0.
+ *
+ * The rounding is done with a divide plus remainder test instead of
+ * (duration + 999) / 1000, which wraps around for durations within 999
+ * of the u32 maximum and would then sleep for almost nothing.
+ */
+static int macio_do_delay(PMF_STD_ARGS, u32 duration)
+{
+	unsigned int msecs = duration / 1000;
+
+	if (duration % 1000)
+		msecs++;
+
+	/* assume we can sleep ! */
+	msleep(msecs);
+	return 0;
+}
+
+/*
+ * Platform-function handlers registered for each mac-io GPIO node by
+ * macio_gpio_init_one(); the driver data of those nodes is the GPIO
+ * register address.
+ */
+static struct pmf_handlers macio_gpio_handlers = {
+ .irq_enable = macio_do_gpio_irq_enable,
+ .irq_disable = macio_do_gpio_irq_disable,
+ .write_gpio = macio_do_gpio_write,
+ .read_gpio = macio_do_gpio_read,
+ .delay = macio_do_delay,
+};
+
+/*
+ * Register the GPIO platform-function handlers for every child of the
+ * "gpio" node of one mac-io chip, then run their init-time functions.
+ * Does nothing when the chip has no "gpio" child.
+ */
+static void __init macio_gpio_init_one(struct macio_chip *macio)
+{
+	struct device_node *gpio_np, *pin;
+
+	/* Find the "gpio" parent node; breaking out keeps its reference */
+	for_each_child_of_node(macio->of_node, gpio_np) {
+		if (of_node_name_eq(gpio_np, "gpio"))
+			break;
+	}
+	if (!gpio_np)
+		return;
+
+	DBG("Installing GPIO functions for macio %pOF\n",
+	    macio->of_node);
+
+	/*
+	 * Nothing special is needed to track the GPIOs down, so a driver
+	 * is simply registered for each child that has a "reg" property.
+	 */
+	for_each_child_of_node(gpio_np, pin) {
+		const u32 *reg = of_get_property(pin, "reg", NULL);
+		unsigned long addr;
+
+		if (!reg)
+			continue;
+
+		addr = *reg;
+		/*
+		 * Deal with old style device-tree: small offsets are
+		 * relative to 0x50.  Hard coding that is a bit gross but
+		 * safe for now.
+		 */
+		if (addr < 0x50)
+			addr += 0x50;
+		addr += (unsigned long)macio->base;
+
+		pmf_register_driver(pin, &macio_gpio_handlers, (void *)addr);
+	}
+
+	DBG("Calling initial GPIO functions for macio %pOF\n",
+	    macio->of_node);
+
+	/* And now we run all the init ones */
+	for_each_child_of_node(gpio_np, pin)
+		pmf_do_functions(pin, NULL, 0, PMF_FLAGS_ON_INIT, NULL);
+
+	of_node_put(gpio_np);
+
+	/*
+	 * Note: the "at sleep" and "at wake" functions are not implemented
+	 * at this point; none have been found for GPIOs so far anyway.
+	 */
+}
+
+/*
+ * write_reg32 handler: under the feature lock, replace the bits selected
+ * by @mask in the 32-bit mac-io register at @offset with those of
+ * @value.  Always returns 0.
+ */
+static int macio_do_write_reg32(PMF_STD_ARGS, u32 offset, u32 value, u32 mask)
+{
+	/* NOTE(review): the MACIO_* accessors appear to use this local */
+	struct macio_chip *macio = func->driver_data;
+	unsigned long flags;
+	u32 cur;
+
+	raw_spin_lock_irqsave(&feature_lock, flags);
+	cur = MACIO_IN32(offset);
+	MACIO_OUT32(offset, (cur & ~mask) | (value & mask));
+	raw_spin_unlock_irqrestore(&feature_lock, flags);
+
+	return 0;
+}
+
+/*
+ * read_reg32 handler: store the 32-bit mac-io register at @offset
+ * through the pointer in the first argument.  Returns -EINVAL when no
+ * result pointer was supplied, 0 otherwise.
+ */
+static int macio_do_read_reg32(PMF_STD_ARGS, u32 offset)
+{
+	/* NOTE(review): the MACIO_* accessors appear to use this local */
+	struct macio_chip *macio = func->driver_data;
+
+	/* Check if we have room for reply */
+	if (!args || !args->count || !args->u[0].p)
+		return -EINVAL;
+
+	*args->u[0].p = MACIO_IN32(offset);
+
+	return 0;
+}
+
+/*
+ * write_reg8 handler: under the feature lock, replace the bits selected
+ * by @mask in the 8-bit mac-io register at @offset with those of
+ * @value.  Always returns 0.
+ */
+static int macio_do_write_reg8(PMF_STD_ARGS, u32 offset, u8 value, u8 mask)
+{
+	/* NOTE(review): the MACIO_* accessors appear to use this local */
+	struct macio_chip *macio = func->driver_data;
+	unsigned long flags;
+	u8 cur;
+
+	raw_spin_lock_irqsave(&feature_lock, flags);
+	cur = MACIO_IN8(offset);
+	MACIO_OUT8(offset, (cur & ~mask) | (value & mask));
+	raw_spin_unlock_irqrestore(&feature_lock, flags);
+
+	return 0;
+}
+
+/*
+ * read_reg8 handler: store the 8-bit mac-io register at @offset as a
+ * single byte through the pointer in the first argument.  Returns
+ * -EINVAL when no result pointer was supplied, 0 otherwise.
+ */
+static int macio_do_read_reg8(PMF_STD_ARGS, u32 offset)
+{
+	/* NOTE(review): the MACIO_* accessors appear to use this local */
+	struct macio_chip *macio = func->driver_data;
+	u8 *out;
+
+	/* Check if we have room for reply */
+	if (!args || !args->count || !args->u[0].p)
+		return -EINVAL;
+
+	/* Only one byte of the result slot is written */
+	out = (u8 *)(args->u[0].p);
+	*out = MACIO_IN8(offset);
+
+	return 0;
+}
+
+/*
+ * read_reg32_msrx handler: read the 32-bit mac-io register at @offset,
+ * then mask, shift right and xor it, storing the result through the
+ * pointer in the first argument.  Returns -EINVAL when no result
+ * pointer was supplied, 0 otherwise.
+ */
+static int macio_do_read_reg32_msrx(PMF_STD_ARGS, u32 offset, u32 mask,
+				    u32 shift, u32 xor)
+{
+	/* NOTE(review): the MACIO_* accessors appear to use this local */
+	struct macio_chip *macio = func->driver_data;
+	u32 raw;
+
+	/* Check if we have room for reply */
+	if (!args || !args->count || !args->u[0].p)
+		return -EINVAL;
+
+	raw = MACIO_IN32(offset);
+	*args->u[0].p = ((raw & mask) >> shift) ^ xor;
+
+	return 0;
+}
+
+/*
+ * read_reg8_msrx handler: read the 8-bit mac-io register at @offset,
+ * then mask, shift right and xor it, storing the low byte of the result
+ * through the pointer in the first argument.  Returns -EINVAL when no
+ * result pointer was supplied, 0 otherwise.
+ */
+static int macio_do_read_reg8_msrx(PMF_STD_ARGS, u32 offset, u32 mask,
+				   u32 shift, u32 xor)
+{
+	/* NOTE(review): the MACIO_* accessors appear to use this local */
+	struct macio_chip *macio = func->driver_data;
+	u32 raw;
+	u8 *out;
+
+	/* Check if we have room for reply */
+	if (!args || !args->count || !args->u[0].p)
+		return -EINVAL;
+
+	out = (u8 *)(args->u[0].p);
+	raw = MACIO_IN8(offset);
+	*out = ((raw & mask) >> shift) ^ xor;
+
+	return 0;
+}
+
+/*
+ * write_reg32_slm handler: take the value from the first argument, shift
+ * it left by @shift and merge it, under @mask, into the 32-bit mac-io
+ * register at @offset.  Returns -EINVAL when no argument was supplied,
+ * 0 otherwise.
+ */
+static int macio_do_write_reg32_slm(PMF_STD_ARGS, u32 offset, u32 shift,
+				    u32 mask)
+{
+	/* NOTE(review): the MACIO_* accessors appear to use this local */
+	struct macio_chip *macio = func->driver_data;
+	unsigned long flags;
+	u32 cur, shifted;
+
+	/* Check args */
+	if (!args || !args->count)
+		return -EINVAL;
+
+	raw_spin_lock_irqsave(&feature_lock, flags);
+	cur = MACIO_IN32(offset);
+	shifted = args->u[0].v << shift;
+	cur &= ~mask;
+	cur |= shifted & mask;
+	MACIO_OUT32(offset, cur);
+	raw_spin_unlock_irqrestore(&feature_lock, flags);
+
+	return 0;
+}
+
+/*
+ * write_reg8_slm handler: take the value from the first argument, shift
+ * it left by @shift and merge it, under @mask, into the 8-bit mac-io
+ * register at @offset.  Returns -EINVAL when no argument was supplied,
+ * 0 otherwise.
+ */
+static int macio_do_write_reg8_slm(PMF_STD_ARGS, u32 offset, u32 shift,
+				   u32 mask)
+{
+	/* NOTE(review): the MACIO_* accessors appear to use this local */
+	struct macio_chip *macio = func->driver_data;
+	unsigned long flags;
+	u32 cur, shifted;
+
+	/* Check args */
+	if (!args || !args->count)
+		return -EINVAL;
+
+	raw_spin_lock_irqsave(&feature_lock, flags);
+	cur = MACIO_IN8(offset);
+	shifted = args->u[0].v << shift;
+	cur &= ~mask;
+	cur |= shifted & mask;
+	MACIO_OUT8(offset, cur);
+	raw_spin_unlock_irqrestore(&feature_lock, flags);
+
+	return 0;
+}
+
+/*
+ * Platform-function handlers registered on the mac-io chip node itself
+ * by macio_mmio_init_one(); the driver data is the struct macio_chip.
+ * No 16-bit register accessors are provided.
+ */
+static struct pmf_handlers macio_mmio_handlers = {
+ .write_reg32 = macio_do_write_reg32,
+ .read_reg32 = macio_do_read_reg32,
+ .write_reg8 = macio_do_write_reg8,
+ .read_reg8 = macio_do_read_reg8,
+ .read_reg32_msrx = macio_do_read_reg32_msrx,
+ .read_reg8_msrx = macio_do_read_reg8_msrx,
+ .write_reg32_slm = macio_do_write_reg32_slm,
+ .write_reg8_slm = macio_do_write_reg8_slm,
+ .delay = macio_do_delay,
+};
+
+/*
+ * Register the MMIO platform-function handlers on the device node of one
+ * mac-io chip, with the chip itself as driver data.
+ */
+static void __init macio_mmio_init_one(struct macio_chip *macio)
+{
+	struct device_node *np = macio->of_node;
+
+	DBG("Installing MMIO functions for macio %pOF\n",
+	    macio->of_node);
+
+	pmf_register_driver(np, &macio_mmio_handlers, macio);
+}
+
+/* "hw-clock" child of the UniNorth node, or NULL if none was found */
+static struct device_node *unin_hwclock;
+
+/*
+ * write_reg32 handler for UniNorth: under the feature lock, replace the
+ * bits selected by @mask in the register at @offset with those of
+ * @value.  Always returns 0.
+ */
+static int unin_do_write_reg32(PMF_STD_ARGS, u32 offset, u32 value, u32 mask)
+{
+	unsigned long flags;
+	u32 cur;
+
+	raw_spin_lock_irqsave(&feature_lock, flags);
+	/*
+	 * This is fairly bogus in darwin, but it should work for our needs
+	 * when implemented this way:
+	 */
+	cur = UN_IN(offset);
+	UN_OUT(offset, (cur & ~mask) | (value & mask));
+	raw_spin_unlock_irqrestore(&feature_lock, flags);
+
+	return 0;
+}
+
+
+/*
+ * Platform-function handlers for the UniNorth bridge and its hw-clock
+ * child: only 32-bit register writes and delays are supported.
+ */
+static struct pmf_handlers unin_mmio_handlers = {
+ .write_reg32 = unin_do_write_reg32,
+ .delay = macio_do_delay,
+};
+
+/*
+ * Register the UniNorth handlers on the bridge node and, if present, on
+ * its "hw-clock" child, running the init-time functions of each.
+ */
+static void __init uninorth_install_pfunc(void)
+{
+	struct device_node *child;
+
+	DBG("Installing functions for UniN %pOF\n",
+	    uninorth_node);
+
+	/* Install handlers for the bridge itself */
+	pmf_register_driver(uninorth_node, &unin_mmio_handlers, NULL);
+	pmf_do_functions(uninorth_node, NULL, 0, PMF_FLAGS_ON_INIT, NULL);
+
+	/*
+	 * Look for the hwclock child, if any.  Breaking out of the loop
+	 * keeps the reference, which then lives in unin_hwclock.
+	 */
+	for_each_child_of_node(uninorth_node, child) {
+		if (of_node_name_eq(child, "hw-clock")) {
+			unin_hwclock = child;
+			break;
+		}
+	}
+	if (!unin_hwclock)
+		return;
+
+	DBG("Installing functions for UniN clock %pOF\n",
+	    unin_hwclock);
+	pmf_register_driver(unin_hwclock, &unin_mmio_handlers, NULL);
+	pmf_do_functions(unin_hwclock, NULL, 0, PMF_FLAGS_ON_INIT,
+			 NULL);
+}
+
+/*
+ * Install the base platform-function handlers (mac-io MMIO and GPIO,
+ * UniNorth).  This is exported as the SMP code might init us early; a
+ * static flag makes every call after the first a no-op.  Always
+ * returns 0.
+ */
+int __init pmac_pfunc_base_install(void)
+{
+	static int already_run;
+	struct macio_chip *chip;
+	int i;
+
+	if (already_run)
+		return 0;
+	already_run = 1;
+
+	if (!machine_is(powermac))
+		return 0;
+
+	DBG("Installing base platform functions...\n");
+
+	/* Locate mac-io chips and install handlers */
+	for (i = 0 ; i < MAX_MACIO_CHIPS; i++) {
+		chip = &macio_chips[i];
+		if (!chip->of_node)
+			continue;
+		macio_mmio_init_one(chip);
+		macio_gpio_init_one(chip);
+	}
+
+	/*
+	 * Install handlers for northbridge and direct mapped hwclock
+	 * if any. We do not implement the config space access callback
+	 * which is only ever used for functions that we do not call in
+	 * the current driver (enabling/disabling cells in U2, mostly used
+	 * to restore the PCI settings, we do that differently)
+	 */
+	if (uninorth_node && uninorth_base)
+		uninorth_install_pfunc();
+
+	DBG("All base functions installed\n");
+
+	return 0;
+}
+machine_arch_initcall(powermac, pmac_pfunc_base_install);
+
+#ifdef CONFIG_PM
+
+/* Those can be called by pmac_feature. Ultimately, I should use a sysdev
+ * or a device, but for now, that's good enough until I sort out some
+ * ordering issues. Also, we do not bother with GPIOs, as so far I yet have
+ * to see a case where a GPIO function has the on-suspend or on-resume bit
+ */
+/*
+ * Run the on-sleep platform functions: first every mac-io chip, then
+ * UniNorth, then the UniNorth hw-clock.
+ */
+void pmac_pfunc_base_suspend(void)
+{
+	struct device_node *np;
+	int i;
+
+	for (i = 0 ; i < MAX_MACIO_CHIPS; i++) {
+		np = macio_chips[i].of_node;
+		if (!np)
+			continue;
+		pmf_do_functions(np, NULL, 0, PMF_FLAGS_ON_SLEEP, NULL);
+	}
+
+	if (uninorth_node)
+		pmf_do_functions(uninorth_node, NULL, 0,
+				 PMF_FLAGS_ON_SLEEP, NULL);
+
+	if (unin_hwclock)
+		pmf_do_functions(unin_hwclock, NULL, 0,
+				 PMF_FLAGS_ON_SLEEP, NULL);
+}
+
+/*
+ * Run the on-wake platform functions: first the UniNorth hw-clock, then
+ * UniNorth, then every mac-io chip.
+ */
+void pmac_pfunc_base_resume(void)
+{
+	struct device_node *np;
+	int i;
+
+	if (unin_hwclock)
+		pmf_do_functions(unin_hwclock, NULL, 0,
+				 PMF_FLAGS_ON_WAKE, NULL);
+
+	if (uninorth_node)
+		pmf_do_functions(uninorth_node, NULL, 0,
+				 PMF_FLAGS_ON_WAKE, NULL);
+
+	for (i = 0 ; i < MAX_MACIO_CHIPS; i++) {
+		np = macio_chips[i].of_node;
+		if (!np)
+			continue;
+		pmf_do_functions(np, NULL, 0, PMF_FLAGS_ON_WAKE, NULL);
+	}
+}
+
+#endif /* CONFIG_PM */
diff --git a/arch/powerpc/platforms/powermac/pfunc_core.c b/arch/powerpc/platforms/powermac/pfunc_core.c
new file mode 100644
index 000000000..22741ddfd
--- /dev/null
+++ b/arch/powerpc/platforms/powermac/pfunc_core.c
@@ -0,0 +1,1022 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ *
+ * FIXME: Properly make this race free with refcounting etc...
+ *
+ * FIXME: LOCKING !!!
+ */
+
+#include <linux/delay.h>
+#include <linux/kernel.h>
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/of.h>
+
+#include <asm/pmac_pfunc.h>
+
+/*
+ * Debug helpers.  As configured here only LOG_ERROR() produces output:
+ * LOG_PARSE() and LOG_BLOB() expand to nothing, and DBG() is compiled
+ * out because DEBUG is explicitly undefined below.
+ */
+#define LOG_PARSE(fmt...)
+#define LOG_ERROR(fmt...) printk(fmt)
+#define LOG_BLOB(t,b,c)
+
+#undef DEBUG
+#ifdef DEBUG
+#define DBG(fmt...) printk(fmt)
+#else
+#define DBG(fmt...)
+#endif
+
+/*
+ * Command numbers.  Values are consecutive starting at 0; PMF_CMD_COUNT
+ * is one past the last command, i.e. the number of commands.
+ */
+#define PMF_CMD_LIST 0
+#define PMF_CMD_WRITE_GPIO 1
+#define PMF_CMD_READ_GPIO 2
+#define PMF_CMD_WRITE_REG32 3
+#define PMF_CMD_READ_REG32 4
+#define PMF_CMD_WRITE_REG16 5
+#define PMF_CMD_READ_REG16 6
+#define PMF_CMD_WRITE_REG8 7
+#define PMF_CMD_READ_REG8 8
+#define PMF_CMD_DELAY 9
+#define PMF_CMD_WAIT_REG32 10
+#define PMF_CMD_WAIT_REG16 11
+#define PMF_CMD_WAIT_REG8 12
+#define PMF_CMD_READ_I2C 13
+#define PMF_CMD_WRITE_I2C 14
+#define PMF_CMD_RMW_I2C 15
+#define PMF_CMD_GEN_I2C 16
+#define PMF_CMD_SHIFT_BYTES_RIGHT 17
+#define PMF_CMD_SHIFT_BYTES_LEFT 18
+#define PMF_CMD_READ_CFG 19
+#define PMF_CMD_WRITE_CFG 20
+#define PMF_CMD_RMW_CFG 21
+#define PMF_CMD_READ_I2C_SUBADDR 22
+#define PMF_CMD_WRITE_I2C_SUBADDR 23
+#define PMF_CMD_SET_I2C_MODE 24
+#define PMF_CMD_RMW_I2C_SUBADDR 25
+#define PMF_CMD_READ_REG32_MASK_SHR_XOR 26
+#define PMF_CMD_READ_REG16_MASK_SHR_XOR 27
+#define PMF_CMD_READ_REG8_MASK_SHR_XOR 28
+#define PMF_CMD_WRITE_REG32_SHL_MASK 29
+#define PMF_CMD_WRITE_REG16_SHL_MASK 30
+#define PMF_CMD_WRITE_REG8_SHL_MASK 31
+#define PMF_CMD_MASK_AND_COMPARE 32
+#define PMF_CMD_COUNT 33
+
+/* This structure holds the state of the parser while walking through
+ * a function definition
+ */
+struct pmf_cmd {
+ /* read cursor, advanced by pmf_next32() and pmf_next_blob() */
+ const void *cmdptr;
+ /* end of the command data; reads may not go past it */
+ const void *cmdend;
+ /* func, instdata and args are forwarded to the handler by PMF_PARSE_CALL() */
+ struct pmf_function *func;
+ void *instdata;
+ struct pmf_args *args;
+ /* set to 1 when a read ran out of data; checked by PMF_PARSE_CALL() */
+ int error;
+};
+
+#if 0
+/* Debug output: print @title followed by @bytes bytes of @blob in hex */
+static void print_blob(const char *title, const void *blob, int bytes)
+{
+	const u8 *p = blob;
+
+	printk("%s", title);
+	while (bytes--)
+		printk("%02x ", *p++);
+	printk("\n");
+}
+#endif
+
+/*
+ * Parser helpers
+ */
+
+/*
+ * Fetch the next 32-bit word from the command stream and advance the
+ * cursor past it.  When fewer than 4 bytes remain, the error flag is
+ * set, the cursor is left alone and 0 is returned.
+ */
+static u32 pmf_next32(struct pmf_cmd *cmd)
+{
+	const u32 *word;
+
+	if ((cmd->cmdend - cmd->cmdptr) >= 4) {
+		word = (const u32 *)cmd->cmdptr;
+		cmd->cmdptr += 4;
+		return *word;
+	}
+
+	cmd->error = 1;
+	return 0;
+}
+
+/*
+ * Return a pointer to the next @count bytes of the command stream and
+ * advance the cursor past them.
+ *
+ * When the request cannot be satisfied the error flag is set, the cursor
+ * is left alone and NULL is returned.  That covers both running out of
+ * data and a negative @count: callers pass 32-bit unsigned lengths read
+ * from the stream, which turn negative here once they exceed INT_MAX.
+ * Without the explicit check such a length would pass the bounds test
+ * and move the cursor backwards.
+ */
+static const void* pmf_next_blob(struct pmf_cmd *cmd, int count)
+{
+	const void *value;
+
+	if (count < 0 || (cmd->cmdend - cmd->cmdptr) < count) {
+		cmd->error = 1;
+		return NULL;
+	}
+	value = cmd->cmdptr;
+	cmd->cmdptr += count;
+	return value;
+}
+
+/*
+ * Individual command parsers
+ */
+
+/*
+ * Common tail of every command parser; it always returns from the
+ * calling function:
+ *   - -ENXIO if reading the operands ran out of data (cmd->error set),
+ *   - 0 if no handler table was given (parse-only pass),
+ *   - the handler's return value if the table implements @name,
+ *   - -1 if the table does not implement @name.
+ *
+ * Macro arguments are parenthesized, and the last line carries no line
+ * continuation so that the macro ends where it appears to end.
+ */
+#define PMF_PARSE_CALL(name, cmd, handlers, p...) \
+	do { \
+		if ((cmd)->error) \
+			return -ENXIO; \
+		if ((handlers) == NULL) \
+			return 0; \
+		if ((handlers)->name) \
+			return (handlers)->name((cmd)->func, (cmd)->instdata, \
+						(cmd)->args, p); \
+		return -1; \
+	} while(0)
+
+
+/*
+ * Parse a write_gpio command: two 32-bit words (value, mask), each
+ * truncated to 8 bits, then dispatch via PMF_PARSE_CALL().
+ */
+static int pmf_parser_write_gpio(struct pmf_cmd *cmd, struct pmf_handlers *h)
+{
+	u8 val, msk;
+
+	val = (u8)pmf_next32(cmd);
+	msk = (u8)pmf_next32(cmd);
+
+	LOG_PARSE("pmf: write_gpio(value: %02x, mask: %02x)\n", val, msk);
+
+	PMF_PARSE_CALL(write_gpio, cmd, h, val, msk);
+}
+
+/*
+ * Parse a read_gpio command: three 32-bit words (mask, right shift,
+ * xor), then dispatch via PMF_PARSE_CALL().
+ */
+static int pmf_parser_read_gpio(struct pmf_cmd *cmd, struct pmf_handlers *h)
+{
+	u8 msk, flip;
+	int shr;
+
+	msk = (u8)pmf_next32(cmd);
+	shr = (int)pmf_next32(cmd);
+	flip = (u8)pmf_next32(cmd);
+
+	LOG_PARSE("pmf: read_gpio(mask: %02x, rshift: %d, xor: %02x)\n",
+		  msk, shr, flip);
+
+	PMF_PARSE_CALL(read_gpio, cmd, h, msk, shr, flip);
+}
+
+/*
+ * Parse a write_reg32 command: three 32-bit words (offset, value, mask),
+ * then dispatch via PMF_PARSE_CALL().
+ */
+static int pmf_parser_write_reg32(struct pmf_cmd *cmd, struct pmf_handlers *h)
+{
+	u32 reg_off, val, msk;
+
+	reg_off = pmf_next32(cmd);
+	val = pmf_next32(cmd);
+	msk = pmf_next32(cmd);
+
+	LOG_PARSE("pmf: write_reg32(offset: %08x, value: %08x, mask: %08x)\n",
+		  reg_off, val, msk);
+
+	PMF_PARSE_CALL(write_reg32, cmd, h, reg_off, val, msk);
+}
+
+/*
+ * Parse a read_reg32 command: one 32-bit word (offset), then dispatch
+ * via PMF_PARSE_CALL().
+ */
+static int pmf_parser_read_reg32(struct pmf_cmd *cmd, struct pmf_handlers *h)
+{
+	u32 reg_off;
+
+	reg_off = pmf_next32(cmd);
+
+	LOG_PARSE("pmf: read_reg32(offset: %08x)\n", reg_off);
+
+	PMF_PARSE_CALL(read_reg32, cmd, h, reg_off);
+}
+
+
+/*
+ * Parse a write_reg16 command: three 32-bit words (offset, value, mask),
+ * value and mask truncated to 16 bits, then dispatch via
+ * PMF_PARSE_CALL().
+ */
+static int pmf_parser_write_reg16(struct pmf_cmd *cmd, struct pmf_handlers *h)
+{
+	u32 reg_off;
+	u16 val, msk;
+
+	reg_off = pmf_next32(cmd);
+	val = (u16)pmf_next32(cmd);
+	msk = (u16)pmf_next32(cmd);
+
+	LOG_PARSE("pmf: write_reg16(offset: %08x, value: %04x, mask: %04x)\n",
+		  reg_off, val, msk);
+
+	PMF_PARSE_CALL(write_reg16, cmd, h, reg_off, val, msk);
+}
+
+/*
+ * Parse a read_reg16 command: one 32-bit word (offset), then dispatch
+ * via PMF_PARSE_CALL().
+ */
+static int pmf_parser_read_reg16(struct pmf_cmd *cmd, struct pmf_handlers *h)
+{
+	u32 reg_off;
+
+	reg_off = pmf_next32(cmd);
+
+	LOG_PARSE("pmf: read_reg16(offset: %08x)\n", reg_off);
+
+	PMF_PARSE_CALL(read_reg16, cmd, h, reg_off);
+}
+
+
+/*
+ * Parse a write_reg8 command: three 32-bit words (offset, value, mask),
+ * value and mask truncated to 8 bits, then dispatch via
+ * PMF_PARSE_CALL().
+ *
+ * The casts name the actual destination width (u8), matching the other
+ * 8-bit parsers; the stored values are the same as with the previous
+ * (u16) casts since the variables are u8 anyway.
+ */
+static int pmf_parser_write_reg8(struct pmf_cmd *cmd, struct pmf_handlers *h)
+{
+	u32 offset = pmf_next32(cmd);
+	u8 value = (u8)pmf_next32(cmd);
+	u8 mask = (u8)pmf_next32(cmd);
+
+	LOG_PARSE("pmf: write_reg8(offset: %08x, value: %02x, mask: %02x)\n",
+		  offset, value, mask);
+
+	PMF_PARSE_CALL(write_reg8, cmd, h, offset, value, mask);
+}
+
+/*
+ * Parse a read_reg8 command: one 32-bit word (offset), then dispatch via
+ * PMF_PARSE_CALL().
+ */
+static int pmf_parser_read_reg8(struct pmf_cmd *cmd, struct pmf_handlers *h)
+{
+	u32 reg_off;
+
+	reg_off = pmf_next32(cmd);
+
+	LOG_PARSE("pmf: read_reg8(offset: %08x)\n", reg_off);
+
+	PMF_PARSE_CALL(read_reg8, cmd, h, reg_off);
+}
+
+/*
+ * Parse a delay command: one 32-bit word (duration), then dispatch via
+ * PMF_PARSE_CALL().
+ *
+ * The log format uses %u since the duration is an unsigned 32-bit value.
+ */
+static int pmf_parser_delay(struct pmf_cmd *cmd, struct pmf_handlers *h)
+{
+	u32 duration = pmf_next32(cmd);
+
+	LOG_PARSE("pmf: delay(duration: %u us)\n", duration);
+
+	PMF_PARSE_CALL(delay, cmd, h, duration);
+}
+
+/*
+ * Parse a wait_reg32 command: three 32-bit words (offset, compare value,
+ * mask), then dispatch via PMF_PARSE_CALL().
+ */
+static int pmf_parser_wait_reg32(struct pmf_cmd *cmd, struct pmf_handlers *h)
+{
+	u32 reg_off, expect, msk;
+
+	reg_off = pmf_next32(cmd);
+	expect = pmf_next32(cmd);
+	msk = pmf_next32(cmd);
+
+	LOG_PARSE("pmf: wait_reg32(offset: %08x, comp_value: %08x,mask: %08x)\n",
+		  reg_off, expect, msk);
+
+	PMF_PARSE_CALL(wait_reg32, cmd, h, reg_off, expect, msk);
+}
+
+/*
+ * Parse a wait_reg16 command: three 32-bit words (offset, compare value,
+ * mask), the last two truncated to 16 bits, then dispatch via
+ * PMF_PARSE_CALL().
+ */
+static int pmf_parser_wait_reg16(struct pmf_cmd *cmd, struct pmf_handlers *h)
+{
+	u32 reg_off;
+	u16 expect, msk;
+
+	reg_off = pmf_next32(cmd);
+	expect = (u16)pmf_next32(cmd);
+	msk = (u16)pmf_next32(cmd);
+
+	LOG_PARSE("pmf: wait_reg16(offset: %08x, comp_value: %04x,mask: %04x)\n",
+		  reg_off, expect, msk);
+
+	PMF_PARSE_CALL(wait_reg16, cmd, h, reg_off, expect, msk);
+}
+
+/*
+ * Parse a wait_reg8 command: three 32-bit words (offset, compare value,
+ * mask), the last two truncated to 8 bits, then dispatch via
+ * PMF_PARSE_CALL().
+ */
+static int pmf_parser_wait_reg8(struct pmf_cmd *cmd, struct pmf_handlers *h)
+{
+	u32 reg_off;
+	u8 expect, msk;
+
+	reg_off = pmf_next32(cmd);
+	expect = (u8)pmf_next32(cmd);
+	msk = (u8)pmf_next32(cmd);
+
+	LOG_PARSE("pmf: wait_reg8(offset: %08x, comp_value: %02x,mask: %02x)\n",
+		  reg_off, expect, msk);
+
+	PMF_PARSE_CALL(wait_reg8, cmd, h, reg_off, expect, msk);
+}
+
+/*
+ * Parse a read_i2c command: one 32-bit word (byte count), then dispatch
+ * via PMF_PARSE_CALL().
+ *
+ * The log format uses %u; the former "%ud" printed the number followed
+ * by a stray 'd'.
+ */
+static int pmf_parser_read_i2c(struct pmf_cmd *cmd, struct pmf_handlers *h)
+{
+	u32 bytes = pmf_next32(cmd);
+
+	LOG_PARSE("pmf: read_i2c(bytes: %u)\n", bytes);
+
+	PMF_PARSE_CALL(read_i2c, cmd, h, bytes);
+}
+
+/*
+ * Parse a write_i2c command: one 32-bit word (byte count) followed by
+ * that many bytes of data, then dispatch via PMF_PARSE_CALL().
+ *
+ * The log format uses %u; the former "%ud" printed the number followed
+ * by a stray 'd'.
+ */
+static int pmf_parser_write_i2c(struct pmf_cmd *cmd, struct pmf_handlers *h)
+{
+	u32 bytes = pmf_next32(cmd);
+	const void *blob = pmf_next_blob(cmd, bytes);
+
+	LOG_PARSE("pmf: write_i2c(bytes: %u) ...\n", bytes);
+	LOG_BLOB("pmf: data: \n", blob, bytes);
+
+	PMF_PARSE_CALL(write_i2c, cmd, h, bytes, blob);
+}
+
+
+/*
+ * Parse a rmw_i2c command: three 32-bit words (mask length, values
+ * length, total length) followed by the mask bytes and then the value
+ * bytes, then dispatch via PMF_PARSE_CALL().
+ *
+ * The log format uses %u for all three unsigned lengths; the former
+ * "%ud" printed a stray 'd' and "%d" treated totalbytes as signed.
+ */
+static int pmf_parser_rmw_i2c(struct pmf_cmd *cmd, struct pmf_handlers *h)
+{
+	u32 maskbytes = pmf_next32(cmd);
+	u32 valuesbytes = pmf_next32(cmd);
+	u32 totalbytes = pmf_next32(cmd);
+	const void *maskblob = pmf_next_blob(cmd, maskbytes);
+	const void *valuesblob = pmf_next_blob(cmd, valuesbytes);
+
+	LOG_PARSE("pmf: rmw_i2c(maskbytes: %u, valuebytes: %u, "
+		  "totalbytes: %u) ...\n",
+		  maskbytes, valuesbytes, totalbytes);
+	LOG_BLOB("pmf: mask data: \n", maskblob, maskbytes);
+	LOG_BLOB("pmf: values data: \n", valuesblob, valuesbytes);
+
+	PMF_PARSE_CALL(rmw_i2c, cmd, h, maskbytes, valuesbytes, totalbytes,
+		       maskblob, valuesblob);
+}
+
+/*
+ * Parse a read_cfg command: two 32-bit words (offset, byte count), then
+ * dispatch via PMF_PARSE_CALL().
+ *
+ * The log format uses %u; the former "%ud" printed the number followed
+ * by a stray 'd'.
+ */
+static int pmf_parser_read_cfg(struct pmf_cmd *cmd, struct pmf_handlers *h)
+{
+	u32 offset = pmf_next32(cmd);
+	u32 bytes = pmf_next32(cmd);
+
+	LOG_PARSE("pmf: read_cfg(offset: %x, bytes: %u)\n", offset, bytes);
+
+	PMF_PARSE_CALL(read_cfg, cmd, h, offset, bytes);
+}
+
+
+/*
+ * Parse a write_cfg command: two 32-bit words (offset, byte count)
+ * followed by that many bytes of data, then dispatch via
+ * PMF_PARSE_CALL().
+ *
+ * The log format uses %u; the former "%ud" printed the number followed
+ * by a stray 'd'.
+ */
+static int pmf_parser_write_cfg(struct pmf_cmd *cmd, struct pmf_handlers *h)
+{
+	u32 offset = pmf_next32(cmd);
+	u32 bytes = pmf_next32(cmd);
+	const void *blob = pmf_next_blob(cmd, bytes);
+
+	LOG_PARSE("pmf: write_cfg(offset: %x, bytes: %u)\n", offset, bytes);
+	LOG_BLOB("pmf: data: \n", blob, bytes);
+
+	PMF_PARSE_CALL(write_cfg, cmd, h, offset, bytes, blob);
+}
+
+/*
+ * Parse a rmw_cfg command: four 32-bit words (offset, mask length,
+ * values length, total length) followed by the mask bytes and then the
+ * value bytes, then dispatch via PMF_PARSE_CALL().
+ *
+ * The log format uses %u for all three unsigned lengths; the former
+ * "%ud" printed a stray 'd' and "%d" treated totalbytes as signed.  The
+ * offset is now logged as well.
+ */
+static int pmf_parser_rmw_cfg(struct pmf_cmd *cmd, struct pmf_handlers *h)
+{
+	u32 offset = pmf_next32(cmd);
+	u32 maskbytes = pmf_next32(cmd);
+	u32 valuesbytes = pmf_next32(cmd);
+	u32 totalbytes = pmf_next32(cmd);
+	const void *maskblob = pmf_next_blob(cmd, maskbytes);
+	const void *valuesblob = pmf_next_blob(cmd, valuesbytes);
+
+	LOG_PARSE("pmf: rmw_cfg(offset: %x, maskbytes: %u, valuebytes: %u,"
+		  " totalbytes: %u) ...\n",
+		  offset, maskbytes, valuesbytes, totalbytes);
+	LOG_BLOB("pmf: mask data: \n", maskblob, maskbytes);
+	LOG_BLOB("pmf: values data: \n", valuesblob, valuesbytes);
+
+	PMF_PARSE_CALL(rmw_cfg, cmd, h, offset, maskbytes, valuesbytes,
+		       totalbytes, maskblob, valuesblob);
+}
+
+
+/*
+ * Decode the operands of a read_i2c_sub command: a sub-address (stored as a
+ * 32-bit word, truncated to 8 bits here) and a 32-bit byte count. The
+ * operands are handed to PMF_PARSE_CALL(), which is expected to supply the
+ * return value.
+ */
+static int pmf_parser_read_i2c_sub(struct pmf_cmd *cmd, struct pmf_handlers *h)
+{
+ u8 subaddr = (u8)pmf_next32(cmd);
+ u32 bytes = pmf_next32(cmd);
+
+ /* bytes is a u32: "%ud" printed the number followed by a stray 'd' */
+ LOG_PARSE("pmf: read_i2c_sub(subaddr: %x, bytes: %u)\n",
+ subaddr, bytes);
+
+ PMF_PARSE_CALL(read_i2c_sub, cmd, h, subaddr, bytes);
+}
+
+/*
+ * Decode the operands of a write_i2c_sub command: a sub-address (stored as a
+ * 32-bit word, truncated to 8 bits here), a 32-bit byte count and a data
+ * blob of that many bytes. The operands are handed to PMF_PARSE_CALL(),
+ * which is expected to supply the return value.
+ */
+static int pmf_parser_write_i2c_sub(struct pmf_cmd *cmd, struct pmf_handlers *h)
+{
+ u8 subaddr = (u8)pmf_next32(cmd);
+ u32 bytes = pmf_next32(cmd);
+ const void *blob = pmf_next_blob(cmd, bytes);
+
+ /* bytes is a u32: "%ud" printed the number followed by a stray 'd' */
+ LOG_PARSE("pmf: write_i2c_sub(subaddr: %x, bytes: %u) ...\n",
+ subaddr, bytes);
+ LOG_BLOB("pmf: data: \n", blob, bytes);
+
+ PMF_PARSE_CALL(write_i2c_sub, cmd, h, subaddr, bytes, blob);
+}
+
+/*
+ * Decode the single 32-bit mode operand of a set_i2c_mode command and hand
+ * it to PMF_PARSE_CALL(), which is expected to supply the return value.
+ */
+static int pmf_parser_set_i2c_mode(struct pmf_cmd *cmd, struct pmf_handlers *h)
+{
+ u32 mode = pmf_next32(cmd);
+
+ /* mode is unsigned, so print it with %u rather than %d */
+ LOG_PARSE("pmf: set_i2c_mode(mode: %u)\n", mode);
+
+ PMF_PARSE_CALL(set_i2c_mode, cmd, h, mode);
+}
+
+
+/*
+ * Decode the operands of an rmw_i2c_sub command: a sub-address (stored as a
+ * 32-bit word, truncated to 8 bits here), three 32-bit lengths (mask,
+ * values, total), then the mask blob and the values blob. The operands are
+ * handed to PMF_PARSE_CALL(), which is expected to supply the return value.
+ */
+static int pmf_parser_rmw_i2c_sub(struct pmf_cmd *cmd, struct pmf_handlers *h)
+{
+ u8 subaddr = (u8)pmf_next32(cmd);
+ u32 maskbytes = pmf_next32(cmd);
+ u32 valuesbytes = pmf_next32(cmd);
+ u32 totalbytes = pmf_next32(cmd);
+ const void *maskblob = pmf_next_blob(cmd, maskbytes);
+ const void *valuesblob = pmf_next_blob(cmd, valuesbytes);
+
+ /* All three lengths are u32: use %u ("%ud" printed a stray 'd') */
+ LOG_PARSE("pmf: rmw_i2c_sub(subaddr: %x, maskbytes: %u, valuebytes: %u"
+ ", totalbytes: %u) ...\n",
+ subaddr, maskbytes, valuesbytes, totalbytes);
+ LOG_BLOB("pmf: mask data: \n", maskblob, maskbytes);
+ LOG_BLOB("pmf: values data: \n", valuesblob, valuesbytes);
+
+ PMF_PARSE_CALL(rmw_i2c_sub, cmd, h, subaddr, maskbytes, valuesbytes,
+ totalbytes, maskblob, valuesblob);
+}
+
+/*
+ * Decode the four 32-bit operands of a read_reg32_msrx command (offset,
+ * mask, shift, xor -- in stream order) and hand them to PMF_PARSE_CALL(),
+ * which is expected to supply the return value.
+ */
+static int pmf_parser_read_reg32_msrx(struct pmf_cmd *cmd,
+ struct pmf_handlers *h)
+{
+ u32 offset = pmf_next32(cmd);
+ u32 mask = pmf_next32(cmd);
+ u32 shift = pmf_next32(cmd);
+ u32 xor = pmf_next32(cmd);
+
+ /* Close the parenthesis the log message opens */
+ LOG_PARSE("pmf: read_reg32_msrx(offset: %x, mask: %x, shift: %x,"
+ " xor: %x)\n", offset, mask, shift, xor);
+
+ PMF_PARSE_CALL(read_reg32_msrx, cmd, h, offset, mask, shift, xor);
+}
+
+/*
+ * Decode the four 32-bit operands of a read_reg16_msrx command (offset,
+ * mask, shift, xor -- in stream order) and hand them to PMF_PARSE_CALL(),
+ * which is expected to supply the return value.
+ */
+static int pmf_parser_read_reg16_msrx(struct pmf_cmd *cmd,
+ struct pmf_handlers *h)
+{
+ u32 offset = pmf_next32(cmd);
+ u32 mask = pmf_next32(cmd);
+ u32 shift = pmf_next32(cmd);
+ u32 xor = pmf_next32(cmd);
+
+ /* Close the parenthesis the log message opens */
+ LOG_PARSE("pmf: read_reg16_msrx(offset: %x, mask: %x, shift: %x,"
+ " xor: %x)\n", offset, mask, shift, xor);
+
+ PMF_PARSE_CALL(read_reg16_msrx, cmd, h, offset, mask, shift, xor);
+}
+/*
+ * Decode the four 32-bit operands of a read_reg8_msrx command (offset,
+ * mask, shift, xor -- in stream order) and hand them to PMF_PARSE_CALL(),
+ * which is expected to supply the return value.
+ */
+static int pmf_parser_read_reg8_msrx(struct pmf_cmd *cmd,
+ struct pmf_handlers *h)
+{
+ u32 offset = pmf_next32(cmd);
+ u32 mask = pmf_next32(cmd);
+ u32 shift = pmf_next32(cmd);
+ u32 xor = pmf_next32(cmd);
+
+ /* Close the parenthesis the log message opens */
+ LOG_PARSE("pmf: read_reg8_msrx(offset: %x, mask: %x, shift: %x,"
+ " xor: %x)\n", offset, mask, shift, xor);
+
+ PMF_PARSE_CALL(read_reg8_msrx, cmd, h, offset, mask, shift, xor);
+}
+
+/*
+ * Decode the three 32-bit operands of a write_reg32_slm command (offset,
+ * shift, mask -- in stream order) and hand them to PMF_PARSE_CALL(), which
+ * is expected to supply the return value.
+ */
+static int pmf_parser_write_reg32_slm(struct pmf_cmd *cmd,
+ struct pmf_handlers *h)
+{
+ u32 offset = pmf_next32(cmd);
+ u32 shift = pmf_next32(cmd);
+ u32 mask = pmf_next32(cmd);
+
+ /* Close the parenthesis the log message opens */
+ LOG_PARSE("pmf: write_reg32_slm(offset: %x, shift: %x, mask: %x)\n",
+ offset, shift, mask);
+
+ PMF_PARSE_CALL(write_reg32_slm, cmd, h, offset, shift, mask);
+}
+
+/*
+ * Decode the three 32-bit operands of a write_reg16_slm command (offset,
+ * shift, mask -- in stream order) and hand them to PMF_PARSE_CALL(), which
+ * is expected to supply the return value.
+ */
+static int pmf_parser_write_reg16_slm(struct pmf_cmd *cmd,
+ struct pmf_handlers *h)
+{
+ u32 offset = pmf_next32(cmd);
+ u32 shift = pmf_next32(cmd);
+ u32 mask = pmf_next32(cmd);
+
+ /* Close the parenthesis the log message opens */
+ LOG_PARSE("pmf: write_reg16_slm(offset: %x, shift: %x, mask: %x)\n",
+ offset, shift, mask);
+
+ PMF_PARSE_CALL(write_reg16_slm, cmd, h, offset, shift, mask);
+}
+
+/*
+ * Decode the three 32-bit operands of a write_reg8_slm command (offset,
+ * shift, mask -- in stream order) and hand them to PMF_PARSE_CALL(), which
+ * is expected to supply the return value.
+ */
+static int pmf_parser_write_reg8_slm(struct pmf_cmd *cmd,
+ struct pmf_handlers *h)
+{
+ u32 offset = pmf_next32(cmd);
+ u32 shift = pmf_next32(cmd);
+ u32 mask = pmf_next32(cmd);
+
+ /* Close the parenthesis the log message opens */
+ LOG_PARSE("pmf: write_reg8_slm(offset: %x, shift: %x, mask: %x)\n",
+ offset, shift, mask);
+
+ PMF_PARSE_CALL(write_reg8_slm, cmd, h, offset, shift, mask);
+}
+
+/*
+ * Decode the operands of a mask_and_compare command: one 32-bit length
+ * followed by a mask blob and a values blob, each of that length. The
+ * operands are handed to PMF_PARSE_CALL(), which is expected to supply the
+ * return value.
+ */
+static int pmf_parser_mask_and_compare(struct pmf_cmd *cmd,
+ struct pmf_handlers *h)
+{
+ u32 bytes = pmf_next32(cmd);
+ const void *maskblob = pmf_next_blob(cmd, bytes);
+ const void *valuesblob = pmf_next_blob(cmd, bytes);
+
+ /* bytes is a u32 (%u, not "%ud"); also close the open parenthesis */
+ LOG_PARSE("pmf: mask_and_compare(length: %u) ...\n", bytes);
+ LOG_BLOB("pmf: mask data: \n", maskblob, bytes);
+ LOG_BLOB("pmf: values data: \n", valuesblob, bytes);
+
+ PMF_PARSE_CALL(mask_and_compare, cmd, h,
+ bytes, maskblob, valuesblob);
+}
+
+
+/* Signature shared by all per-command parsers above */
+typedef int (*pmf_cmd_parser_t)(struct pmf_cmd *cmd, struct pmf_handlers *h);
+
+/*
+ * Parser dispatch table, indexed by the command code read from the command
+ * stream in pmf_parse_one(). The position of each entry is therefore part
+ * of the encoding: do not reorder. A NULL entry makes pmf_parse_one() fail
+ * with -ENXIO ("no parser for command").
+ */
+static pmf_cmd_parser_t pmf_parsers[PMF_CMD_COUNT] =
+{
+ NULL,
+ pmf_parser_write_gpio,
+ pmf_parser_read_gpio,
+ pmf_parser_write_reg32,
+ pmf_parser_read_reg32,
+ pmf_parser_write_reg16,
+ pmf_parser_read_reg16,
+ pmf_parser_write_reg8,
+ pmf_parser_read_reg8,
+ pmf_parser_delay,
+ pmf_parser_wait_reg32,
+ pmf_parser_wait_reg16,
+ pmf_parser_wait_reg8,
+ pmf_parser_read_i2c,
+ pmf_parser_write_i2c,
+ pmf_parser_rmw_i2c,
+ NULL, /* Bogus command */
+ NULL, /* Shift bytes right: NYI */
+ NULL, /* Shift bytes left: NYI */
+ pmf_parser_read_cfg,
+ pmf_parser_write_cfg,
+ pmf_parser_rmw_cfg,
+ pmf_parser_read_i2c_sub,
+ pmf_parser_write_i2c_sub,
+ pmf_parser_set_i2c_mode,
+ pmf_parser_rmw_i2c_sub,
+ pmf_parser_read_reg32_msrx,
+ pmf_parser_read_reg16_msrx,
+ pmf_parser_read_reg8_msrx,
+ pmf_parser_write_reg32_slm,
+ pmf_parser_write_reg16_slm,
+ pmf_parser_write_reg8_slm,
+ pmf_parser_mask_and_compare,
+};
+
+/* One registered function provider, created by pmf_register_driver() */
+struct pmf_device {
+ struct list_head link; /* entry in the global pmf_devices list */
+ struct device_node *node; /* node the driver registered for */
+ struct pmf_handlers *handlers; /* callbacks supplied at registration */
+ struct list_head functions; /* pmf_function objects parsed from node */
+ struct kref ref; /* freed by pmf_release_device() at zero */
+};
+
+/* All registered pmf devices; modified under pmf_lock */
+static LIST_HEAD(pmf_devices);
+/* Taken (irqsave) around device/function lookups and irq client list changes */
+static DEFINE_SPINLOCK(pmf_lock);
+/* Serializes irq client registration and unregistration */
+static DEFINE_MUTEX(pmf_irq_mutex);
+
+/*
+ * kref release callback for struct pmf_device: runs when the last reference
+ * is dropped through pmf_put_device().
+ */
+static void pmf_release_device(struct kref *kref)
+{
+ struct pmf_device *dev = container_of(kref, struct pmf_device, ref);
+
+ /* Balance the of_node_get() done in pmf_register_driver() */
+ of_node_put(dev->node);
+ kfree(dev);
+}
+
+/* Drop one reference on @dev; the device is freed when the count hits zero */
+static inline void pmf_put_device(struct pmf_device *dev)
+{
+ kref_put(&dev->ref, pmf_release_device);
+}
+
+/* Take one reference on @dev and return it, so calls can be chained */
+static inline struct pmf_device *pmf_get_device(struct pmf_device *dev)
+{
+ kref_get(&dev->ref);
+ return dev;
+}
+
+/*
+ * Look up the registered device whose node is @np.
+ *
+ * Returns the device with an extra reference held (the caller must balance
+ * it with pmf_put_device()), or NULL when no device matches. Callers in
+ * this file invoke it with pmf_lock held.
+ */
+static inline struct pmf_device *pmf_find_device(struct device_node *np)
+{
+ struct pmf_device *cur;
+ struct pmf_device *found = NULL;
+
+ list_for_each_entry(cur, &pmf_devices, link) {
+ if (cur->node != np)
+ continue;
+ found = pmf_get_device(cur);
+ break;
+ }
+ return found;
+}
+
+/*
+ * Walk the command stream of @func: read a command code, look up its parser
+ * in pmf_parsers[] and call it with @handlers. A command code of 0
+ * introduces a command list: the next word is the number of commands that
+ * follow.
+ *
+ * When @handlers is NULL this is the initial parse pass (logged as "parsing"
+ * rather than "executing") and func->length is trimmed to the number of
+ * bytes actually consumed. @instdata and @args are stored in the pmf_cmd
+ * passed to each parser.
+ *
+ * Returns 0 on success, -ENXIO on truncated data or an unknown/unsupported
+ * command code, or the non-zero value returned by a parser.
+ */
+static int pmf_parse_one(struct pmf_function *func,
+ struct pmf_handlers *handlers,
+ void *instdata, struct pmf_args *args)
+{
+ struct pmf_cmd cmd;
+ u32 ccode;
+ int count, rc;
+
+ cmd.cmdptr = func->data;
+ cmd.cmdend = func->data + func->length;
+ cmd.func = func;
+ cmd.instdata = instdata;
+ cmd.args = args;
+ cmd.error = 0;
+
+ LOG_PARSE("pmf: func %s, %d bytes, %s...\n",
+ func->name, func->length,
+ handlers ? "executing" : "parsing");
+
+ /* One subcommand to parse for now */
+ count = 1;
+
+ while(count-- && cmd.cmdptr < cmd.cmdend) {
+ /* Get opcode */
+ ccode = pmf_next32(&cmd);
+ /* Check if we are hitting a command list, fetch new count */
+ if (ccode == 0) {
+ /* -1 because the first list entry is handled in this iteration */
+ count = pmf_next32(&cmd) - 1;
+ ccode = pmf_next32(&cmd);
+ }
+ /* cmd.error is checked here as the "ran out of data" indicator */
+ if (cmd.error) {
+ LOG_ERROR("pmf: parse error, not enough data\n");
+ return -ENXIO;
+ }
+ if (ccode >= PMF_CMD_COUNT) {
+ LOG_ERROR("pmf: command code %d unknown !\n", ccode);
+ return -ENXIO;
+ }
+ if (pmf_parsers[ccode] == NULL) {
+ LOG_ERROR("pmf: no parser for command %d !\n", ccode);
+ return -ENXIO;
+ }
+ rc = pmf_parsers[ccode](&cmd, handlers);
+ if (rc != 0) {
+ LOG_ERROR("pmf: parser for command %d returned"
+ " error %d\n", ccode, rc);
+ return rc;
+ }
+ }
+
+ /* We are doing an initial parse pass, we need to adjust the size */
+ if (handlers == NULL)
+ func->length = cmd.cmdptr - func->data;
+
+ return 0;
+}
+
+/*
+ * Split the value of one "platform-do-<name>" property into functions and
+ * add them to @dev.
+ *
+ * Each function starts with two 32-bit words (phandle, flags) followed by
+ * its command stream; a parse-only pmf_parse_one() pass determines how long
+ * that stream is. Every function added takes one reference on @dev.
+ * Processing stops at the first allocation or parse failure.
+ *
+ * Returns the number of functions added (possibly 0).
+ */
+static int pmf_add_function_prop(struct pmf_device *dev, void *driverdata,
+ const char *name, u32 *data,
+ unsigned int length)
+{
+ int count = 0;
+ struct pmf_function *func = NULL;
+
+ DBG("pmf: Adding functions for platform-do-%s\n", name);
+
+ /* 12 bytes = phandle + flags + at least one command word */
+ while (length >= 12) {
+ /* Allocate a structure */
+ func = kzalloc(sizeof(*func), GFP_KERNEL);
+ if (func == NULL)
+ goto bail;
+ kref_init(&func->ref);
+ INIT_LIST_HEAD(&func->irq_clients);
+ func->node = dev->node;
+ func->driver_data = driverdata;
+ func->name = name;
+ func->phandle = data[0];
+ func->flags = data[1];
+ data += 2;
+ length -= 8;
+ func->data = data;
+ /* Provisional: trimmed to the real size by the parse pass below */
+ func->length = length;
+ func->dev = dev;
+ DBG("pmf: idx %d: flags=%08x, phandle=%08x "
+ " %d bytes remaining, parsing...\n",
+ count+1, func->flags, func->phandle, length);
+ if (pmf_parse_one(func, NULL, NULL, NULL)) {
+ kfree(func);
+ goto bail;
+ }
+ /* Skip past the bytes this function consumed */
+ length -= func->length;
+ data = (u32 *)(((u8 *)data) + func->length);
+ list_add(&func->link, &dev->functions);
+ /* Dropped again in pmf_release_function() */
+ pmf_get_device(dev);
+ count++;
+ }
+ bail:
+ DBG("pmf: Added %d functions\n", count);
+
+ return count;
+}
+
+/*
+ * Scan every property of dev->node and register the functions described by
+ * those named "platform-do-<name>" with a non-empty <name> and at least 12
+ * bytes of data.
+ *
+ * Returns the total number of functions added.
+ */
+static int pmf_add_functions(struct pmf_device *dev, void *driverdata)
+{
+#define PP_PREFIX "platform-do-"
+ const int prefix_len = strlen(PP_PREFIX);
+ struct property *prop;
+ int total = 0;
+
+ for_each_property_of_node(dev->node, prop) {
+ const char *suffix;
+
+ /* Not a function property: ignore */
+ if (strncmp(prop->name, PP_PREFIX, prefix_len))
+ continue;
+ suffix = prop->name + prefix_len;
+ /* Need a function name and room for phandle + flags + command */
+ if (suffix[0] == '\0' || prop->length < 12)
+ continue;
+ total += pmf_add_function_prop(dev, driverdata, suffix,
+ prop->value, prop->length);
+ }
+ return total;
+}
+
+
+/*
+ * Register @handlers as the provider of the platform functions found on
+ * node @np. @driverdata is stored in each function created.
+ *
+ * Returns 0 on success, -EINVAL if @handlers is NULL, -EBUSY if a device is
+ * already registered for @np, -ENOMEM on allocation failure, or -ENODEV if
+ * the node describes no functions.
+ */
+int pmf_register_driver(struct device_node *np,
+ struct pmf_handlers *handlers,
+ void *driverdata)
+{
+ struct pmf_device *dev;
+ unsigned long flags;
+ int rc = 0;
+
+ if (handlers == NULL)
+ return -EINVAL;
+
+ DBG("pmf: registering driver for node %pOF\n", np);
+
+ spin_lock_irqsave(&pmf_lock, flags);
+ dev = pmf_find_device(np);
+ spin_unlock_irqrestore(&pmf_lock, flags);
+ if (dev != NULL) {
+ DBG("pmf: already there !\n");
+ /* Drop the reference pmf_find_device() took */
+ pmf_put_device(dev);
+ return -EBUSY;
+ }
+
+ dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+ if (dev == NULL) {
+ DBG("pmf: no memory !\n");
+ return -ENOMEM;
+ }
+ kref_init(&dev->ref);
+ dev->node = of_node_get(np);
+ dev->handlers = handlers;
+ INIT_LIST_HEAD(&dev->functions);
+
+ rc = pmf_add_functions(dev, driverdata);
+ if (rc == 0) {
+ DBG("pmf: no functions, disposing.. \n");
+ /* Nothing references dev yet, so undo by hand instead of kref_put */
+ of_node_put(np);
+ kfree(dev);
+ return -ENODEV;
+ }
+
+ /* Publish the device only once its function list is complete */
+ spin_lock_irqsave(&pmf_lock, flags);
+ list_add(&dev->link, &pmf_devices);
+ spin_unlock_irqrestore(&pmf_lock, flags);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(pmf_register_driver);
+
+/*
+ * Take a reference on @func and on the module that owns its handlers.
+ * Returns @func, or NULL if the module reference could not be obtained (in
+ * which case no function reference is taken either).
+ */
+struct pmf_function *pmf_get_function(struct pmf_function *func)
+{
+ if (!try_module_get(func->dev->handlers->owner))
+ return NULL;
+ kref_get(&func->ref);
+ return func;
+}
+EXPORT_SYMBOL_GPL(pmf_get_function);
+
+/*
+ * kref release callback for struct pmf_function: drops the device reference
+ * taken when the function was added, then frees the function.
+ */
+static void pmf_release_function(struct kref *kref)
+{
+ struct pmf_function *func =
+ container_of(kref, struct pmf_function, ref);
+ pmf_put_device(func->dev);
+ kfree(func);
+}
+
+/* Drop one function reference without touching the handlers' module count */
+static inline void __pmf_put_function(struct pmf_function *func)
+{
+ kref_put(&func->ref, pmf_release_function);
+}
+
+/*
+ * Counterpart of pmf_get_function(): releases the module reference on the
+ * handlers' owner and then one reference on @func. A NULL @func is ignored.
+ */
+void pmf_put_function(struct pmf_function *func)
+{
+ if (func != NULL) {
+ module_put(func->dev->handlers->owner);
+ kref_put(&func->ref, pmf_release_function);
+ }
+}
+EXPORT_SYMBOL_GPL(pmf_put_function);
+
+/*
+ * Unregister the device registered for @np: remove it from pmf_devices,
+ * detach all of its functions (dropping the list's reference on each) and
+ * release the device. Does nothing if no device is registered for @np.
+ *
+ * Functions still referenced elsewhere keep the device alive through their
+ * own device reference until they are released.
+ */
+void pmf_unregister_driver(struct device_node *np)
+{
+ struct pmf_device *dev;
+ unsigned long flags;
+
+ DBG("pmf: unregistering driver for node %pOF\n", np);
+
+ spin_lock_irqsave(&pmf_lock, flags);
+ dev = pmf_find_device(np);
+ if (dev == NULL) {
+ DBG("pmf: not such driver !\n");
+ spin_unlock_irqrestore(&pmf_lock, flags);
+ return;
+ }
+ list_del(&dev->link);
+
+ while(!list_empty(&dev->functions)) {
+ struct pmf_function *func =
+ list_entry(dev->functions.next, typeof(*func), link);
+ list_del(&func->link);
+ __pmf_put_function(func);
+ }
+
+ /* Drop the reference taken by pmf_find_device() above ... */
+ pmf_put_device(dev);
+ /*
+ * ... and the initial reference from kref_init() in
+ * pmf_register_driver(), which nothing else ever releases. dev may be
+ * freed by this call and must not be used afterwards.
+ */
+ pmf_put_device(dev);
+ spin_unlock_irqrestore(&pmf_lock, flags);
+}
+EXPORT_SYMBOL_GPL(pmf_unregister_driver);
+
+/*
+ * Find the function called @name that applies to @target and has at least
+ * one of @flags set.
+ *
+ * If @target has a non-zero "platform-<name>" property, it is used as the
+ * phandle of the node providing the function (the "actor"); otherwise the
+ * function is looked up on @target itself.
+ *
+ * Returns the function WITHOUT taking a reference on it, or NULL if the
+ * actor or a matching function cannot be found. Callers in this file hold
+ * pmf_lock.
+ */
+static struct pmf_function *__pmf_find_function(struct device_node *target,
+ const char *name, u32 flags)
+{
+ struct device_node *actor = of_node_get(target);
+ struct pmf_device *dev;
+ struct pmf_function *func, *result = NULL;
+ char fname[64];
+ const u32 *prop;
+ u32 ph;
+
+ /*
+ * Look for a "platform-*" function reference. If we can't find
+ * one, then we fallback to a direct call attempt
+ */
+ /* Bound by the real buffer size rather than a hand-copied constant */
+ snprintf(fname, sizeof(fname), "platform-%s", name);
+ prop = of_get_property(target, fname, NULL);
+ if (prop == NULL)
+ goto find_it;
+ ph = *prop;
+ if (ph == 0)
+ goto find_it;
+
+ /*
+ * Ok, now try to find the actor. If we can't find it, we fail,
+ * there is no point in falling back there
+ */
+ of_node_put(actor);
+ actor = of_find_node_by_phandle(ph);
+ if (actor == NULL)
+ return NULL;
+ find_it:
+ dev = pmf_find_device(actor);
+ if (dev == NULL) {
+ result = NULL;
+ goto out;
+ }
+
+ list_for_each_entry(func, &dev->functions, link) {
+ if (name && strcmp(name, func->name))
+ continue;
+ /* A function with phandle 0 applies to any target */
+ if (func->phandle && target->phandle != func->phandle)
+ continue;
+ if ((func->flags & flags) == 0)
+ continue;
+ result = func;
+ break;
+ }
+ /* Balance the reference taken by pmf_find_device() */
+ pmf_put_device(dev);
+out:
+ of_node_put(actor);
+ return result;
+}
+
+
+/*
+ * Attach @client to the interrupt-generating function (PMF_FLAGS_INT_GEN)
+ * called @name for @target. The handlers' irq_enable() callback is invoked
+ * when the first client is added.
+ *
+ * On success the client holds a function reference (released by
+ * pmf_unregister_irq_client()) and 0 is returned; -ENODEV is returned if no
+ * such function exists or its reference could not be taken.
+ */
+int pmf_register_irq_client(struct device_node *target,
+ const char *name,
+ struct pmf_irq_client *client)
+{
+ struct pmf_function *func;
+ unsigned long flags;
+
+ spin_lock_irqsave(&pmf_lock, flags);
+ func = __pmf_find_function(target, name, PMF_FLAGS_INT_GEN);
+ if (func)
+ func = pmf_get_function(func);
+ spin_unlock_irqrestore(&pmf_lock, flags);
+ if (func == NULL)
+ return -ENODEV;
+
+ /* guard against manipulations of list */
+ mutex_lock(&pmf_irq_mutex);
+ if (list_empty(&func->irq_clients))
+ func->dev->handlers->irq_enable(func);
+
+ /* guard against pmf_do_irq while changing list */
+ spin_lock_irqsave(&pmf_lock, flags);
+ list_add(&client->link, &func->irq_clients);
+ spin_unlock_irqrestore(&pmf_lock, flags);
+
+ client->func = func;
+ mutex_unlock(&pmf_irq_mutex);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(pmf_register_irq_client);
+
+/*
+ * Detach @client from its function. The handlers' irq_disable() callback is
+ * invoked when the last client goes away, and the function reference taken
+ * at registration is released. BUGs if @client is not registered
+ * (client->func is NULL).
+ */
+void pmf_unregister_irq_client(struct pmf_irq_client *client)
+{
+ struct pmf_function *func = client->func;
+ unsigned long flags;
+
+ BUG_ON(func == NULL);
+
+ /* guard against manipulations of list */
+ mutex_lock(&pmf_irq_mutex);
+ client->func = NULL;
+
+ /* guard against pmf_do_irq while changing list */
+ spin_lock_irqsave(&pmf_lock, flags);
+ list_del(&client->link);
+ spin_unlock_irqrestore(&pmf_lock, flags);
+
+ if (list_empty(&func->irq_clients))
+ func->dev->handlers->irq_disable(func);
+ mutex_unlock(&pmf_irq_mutex);
+ pmf_put_function(func);
+}
+EXPORT_SYMBOL_GPL(pmf_unregister_irq_client);
+
+
+/*
+ * Deliver an interrupt for @func: call the handler of every registered irq
+ * client whose owning module can be pinned. Handlers run with pmf_lock held
+ * and local interrupts disabled.
+ */
+void pmf_do_irq(struct pmf_function *func)
+{
+ unsigned long flags;
+ struct pmf_irq_client *client;
+
+ /* For now, using a spinlock over the whole function. Can be made
+ * to drop the lock using 2 lists if necessary
+ */
+ spin_lock_irqsave(&pmf_lock, flags);
+ list_for_each_entry(client, &func->irq_clients, link) {
+ /* Skip clients whose module reference cannot be taken */
+ if (!try_module_get(client->owner))
+ continue;
+ client->handler(client->data);
+ module_put(client->owner);
+ }
+ spin_unlock_irqrestore(&pmf_lock, flags);
+}
+EXPORT_SYMBOL_GPL(pmf_do_irq);
+
+
+/*
+ * Execute @func with @args: run the optional begin() handler to obtain the
+ * per-call instance data, execute the command stream through
+ * pmf_parse_one(), then run the optional end() handler.
+ *
+ * Returns the result of pmf_parse_one(); end() is called even on failure.
+ */
+int pmf_call_one(struct pmf_function *func, struct pmf_args *args)
+{
+ struct pmf_device *owner = func->dev;
+ struct pmf_handlers *ops = owner->handlers;
+ void *inst = NULL;
+ int status;
+
+ DBG(" ** pmf_call_one(%pOF/%s) **\n", owner->node, func->name);
+
+ if (ops->begin)
+ inst = ops->begin(func, args);
+
+ status = pmf_parse_one(func, ops, inst, args);
+
+ if (ops->end)
+ ops->end(func, inst);
+
+ return status;
+}
+EXPORT_SYMBOL_GPL(pmf_call_one);
+
+/*
+ * Call every function of the device registered for @np that matches the
+ * filters: @name (NULL matches any name), @phandle (0, or a function phandle
+ * of 0, matches anything) and @fflags (at least one flag must be set).
+ *
+ * Returns -ENODEV if no device is registered or no function was called,
+ * otherwise the result of the LAST function called.
+ */
+int pmf_do_functions(struct device_node *np, const char *name,
+ u32 phandle, u32 fflags, struct pmf_args *args)
+{
+ struct pmf_device *dev;
+ struct pmf_function *func, *tmp;
+ unsigned long flags;
+ int rc = -ENODEV;
+
+ spin_lock_irqsave(&pmf_lock, flags);
+
+ dev = pmf_find_device(np);
+ if (dev == NULL) {
+ spin_unlock_irqrestore(&pmf_lock, flags);
+ return -ENODEV;
+ }
+ list_for_each_entry_safe(func, tmp, &dev->functions, link) {
+ if (name && strcmp(name, func->name))
+ continue;
+ if (phandle && func->phandle && phandle != func->phandle)
+ continue;
+ if ((func->flags & fflags) == 0)
+ continue;
+ if (pmf_get_function(func) == NULL)
+ continue;
+ /* The function is called with pmf_lock released */
+ spin_unlock_irqrestore(&pmf_lock, flags);
+ rc = pmf_call_one(func, args);
+ pmf_put_function(func);
+ spin_lock_irqsave(&pmf_lock, flags);
+ }
+ /* Balance the reference taken by pmf_find_device() */
+ pmf_put_device(dev);
+ spin_unlock_irqrestore(&pmf_lock, flags);
+
+ return rc;
+}
+EXPORT_SYMBOL_GPL(pmf_do_functions);
+
+
+/*
+ * Look up the on-demand function (PMF_FLAGS_ON_DEMAND) called @name for
+ * @target and take a reference on it.
+ *
+ * Returns the function, to be released with pmf_put_function(), or NULL if
+ * it does not exist or its reference could not be taken.
+ */
+struct pmf_function *pmf_find_function(struct device_node *target,
+ const char *name)
+{
+ struct pmf_function *found;
+ unsigned long irqflags;
+
+ spin_lock_irqsave(&pmf_lock, irqflags);
+ found = __pmf_find_function(target, name, PMF_FLAGS_ON_DEMAND);
+ if (found != NULL)
+ found = pmf_get_function(found);
+ spin_unlock_irqrestore(&pmf_lock, irqflags);
+
+ return found;
+}
+EXPORT_SYMBOL_GPL(pmf_find_function);
+
+/*
+ * Convenience wrapper: find the on-demand function @name for @target, call
+ * it once with @args and drop the reference again.
+ *
+ * Returns -ENODEV if the function cannot be found, otherwise the result of
+ * pmf_call_one().
+ */
+int pmf_call_function(struct device_node *target, const char *name,
+ struct pmf_args *args)
+{
+ struct pmf_function *fn;
+ int status = -ENODEV;
+
+ fn = pmf_find_function(target, name);
+ if (fn != NULL) {
+ status = pmf_call_one(fn, args);
+ pmf_put_function(fn);
+ }
+ return status;
+}
+EXPORT_SYMBOL_GPL(pmf_call_function);
+
diff --git a/arch/powerpc/platforms/powermac/pic.c b/arch/powerpc/platforms/powermac/pic.c
new file mode 100644
index 000000000..7135ea1d7
--- /dev/null
+++ b/arch/powerpc/platforms/powermac/pic.c
@@ -0,0 +1,650 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Support for the interrupt controllers found on Power Macintosh,
+ * currently Apple's "Grand Central" interrupt controller in all
+ * its incarnations. OpenPIC support used on newer machines is
+ * in a separate file
+ *
+ * Copyright (C) 1997 Paul Mackerras (paulus@samba.org)
+ * Copyright (C) 2005 Benjamin Herrenschmidt (benh@kernel.crashing.org)
+ * IBM, Corp.
+ */
+
+#include <linux/stddef.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/signal.h>
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+#include <linux/syscore_ops.h>
+#include <linux/adb.h>
+#include <linux/minmax.h>
+#include <linux/pmu.h>
+#include <linux/irqdomain.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+
+#include <asm/sections.h>
+#include <asm/io.h>
+#include <asm/smp.h>
+#include <asm/pci-bridge.h>
+#include <asm/time.h>
+#include <asm/pmac_feature.h>
+#include <asm/mpic.h>
+#include <asm/xmon.h>
+
+#include "pmac.h"
+
+#ifdef CONFIG_PPC32
+/*
+ * Register block of one bank of 32 interrupts, accessed with
+ * in_le32()/out_le32(). Roles below are as used in this file.
+ */
+struct pmac_irq_hw {
+ unsigned int event; /* read to find pending interrupts */
+ unsigned int enable; /* written with the cached enable mask */
+ unsigned int ack; /* written with the bit to acknowledge */
+ unsigned int level; /* read for the current line state */
+};
+
+/* Workaround flags for 32bit powermac machines */
+unsigned int of_irq_workarounds;
+/* Default interrupt controller node, set by pmac_pic_init() when needed */
+struct device_node *of_irq_dflt_pic;
+
+/* Default addresses */
+/* One entry per bank of 32 interrupts, filled in by pmac_pic_probe_oldstyle() */
+static volatile struct pmac_irq_hw __iomem *pmac_irq_hw[4];
+
+/* Total interrupts handled, including those of a cascaded controller */
+static int max_irqs;
+/* Interrupts provided by the primary controller alone */
+static int max_real_irqs;
+
+/* Protects the cached masks, the lost-interrupt bitmap and register updates */
+static DEFINE_RAW_SPINLOCK(pmac_pic_lock);
+
+/* The max irq number this driver deals with is 128; see max_irqs */
+static DECLARE_BITMAP(ppc_lost_interrupts, 128);
+static DECLARE_BITMAP(ppc_cached_irq_mask, 128);
+/* Result of irq_of_parse_and_map() for the cascade; -1 until a slave is probed */
+static int pmac_irq_cascade = -1;
+static struct irq_domain *pmac_pic_host;
+
+/*
+ * Mark @irq_nr as a lost interrupt so that it is picked up again by
+ * pmac_pic_get_irq()/gatwick_action(). For an interrupt on the cascaded
+ * controller the cascade interrupt itself is marked as well. Callers in this
+ * file hold pmac_pic_lock.
+ */
+static void __pmac_retrigger(unsigned int irq_nr)
+{
+ if (irq_nr >= max_real_irqs && pmac_irq_cascade > 0) {
+ __set_bit(irq_nr, ppc_lost_interrupts);
+ irq_nr = pmac_irq_cascade;
+ mb();
+ }
+ /* Only count the interrupt and set the decrementer on a 0 -> 1 change */
+ if (!__test_and_set_bit(irq_nr, ppc_lost_interrupts)) {
+ atomic_inc(&ppc_n_lost_interrupts);
+ /* presumably makes the decrementer fire soon so the replay happens */
+ set_dec(1);
+ }
+}
+
+/*
+ * irq_chip .irq_mask_ack callback: clear the interrupt in the cached mask,
+ * forget any pending "lost" replay for it, write the new enable mask and
+ * acknowledge the interrupt, then wait until the enable register reflects
+ * the cached mask.
+ */
+static void pmac_mask_and_ack_irq(struct irq_data *d)
+{
+ unsigned int src = irqd_to_hwirq(d);
+ /* Bit within the bank, and bank index (32 interrupts per bank) */
+ unsigned long bit = 1UL << (src & 0x1f);
+ int i = src >> 5;
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&pmac_pic_lock, flags);
+ __clear_bit(src, ppc_cached_irq_mask);
+ if (__test_and_clear_bit(src, ppc_lost_interrupts))
+ atomic_dec(&ppc_n_lost_interrupts);
+ out_le32(&pmac_irq_hw[i]->enable, ppc_cached_irq_mask[i]);
+ out_le32(&pmac_irq_hw[i]->ack, bit);
+ do {
+ /* make sure ack gets to controller before we enable
+ interrupts */
+ mb();
+ } while((in_le32(&pmac_irq_hw[i]->enable) & bit)
+ != (ppc_cached_irq_mask[i] & bit));
+ raw_spin_unlock_irqrestore(&pmac_pic_lock, flags);
+}
+
+/*
+ * irq_chip .irq_ack callback: forget any pending "lost" replay for the
+ * interrupt and acknowledge it in the controller.
+ */
+static void pmac_ack_irq(struct irq_data *d)
+{
+ unsigned int src = irqd_to_hwirq(d);
+ unsigned long bit = 1UL << (src & 0x1f);
+ int i = src >> 5;
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&pmac_pic_lock, flags);
+ if (__test_and_clear_bit(src, ppc_lost_interrupts))
+ atomic_dec(&ppc_n_lost_interrupts);
+ out_le32(&pmac_irq_hw[i]->ack, bit);
+ /* Read the register back; presumably to flush the posted write */
+ (void)in_le32(&pmac_irq_hw[i]->ack);
+ raw_spin_unlock_irqrestore(&pmac_pic_lock, flags);
+}
+
+/*
+ * Push the cached enable mask of @irq_nr's bank to the controller and wait
+ * for it to take effect. If the interrupt ends up enabled while its level
+ * bit is set, it is retriggered by software. Out-of-range interrupt numbers
+ * are ignored. Callers hold pmac_pic_lock.
+ *
+ * NOTE(review): @nokicklost is not used in this function.
+ */
+static void __pmac_set_irq_mask(unsigned int irq_nr, int nokicklost)
+{
+ unsigned long bit = 1UL << (irq_nr & 0x1f);
+ int i = irq_nr >> 5;
+
+ if ((unsigned)irq_nr >= max_irqs)
+ return;
+
+ /* enable unmasked interrupts */
+ out_le32(&pmac_irq_hw[i]->enable, ppc_cached_irq_mask[i]);
+
+ do {
+ /* make sure mask gets to controller before we
+ return to user */
+ mb();
+ } while((in_le32(&pmac_irq_hw[i]->enable) & bit)
+ != (ppc_cached_irq_mask[i] & bit));
+
+ /*
+ * Unfortunately, setting the bit in the enable register
+ * when the device interrupt is already on *doesn't* set
+ * the bit in the flag register or request another interrupt.
+ */
+ if (bit & ppc_cached_irq_mask[i] & in_le32(&pmac_irq_hw[i]->level))
+ __pmac_retrigger(irq_nr);
+}
+
+/* When an irq gets requested for the first client, if it's an
+ * edge interrupt, we clear any previous one on the controller
+ *
+ * irq_chip .irq_startup callback. After the optional ack the interrupt is
+ * set in the cached mask and enabled in hardware. Always returns 0.
+ */
+static unsigned int pmac_startup_irq(struct irq_data *d)
+{
+ unsigned long flags;
+ unsigned int src = irqd_to_hwirq(d);
+ unsigned long bit = 1UL << (src & 0x1f);
+ int i = src >> 5;
+
+ raw_spin_lock_irqsave(&pmac_pic_lock, flags);
+ if (!irqd_is_level_type(d))
+ out_le32(&pmac_irq_hw[i]->ack, bit);
+ __set_bit(src, ppc_cached_irq_mask);
+ __pmac_set_irq_mask(src, 0);
+ raw_spin_unlock_irqrestore(&pmac_pic_lock, flags);
+
+ return 0;
+}
+
+/*
+ * irq_chip .irq_mask callback: clear the interrupt in the cached mask and
+ * push the mask to the controller.
+ */
+static void pmac_mask_irq(struct irq_data *d)
+{
+ unsigned long flags;
+ unsigned int src = irqd_to_hwirq(d);
+
+ raw_spin_lock_irqsave(&pmac_pic_lock, flags);
+ __clear_bit(src, ppc_cached_irq_mask);
+ __pmac_set_irq_mask(src, 1);
+ raw_spin_unlock_irqrestore(&pmac_pic_lock, flags);
+}
+
+/*
+ * irq_chip .irq_unmask callback: set the interrupt in the cached mask and
+ * push the mask to the controller (which may retrigger the interrupt, see
+ * __pmac_set_irq_mask()).
+ */
+static void pmac_unmask_irq(struct irq_data *d)
+{
+ unsigned long flags;
+ unsigned int src = irqd_to_hwirq(d);
+
+ raw_spin_lock_irqsave(&pmac_pic_lock, flags);
+ __set_bit(src, ppc_cached_irq_mask);
+ __pmac_set_irq_mask(src, 0);
+ raw_spin_unlock_irqrestore(&pmac_pic_lock, flags);
+}
+
+/*
+ * irq_chip .irq_retrigger callback: locked wrapper around
+ * __pmac_retrigger(). Always returns 1.
+ */
+static int pmac_retrigger(struct irq_data *d)
+{
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&pmac_pic_lock, flags);
+ __pmac_retrigger(irqd_to_hwirq(d));
+ raw_spin_unlock_irqrestore(&pmac_pic_lock, flags);
+ return 1;
+}
+
+/* irq_chip installed on every interrupt mapped by pmac_pic_host_map() */
+static struct irq_chip pmac_pic = {
+ .name = "PMAC-PIC",
+ .irq_startup = pmac_startup_irq,
+ .irq_mask = pmac_mask_irq,
+ .irq_ack = pmac_ack_irq,
+ .irq_mask_ack = pmac_mask_and_ack_irq,
+ .irq_unmask = pmac_unmask_irq,
+ .irq_retrigger = pmac_retrigger,
+};
+
+/*
+ * Handler for the cascade interrupt: scan the banks of the cascaded
+ * controller (interrupt numbers max_real_irqs .. max_irqs - 1), highest bank
+ * first, and dispatch the highest-numbered pending, enabled interrupt of
+ * each bank. "Pending" combines the event register, the lost-interrupt
+ * bitmap and the level register.
+ *
+ * Returns IRQ_HANDLED if at least one interrupt was dispatched, IRQ_NONE
+ * otherwise.
+ */
+static irqreturn_t gatwick_action(int cpl, void *dev_id)
+{
+ unsigned long flags;
+ int irq, bits;
+ int rc = IRQ_NONE;
+
+ raw_spin_lock_irqsave(&pmac_pic_lock, flags);
+ for (irq = max_irqs; (irq -= 32) >= max_real_irqs; ) {
+ int i = irq >> 5;
+ bits = in_le32(&pmac_irq_hw[i]->event) | ppc_lost_interrupts[i];
+ bits |= in_le32(&pmac_irq_hw[i]->level);
+ bits &= ppc_cached_irq_mask[i];
+ if (bits == 0)
+ continue;
+ irq += __ilog2(bits);
+ /* The lock is dropped while the interrupt is being handled */
+ raw_spin_unlock_irqrestore(&pmac_pic_lock, flags);
+ generic_handle_irq(irq);
+ raw_spin_lock_irqsave(&pmac_pic_lock, flags);
+ rc = IRQ_HANDLED;
+ }
+ raw_spin_unlock_irqrestore(&pmac_pic_lock, flags);
+ return rc;
+}
+
+/*
+ * ppc_md.get_irq implementation for the old-style controller: scan the banks
+ * of the primary controller, highest first, and return the Linux interrupt
+ * number mapped to the highest-numbered pending, enabled source.
+ *
+ * Returns 0 when nothing is pending.
+ */
+static unsigned int pmac_pic_get_irq(void)
+{
+ int irq;
+ unsigned long bits = 0;
+ unsigned long flags;
+
+#ifdef CONFIG_PPC_PMAC32_PSURGE
+ /* IPI's are a hack on the powersurge -- Cort */
+ if (smp_processor_id() != 0) {
+ return psurge_secondary_virq;
+ }
+#endif /* CONFIG_PPC_PMAC32_PSURGE */
+ raw_spin_lock_irqsave(&pmac_pic_lock, flags);
+ for (irq = max_real_irqs; (irq -= 32) >= 0; ) {
+ int i = irq >> 5;
+ bits = in_le32(&pmac_irq_hw[i]->event) | ppc_lost_interrupts[i];
+ bits |= in_le32(&pmac_irq_hw[i]->level);
+ bits &= ppc_cached_irq_mask[i];
+ if (bits == 0)
+ continue;
+ irq += __ilog2(bits);
+ break;
+ }
+ raw_spin_unlock_irqrestore(&pmac_pic_lock, flags);
+ /* The loop ran off the bottom: no bank had anything pending */
+ if (unlikely(irq < 0))
+ return 0;
+ return irq_linear_revmap(pmac_pic_host, irq);
+}
+
+/* irq_domain .match callback: unconditionally claims every node */
+static int pmac_pic_host_match(struct irq_domain *h, struct device_node *node,
+ enum irq_domain_bus_token bus_token)
+{
+ /* We match all, we don't always have a node anyway */
+ return 1;
+}
+
+/*
+ * irq_domain .map callback: bind @virq to the pmac_pic chip. Every mapped
+ * interrupt is flagged IRQ_LEVEL and handled with handle_level_irq.
+ *
+ * Returns 0, or -EINVAL if @hw is not below max_irqs.
+ */
+static int pmac_pic_host_map(struct irq_domain *h, unsigned int virq,
+ irq_hw_number_t hw)
+{
+ if (hw >= max_irqs)
+ return -EINVAL;
+
+ /* Mark level interrupts, set delayed disable for edge ones and set
+ * handlers
+ */
+ irq_set_status_flags(virq, IRQ_LEVEL);
+ irq_set_chip_and_handler(virq, &pmac_pic, handle_level_irq);
+ return 0;
+}
+
+/* Domain operations for the linear domain created in pmac_pic_probe_oldstyle() */
+static const struct irq_domain_ops pmac_pic_host_ops = {
+ .match = pmac_pic_host_match,
+ .map = pmac_pic_host_map,
+ .xlate = irq_domain_xlate_onecell,
+};
+
+/*
+ * Probe and initialise the old-style (non-MPIC) interrupt controller(s):
+ * locate the primary controller node ("gc", "ohare" or "mac-io") and an
+ * optional cascaded one, create the linear irq domain, map the register
+ * banks into pmac_irq_hw[], disable all interrupts and hook up the cascade
+ * interrupt. BUGs if no primary controller is found or it cannot be set up.
+ */
+static void __init pmac_pic_probe_oldstyle(void)
+{
+ int i;
+ struct device_node *master = NULL;
+ struct device_node *slave = NULL;
+ u8 __iomem *addr;
+ struct resource r;
+
+ /* Set our get_irq function */
+ ppc_md.get_irq = pmac_pic_get_irq;
+
+ /*
+ * Find the interrupt controller type & node
+ */
+
+ if ((master = of_find_node_by_name(NULL, "gc")) != NULL) {
+ max_irqs = max_real_irqs = 32;
+ } else if ((master = of_find_node_by_name(NULL, "ohare")) != NULL) {
+ max_irqs = max_real_irqs = 32;
+ /* We might have a second cascaded ohare */
+ slave = of_find_node_by_name(NULL, "pci106b,7");
+ if (slave)
+ max_irqs = 64;
+ } else if ((master = of_find_node_by_name(NULL, "mac-io")) != NULL) {
+ max_irqs = max_real_irqs = 64;
+
+ /* We might have a second cascaded heathrow */
+
+ /* Compensate for of_node_put() in of_find_node_by_name() */
+ of_node_get(master);
+ slave = of_find_node_by_name(master, "mac-io");
+
+ /* Check ordering of master & slave */
+ if (of_device_is_compatible(master, "gatwick")) {
+ BUG_ON(slave == NULL);
+ swap(master, slave);
+ }
+
+ /* We found a slave */
+ if (slave)
+ max_irqs = 128;
+ }
+ BUG_ON(master == NULL);
+
+ /*
+ * Allocate an irq host
+ */
+ pmac_pic_host = irq_domain_add_linear(master, max_irqs,
+ &pmac_pic_host_ops, NULL);
+ BUG_ON(pmac_pic_host == NULL);
+ irq_set_default_host(pmac_pic_host);
+
+ /* Get addresses of first controller if we have a node for it */
+ BUG_ON(of_address_to_resource(master, 0, &r));
+
+ /* Map interrupts of primary controller */
+ addr = (u8 __iomem *) ioremap(r.start, 0x40);
+ i = 0;
+ pmac_irq_hw[i++] = (volatile struct pmac_irq_hw __iomem *)
+ (addr + 0x20);
+ if (max_real_irqs > 32)
+ pmac_irq_hw[i++] = (volatile struct pmac_irq_hw __iomem *)
+ (addr + 0x10);
+
+ printk(KERN_INFO "irq: Found primary Apple PIC %pOF for %d irqs\n",
+ master, max_real_irqs);
+ /* Drop our reference only after the last use of master above */
+ of_node_put(master);
+
+ /* Map interrupts of cascaded controller */
+ if (slave && !of_address_to_resource(slave, 0, &r)) {
+ addr = (u8 __iomem *)ioremap(r.start, 0x40);
+ pmac_irq_hw[i++] = (volatile struct pmac_irq_hw __iomem *)
+ (addr + 0x20);
+ if (max_irqs > 64)
+ pmac_irq_hw[i++] =
+ (volatile struct pmac_irq_hw __iomem *)
+ (addr + 0x10);
+ pmac_irq_cascade = irq_of_parse_and_map(slave, 0);
+
+ printk(KERN_INFO "irq: Found slave Apple PIC %pOF for %d irqs"
+ " cascade: %d\n", slave,
+ max_irqs - max_real_irqs, pmac_irq_cascade);
+ }
+ /* From here on slave is only tested for NULL, never dereferenced */
+ of_node_put(slave);
+
+ /* Disable all interrupts in all controllers */
+ for (i = 0; i * 32 < max_irqs; ++i)
+ out_le32(&pmac_irq_hw[i]->enable, 0);
+
+ /* Hookup cascade irq */
+ if (slave && pmac_irq_cascade) {
+ if (request_irq(pmac_irq_cascade, gatwick_action,
+ IRQF_NO_THREAD, "cascade", NULL))
+ pr_err("Failed to register cascade interrupt\n");
+ }
+
+ printk(KERN_INFO "irq: System has %d possible interrupts\n", max_irqs);
+#ifdef CONFIG_XMON
+ i = irq_create_mapping(NULL, 20);
+ if (request_irq(i, xmon_irq, IRQF_NO_THREAD, "NMI - XMON", NULL))
+ pr_err("Failed to register NMI-XMON interrupt\n");
+#endif
+}
+
+/*
+ * Resolve interrupt number @index of @device on an old-world machine from
+ * its "AAPL,interrupts" property.
+ *
+ * On success fills @out_irq with a single-cell specifier (the raw interrupt
+ * number, no controller node) and returns 0. Returns -EINVAL if no property
+ * is found or @index is beyond the number of entries.
+ */
+int of_irq_parse_oldworld(const struct device_node *device, int index,
+ struct of_phandle_args *out_irq)
+{
+ const u32 *ints = NULL;
+ int intlen;
+
+ /*
+ * Old machines just have a list of interrupt numbers
+ * and no interrupt-controller nodes. We also have dodgy
+ * cases where the AAPL,interrupts property is completely
+ * missing behind pci-pci bridges and we have to get it
+ * from the parent (the bridge itself, as apple just wired
+ * everything together on these)
+ */
+ while (device) {
+ ints = of_get_property(device, "AAPL,interrupts", &intlen);
+ if (ints != NULL)
+ break;
+ /* Only keep walking up while the parent is a "pci" node */
+ device = device->parent;
+ if (!of_node_is_type(device, "pci"))
+ break;
+ }
+ if (ints == NULL)
+ return -EINVAL;
+ /* Convert the property length from bytes to number of entries */
+ intlen /= sizeof(u32);
+
+ if (index >= intlen)
+ return -EINVAL;
+
+ out_irq->np = NULL;
+ out_irq->args[0] = ints[index];
+ out_irq->args_count = 1;
+
+ return 0;
+}
+#endif /* CONFIG_PPC32 */
+
+/*
+ * With XMON on 32-bit kernels, route the interrupt of the
+ * "programmer-switch" node (if present) to xmon_irq at MPIC priority 9.
+ * Compiles to nothing otherwise.
+ *
+ * NOTE(review): @mpic is not used in the body.
+ */
+static void __init pmac_pic_setup_mpic_nmi(struct mpic *mpic)
+{
+#if defined(CONFIG_XMON) && defined(CONFIG_PPC32)
+ struct device_node* pswitch;
+ int nmi_irq;
+
+ pswitch = of_find_node_by_name(NULL, "programmer-switch");
+ if (pswitch) {
+ nmi_irq = irq_of_parse_and_map(pswitch, 0);
+ if (nmi_irq) {
+ mpic_irq_set_priority(nmi_irq, 9);
+ if (request_irq(nmi_irq, xmon_irq, IRQF_NO_THREAD,
+ "NMI - XMON", NULL))
+ pr_err("Failed to register NMI-XMON interrupt\n");
+ }
+ of_node_put(pswitch);
+ }
+#endif /* defined(CONFIG_XMON) && defined(CONFIG_PPC32) */
+}
+
+/*
+ * Enable, allocate and initialise the MPIC described by @np. @master is
+ * non-zero for the primary controller; a secondary one is allocated with
+ * MPIC_SECONDARY.
+ *
+ * Returns the new mpic, or NULL if mpic_alloc() fails.
+ */
+static struct mpic * __init pmac_setup_one_mpic(struct device_node *np,
+ int master)
+{
+ const char *name = master ? " MPIC 1 " : " MPIC 2 ";
+ struct mpic *mpic;
+ unsigned int flags = master ? 0 : MPIC_SECONDARY;
+
+ pmac_call_feature(PMAC_FTR_ENABLE_MPIC, np, 0, 0);
+
+ if (of_property_read_bool(np, "big-endian"))
+ flags |= MPIC_BIG_ENDIAN;
+
+ /* Primary Big Endian means HT interrupts. This is quite dodgy
+ * but works until I find a better way
+ */
+ if (master && (flags & MPIC_BIG_ENDIAN))
+ flags |= MPIC_U3_HT_IRQS;
+
+ mpic = mpic_alloc(np, 0, flags, 0, 0, name);
+ if (mpic == NULL)
+ return NULL;
+
+ mpic_init(mpic);
+
+ return mpic;
+ }
+
+/*
+ * Look for up to two "open-pic" nodes and set them up as primary and
+ * cascaded MPIC. The first node without an "interrupts" property becomes the
+ * master; if only nodes with that property exist, the first one found is
+ * used as master instead.
+ *
+ * Returns 0 when a master MPIC was set up, -ENODEV when no open-pic node
+ * exists (the caller then falls back to the old-style controller). Failure
+ * to set up the master is fatal (BUG); failure of the slave is only logged.
+ */
+static int __init pmac_pic_probe_mpic(void)
+{
+ struct mpic *mpic1, *mpic2;
+ struct device_node *np, *master = NULL, *slave = NULL;
+
+ /* We can have up to 2 MPICs cascaded */
+ for_each_node_by_type(np, "open-pic") {
+ if (master == NULL && !of_property_present(np, "interrupts"))
+ master = of_node_get(np);
+ else if (slave == NULL)
+ slave = of_node_get(np);
+ if (master && slave) {
+ /* Leaving the iterator early: drop its reference on np */
+ of_node_put(np);
+ break;
+ }
+ }
+
+ /* Check for bogus setups */
+ if (master == NULL && slave != NULL) {
+ master = slave;
+ slave = NULL;
+ }
+
+ /* Not found, default to good old pmac pic */
+ if (master == NULL)
+ return -ENODEV;
+
+ /* Set master handler */
+ ppc_md.get_irq = mpic_get_irq;
+
+ /* Setup master */
+ mpic1 = pmac_setup_one_mpic(master, 1);
+ BUG_ON(mpic1 == NULL);
+
+ /* Install NMI if any */
+ pmac_pic_setup_mpic_nmi(mpic1);
+
+ of_node_put(master);
+
+ /* Set up a cascaded controller, if present */
+ if (slave) {
+ mpic2 = pmac_setup_one_mpic(slave, 0);
+ if (mpic2 == NULL)
+ printk(KERN_ERR "Failed to setup slave MPIC\n");
+ of_node_put(slave);
+ }
+
+ return 0;
+}
+
+
+/*
+ * Platform interrupt controller setup: configure the device-tree interrupt
+ * parsing workarounds (32-bit only), then probe for an MPIC and, if none is
+ * found, fall back to the old-style controller (32-bit only).
+ */
+void __init pmac_pic_init(void)
+{
+ /* We configure the OF parsing based on our oldworld vs. newworld
+ * platform type and whether we were booted by BootX.
+ */
+#ifdef CONFIG_PPC32
+ if (!pmac_newworld)
+ of_irq_workarounds |= OF_IMAP_OLDWORLD_MAC;
+ if (of_property_read_bool(of_chosen, "linux,bootx"))
+ of_irq_workarounds |= OF_IMAP_NO_PHANDLE;
+
+ /* If we don't have phandles on a newworld, then try to locate a
+ * default interrupt controller (happens when booting with BootX).
+ * We do a first match here, hopefully, that only ever happens on
+ * machines with one controller.
+ */
+ if (pmac_newworld && (of_irq_workarounds & OF_IMAP_NO_PHANDLE)) {
+ struct device_node *np;
+
+ for_each_node_with_property(np, "interrupt-controller") {
+ /* Skip /chosen/interrupt-controller */
+ if (of_node_name_eq(np, "chosen"))
+ continue;
+ /* It seems like at least one person wants
+ * to use BootX on a machine with an AppleKiwi
+ * controller which happens to pretend to be an
+ * interrupt controller too. */
+ if (of_node_name_eq(np, "AppleKiwi"))
+ continue;
+ /* I think we found one ! */
+ /* The iterator's reference on np is kept with the pointer */
+ of_irq_dflt_pic = np;
+ break;
+ }
+ }
+#endif /* CONFIG_PPC32 */
+
+ /* We first try to detect Apple's new Core99 chipset, since mac-io
+ * is quite different on those machines and contains an IBM MPIC2.
+ */
+ if (pmac_pic_probe_mpic() == 0)
+ return;
+
+#ifdef CONFIG_PPC32
+ pmac_pic_probe_oldstyle();
+#endif
+}
+
+#if defined(CONFIG_PM) && defined(CONFIG_PPC32)
+/*
+ * These procedures are used in implementing sleep on the powerbooks.
+ * pmacpic_suspend() saves the states of all interrupt enables
+ * and disables all interrupts except for the nominated one.
+ * pmacpic_resume() restores the states of all interrupt enables.
+ *
+ * sleep_save_mask holds the saved enable masks of the first two banks.
+ */
+unsigned long sleep_save_mask[2];
+
+/* This used to be passed by the PMU driver but that link got
+ * broken with the new driver model. We use this tweak for now...
+ * We really want to do things differently though...
+ *
+ * Returns the mapped interrupt of the "via-pmu" node on OHare-based PMU
+ * machines, or -1 if CONFIG_ADB_PMU is off, the PMU is of another model or
+ * the node is missing.
+ */
+static int pmacpic_find_viaint(void)
+{
+ int viaint = -1;
+
+#ifdef CONFIG_ADB_PMU
+ struct device_node *np;
+
+ if (pmu_get_model() != PMU_OHARE_BASED)
+ goto not_found;
+ np = of_find_node_by_name(NULL, "via-pmu");
+ if (np == NULL)
+ goto not_found;
+ viaint = irq_of_parse_and_map(np, 0);
+ of_node_put(np);
+
+not_found:
+#endif /* CONFIG_ADB_PMU */
+ return viaint;
+}
+
+/*
+ * syscore suspend callback: save the cached enable masks of the first two
+ * banks in sleep_save_mask, then disable every interrupt except the one
+ * returned by pmacpic_find_viaint() (if any). Always returns 0.
+ */
+static int pmacpic_suspend(void)
+{
+ int viaint = pmacpic_find_viaint();
+
+ sleep_save_mask[0] = ppc_cached_irq_mask[0];
+ sleep_save_mask[1] = ppc_cached_irq_mask[1];
+ ppc_cached_irq_mask[0] = 0;
+ ppc_cached_irq_mask[1] = 0;
+ if (viaint > 0)
+ set_bit(viaint, ppc_cached_irq_mask);
+ out_le32(&pmac_irq_hw[0]->enable, ppc_cached_irq_mask[0]);
+ if (max_real_irqs > 32)
+ out_le32(&pmac_irq_hw[1]->enable, ppc_cached_irq_mask[1]);
+ (void)in_le32(&pmac_irq_hw[0]->event);
+ /* make sure mask gets to controller before we return to caller */
+ mb();
+ (void)in_le32(&pmac_irq_hw[0]->enable);
+
+ return 0;
+}
+
+/*
+ * syscore resume callback: disable everything on the primary controller,
+ * then unmask each interrupt whose bit is set in sleep_save_mask.
+ *
+ * NOTE(review): the bit index i is passed to irq_get_irq_data() directly,
+ * which presumably relies on hardware and Linux interrupt numbers being
+ * identical here -- confirm.
+ */
+static void pmacpic_resume(void)
+{
+ int i;
+
+ out_le32(&pmac_irq_hw[0]->enable, 0);
+ if (max_real_irqs > 32)
+ out_le32(&pmac_irq_hw[1]->enable, 0);
+ mb();
+ for (i = 0; i < max_real_irqs; ++i)
+ if (test_bit(i, sleep_save_mask))
+ pmac_unmask_irq(irq_get_irq_data(i));
+}
+
+/* Registered by init_pmacpic_syscore() when the old-style PIC is in use */
+static struct syscore_ops pmacpic_syscore_ops = {
+ .suspend = pmacpic_suspend,
+ .resume = pmacpic_resume,
+};
+
+/*
+ * Register the suspend/resume hooks, but only when the old-style controller
+ * was probed (its first register bank is mapped). Always returns 0.
+ */
+static int __init init_pmacpic_syscore(void)
+{
+ if (!pmac_irq_hw[0])
+ return 0;
+
+ register_syscore_ops(&pmacpic_syscore_ops);
+ return 0;
+}
+
+machine_subsys_initcall(powermac, init_pmacpic_syscore);
+
+#endif /* CONFIG_PM && CONFIG_PPC32 */
diff --git a/arch/powerpc/platforms/powermac/pmac.h b/arch/powerpc/platforms/powermac/pmac.h
new file mode 100644
index 000000000..1b696f352
--- /dev/null
+++ b/arch/powerpc/platforms/powermac/pmac.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PMAC_H__
+#define __PMAC_H__
+
+#include <linux/pci.h>
+#include <linux/irq.h>
+
+#include <asm/pmac_feature.h>
+
+/*
+ * Declarations for the various functions exported by the
+ * pmac_* files. Mostly for use by pmac_setup
+ */
+
+struct rtc_time;
+
+extern int pmac_newworld;
+
+void g5_phy_disable_cpu1(void);
+
+extern long pmac_time_init(void);
+extern time64_t pmac_get_boot_time(void);
+extern void pmac_get_rtc_time(struct rtc_time *);
+extern int pmac_set_rtc_time(struct rtc_time *);
+extern void pmac_read_rtc_time(void);
+extern void pmac_calibrate_decr(void);
+extern void pmac_pci_irq_fixup(struct pci_dev *);
+extern void pmac_pci_init(void);
+
+extern void pmac_nvram_update(void);
+extern unsigned char pmac_nvram_read_byte(int addr);
+extern void pmac_nvram_write_byte(int addr, unsigned char val);
+extern void pmac_pcibios_after_init(void);
+
+extern void pmac_setup_pci_dma(void);
+extern void pmac_check_ht_link(void);
+
+extern void pmac_setup_smp(void);
+extern int psurge_secondary_virq;
+extern void low_cpu_offline_self(void) __attribute__((noreturn));
+
+extern int pmac_nvram_init(void);
+extern void pmac_pic_init(void);
+
+extern struct pci_controller_ops pmac_pci_controller_ops;
+
+#endif /* __PMAC_H__ */
diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c
new file mode 100644
index 000000000..6de1cd5d8
--- /dev/null
+++ b/arch/powerpc/platforms/powermac/setup.c
@@ -0,0 +1,601 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Powermac setup and early boot code plus other random bits.
+ *
+ * PowerPC version
+ * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ * Adapted for Power Macintosh by Paul Mackerras
+ * Copyright (C) 1996 Paul Mackerras (paulus@samba.org)
+ *
+ * Derived from "arch/alpha/kernel/setup.c"
+ * Copyright (C) 1995 Linus Torvalds
+ *
+ * Maintained by Benjamin Herrenschmidt (benh@kernel.crashing.org)
+ */
+
+/*
+ * bootup setup stuff..
+ */
+
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+#include <linux/ptrace.h>
+#include <linux/export.h>
+#include <linux/user.h>
+#include <linux/tty.h>
+#include <linux/string.h>
+#include <linux/delay.h>
+#include <linux/ioport.h>
+#include <linux/major.h>
+#include <linux/initrd.h>
+#include <linux/vt_kern.h>
+#include <linux/console.h>
+#include <linux/pci.h>
+#include <linux/adb.h>
+#include <linux/cuda.h>
+#include <linux/pmu.h>
+#include <linux/irq.h>
+#include <linux/seq_file.h>
+#include <linux/root_dev.h>
+#include <linux/bitops.h>
+#include <linux/suspend.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+
+#include <asm/reg.h>
+#include <asm/sections.h>
+#include <asm/io.h>
+#include <asm/pci-bridge.h>
+#include <asm/ohare.h>
+#include <asm/mediabay.h>
+#include <asm/machdep.h>
+#include <asm/dma.h>
+#include <asm/cputable.h>
+#include <asm/btext.h>
+#include <asm/pmac_feature.h>
+#include <asm/time.h>
+#include <asm/mmu_context.h>
+#include <asm/iommu.h>
+#include <asm/smu.h>
+#include <asm/pmc.h>
+#include <asm/udbg.h>
+
+#include "pmac.h"
+
+#undef SHOW_GATWICK_IRQS
+
+static int has_l2cache;
+
+int pmac_newworld;
+
+static int current_root_goodness = -1;
+
+/* sda1 - slightly silly choice */
+#define DEFAULT_ROOT_DEVICE MKDEV(SCSI_DISK0_MAJOR, 1)
+
+sys_ctrler_t sys_ctrler = SYS_CTRLER_UNKNOWN;
+EXPORT_SYMBOL(sys_ctrler);
+
+/*
+ * Machine-description .show_cpuinfo callback.
+ *
+ * Prints to the seq_file @m: the machine model and "compatible" list from
+ * the device-tree root, the motherboard model/name/flags reported by
+ * pmac_call_feature(), the L2 cache description (when a cache node is
+ * found) and whether the machine is NewWorld or OldWorld.
+ *
+ * Side effect: sets has_l2cache when a cache node is found.
+ */
+static void pmac_show_cpuinfo(struct seq_file *m)
+{
+	struct device_node *np;
+	const char *pp;
+	int plen;
+	int mbmodel;
+	unsigned int mbflags;
+	char *mbname;
+
+	mbmodel = pmac_call_feature(PMAC_FTR_GET_MB_INFO, NULL,
+				    PMAC_MB_INFO_MODEL, 0);
+	mbflags = pmac_call_feature(PMAC_FTR_GET_MB_INFO, NULL,
+				    PMAC_MB_INFO_FLAGS, 0);
+	if (pmac_call_feature(PMAC_FTR_GET_MB_INFO, NULL, PMAC_MB_INFO_NAME,
+			      (long) &mbname) != 0)
+		mbname = "Unknown";
+
+	/* find motherboard type */
+	seq_puts(m, "machine\t\t: ");
+	np = of_find_node_by_path("/");
+	if (np != NULL) {
+		pp = of_get_property(np, "model", NULL);
+		if (pp != NULL)
+			seq_printf(m, "%s\n", pp);
+		else
+			seq_puts(m, "PowerMac\n");
+		pp = of_get_property(np, "compatible", &plen);
+		if (pp != NULL) {
+			seq_puts(m, "motherboard\t:");
+			/* walk the property as consecutive NUL-terminated strings */
+			while (plen > 0) {
+				int l = strlen(pp) + 1;
+
+				seq_printf(m, " %s", pp);
+				plen -= l;
+				pp += l;
+			}
+			seq_putc(m, '\n');
+		}
+		of_node_put(np);
+	} else {
+		seq_puts(m, "PowerMac\n");
+	}
+
+	/* print parsed model */
+	seq_printf(m, "detected as\t: %d (%s)\n", mbmodel, mbname);
+	seq_printf(m, "pmac flags\t: %08x\n", mbflags);
+
+	/* find l2 cache info */
+	np = of_find_node_by_name(NULL, "l2-cache");
+	if (np == NULL)
+		np = of_find_node_by_type(NULL, "cache");
+	if (np != NULL) {
+		const unsigned int *ic =
+			of_get_property(np, "i-cache-size", NULL);
+		const unsigned int *dc =
+			of_get_property(np, "d-cache-size", NULL);
+
+		seq_puts(m, "L2 cache\t:");
+		has_l2cache = 1;
+		/* sizes are unsigned, so print them with %u */
+		if (of_property_read_bool(np, "cache-unified") && dc) {
+			seq_printf(m, " %uK unified", *dc / 1024);
+		} else {
+			if (ic)
+				seq_printf(m, " %uK instruction", *ic / 1024);
+			if (dc)
+				seq_printf(m, "%s %uK data",
+					   (ic ? " +" : ""), *dc / 1024);
+		}
+		pp = of_get_property(np, "ram-type", NULL);
+		if (pp)
+			seq_printf(m, " %s", pp);
+		seq_putc(m, '\n');
+		of_node_put(np);
+	}
+
+	/* Indicate newworld/oldworld */
+	seq_printf(m, "pmac-generation\t: %s\n",
+		   pmac_newworld ? "NewWorld" : "OldWorld");
+}
+
+#ifndef CONFIG_ADB_CUDA
+/*
+ * Stand-in used when the kernel is built without CONFIG_ADB_CUDA: if the
+ * device tree nevertheless has a "via-cuda" node, warn that the driver is
+ * missing. Always returns 0.
+ */
+int __init find_via_cuda(void)
+{
+	struct device_node *cuda_node;
+
+	cuda_node = of_find_node_by_name(NULL, "via-cuda");
+	if (cuda_node != NULL) {
+		/* only the presence of the node matters */
+		of_node_put(cuda_node);
+		printk("WARNING ! Your machine is CUDA-based but your kernel\n");
+		printk(" wasn't compiled with CONFIG_ADB_CUDA option !\n");
+	}
+	return 0;
+}
+#endif
+
+#ifndef CONFIG_ADB_PMU
+/*
+ * Stand-in used when the kernel is built without CONFIG_ADB_PMU: if the
+ * device tree nevertheless has a "via-pmu" node, warn that the driver is
+ * missing. Always returns 0.
+ */
+int __init find_via_pmu(void)
+{
+	struct device_node *pmu_node;
+
+	pmu_node = of_find_node_by_name(NULL, "via-pmu");
+	if (pmu_node != NULL) {
+		/* only the presence of the node matters */
+		of_node_put(pmu_node);
+		printk("WARNING ! Your machine is PMU-based but your kernel\n");
+		printk(" wasn't compiled with CONFIG_ADB_PMU option !\n");
+	}
+	return 0;
+}
+#endif
+
+#ifndef CONFIG_PMAC_SMU
+/*
+ * Stub used when the kernel is built without CONFIG_PMAC_SMU.
+ * Does nothing and returns 0.
+ */
+int __init smu_init(void)
+{
+ /* should check and warn if SMU is present */
+ return 0;
+}
+#endif
+
+#ifdef CONFIG_PPC32
+static volatile u32 *sysctrl_regs;
+
+/*
+ * Map the system control registers at 0xf8000000 and, on machines that
+ * have an "ohare" I/O controller node, turn on the L2 cache.
+ *
+ * If the mapping fails the function warns and returns without touching
+ * any register.
+ */
+static void __init ohare_init(void)
+{
+	struct device_node *dn;
+
+	/* this area has the CPU identification register
+	   and some registers used by smp boards */
+	sysctrl_regs = (volatile u32 *) ioremap(0xf8000000, 0x1000);
+	if (!sysctrl_regs) {
+		/* the register accesses below would dereference NULL */
+		printk(KERN_WARNING "ohare_init: cannot map system control registers\n");
+		return;
+	}
+
+	/*
+	 * Turn on the L2 cache.
+	 * We assume that we have a PSX memory controller iff
+	 * we have an ohare I/O controller.
+	 */
+	dn = of_find_node_by_name(NULL, "ohare");
+	if (dn) {
+		/* only the presence of the node matters */
+		of_node_put(dn);
+		if (((sysctrl_regs[2] >> 24) & 0xf) >= 3) {
+			if (sysctrl_regs[4] & 0x10)
+				sysctrl_regs[4] |= 0x04000020;
+			else
+				sysctrl_regs[4] |= 0x04000000;
+			if (has_l2cache)
+				printk(KERN_INFO "Level 2 cache enabled\n");
+		}
+	}
+}
+
+/*
+ * Apply an "l2cr-value" override from the device tree when the CPU has
+ * the L2CR feature. Only the first CPU node is consulted.
+ */
+static void __init l2cr_init(void)
+{
+	struct device_node *cpu_node;
+	const unsigned int *override;
+
+	if (!cpu_has_feature(CPU_FTR_L2CR))
+		return;
+
+	/* Checks "l2cr-value" property in the registry */
+	for_each_of_cpu_node(cpu_node) {
+		override = of_get_property(cpu_node, "l2cr-value", NULL);
+		if (override != NULL) {
+			/* write 0 first, then the requested value */
+			_set_L2CR(0);
+			_set_L2CR(*override);
+			pr_info("L2CR overridden (0x%x), backside cache is %s\n",
+				*override, ((*override) & 0x80000000) ?
+				"enabled" : "disabled");
+		}
+		/* leaving the iteration early: drop the node reference */
+		of_node_put(cpu_node);
+		break;
+	}
+}
+#endif
+
+/*
+ * Machine-description .setup_arch callback.
+ *
+ * Seeds loops_per_jiffy from the CPU clock frequency, detects NewWorld
+ * machines, runs the 32-bit cache setup, probes the system controllers
+ * (CUDA / PMU / SMU), initialises NVRAM when configured, picks a default
+ * root device on 32-bit and honours "adb_sync" on the command line.
+ */
+static void __init pmac_setup_arch(void)
+{
+ struct device_node *cpu, *ic;
+ const int *fp;
+ unsigned long pvr;
+
+ pvr = PVR_VER(mfspr(SPRN_PVR));
+
+ /* Set loops_per_jiffy to a half-way reasonable value,
+ for use until calibrate_delay gets called. */
+ loops_per_jiffy = 50000000 / HZ;
+
+ /* refine the estimate from the first CPU node that has a clock-frequency */
+ for_each_of_cpu_node(cpu) {
+ fp = of_get_property(cpu, "clock-frequency", NULL);
+ if (fp != NULL) {
+ if (pvr >= 0x30 && pvr < 0x80)
+ /* PPC970 etc. */
+ loops_per_jiffy = *fp / (3 * HZ);
+ else if (pvr == 4 || pvr >= 8)
+ /* 604, G3, G4 etc. */
+ loops_per_jiffy = *fp / HZ;
+ else
+ /* 603, etc. */
+ loops_per_jiffy = *fp / (2 * HZ);
+ /* leaving the iteration early: drop the node reference */
+ of_node_put(cpu);
+ break;
+ }
+ }
+
+ /* See if newworld or oldworld */
+ ic = of_find_node_with_property(NULL, "interrupt-controller");
+ if (ic) {
+ pmac_newworld = 1;
+ of_node_put(ic);
+ }
+
+#ifdef CONFIG_PPC32
+ ohare_init();
+ l2cr_init();
+#endif /* CONFIG_PPC32 */
+
+ find_via_cuda();
+ find_via_pmu();
+ smu_init();
+
+#if IS_ENABLED(CONFIG_NVRAM)
+ pmac_nvram_init();
+#endif
+ /*
+  * Without CONFIG_BLK_DEV_INITRD the if/else below disappears and the
+  * default root device assignment becomes unconditional.
+  */
+#ifdef CONFIG_PPC32
+#ifdef CONFIG_BLK_DEV_INITRD
+ if (initrd_start)
+ ROOT_DEV = Root_RAM0;
+ else
+#endif
+ ROOT_DEV = DEFAULT_ROOT_DEVICE;
+#endif
+
+#ifdef CONFIG_ADB
+ if (strstr(boot_command_line, "adb_sync")) {
+ extern int __adb_probe_sync;
+ __adb_probe_sync = 1;
+ }
+#endif /* CONFIG_ADB */
+}
+
+/* Non-zero until pmac_late_init() has run; checked by note_bootable_part(). */
+static int initializing = 1;
+
+/* Late initcall: clear the "initializing" flag. Always returns 0. */
+static int pmac_late_init(void)
+{
+ initializing = 0;
+ return 0;
+}
+machine_late_initcall(powermac, pmac_late_init);
+
+void note_bootable_part(dev_t dev, int part, int goodness);
+/*
+ * Record @dev + @part as the root device when it is a better candidate
+ * (higher @goodness) than what has been noted so far, unless the boot
+ * command line carries its own "root=" argument.
+ *
+ * This is __ref because "initializing" is checked before any of the
+ * __init sensitive things are touched, and "initializing" is false after
+ * __init time. It can't be __init because it can be called whenever a
+ * disk is first accessed.
+ */
+void __ref note_bootable_part(dev_t dev, int part, int goodness)
+{
+	const char *root_arg;
+
+	if (!initializing)
+		return;
+
+	/* a non-default root is already set and this one is no better */
+	if (ROOT_DEV != DEFAULT_ROOT_DEVICE &&
+	    goodness <= current_root_goodness)
+		return;
+
+	/* "root=" at the start of the line or after a space: leave it alone */
+	root_arg = strstr(boot_command_line, "root=");
+	if (root_arg != NULL &&
+	    (root_arg == boot_command_line || root_arg[-1] == ' '))
+		return;
+
+	ROOT_DEV = dev + part;
+	current_root_goodness = goodness;
+}
+
+#ifdef CONFIG_ADB_CUDA
+/* Send CUDA_RESET_SYSTEM to the CUDA, then poll it forever. */
+static void __noreturn cuda_restart(void)
+{
+ struct adb_request req;
+
+ cuda_request(&req, NULL, 2, CUDA_PACKET, CUDA_RESET_SYSTEM);
+ for (;;)
+ cuda_poll();
+}
+
+/* Send CUDA_POWERDOWN to the CUDA, then poll it forever. */
+static void __noreturn cuda_shutdown(void)
+{
+ struct adb_request req;
+
+ cuda_request(&req, NULL, 2, CUDA_PACKET, CUDA_POWERDOWN);
+ for (;;)
+ cuda_poll();
+}
+
+#else
+/* No CUDA driver: the calls in pmac_restart()/pmac_power_off() expand to nothing */
+#define cuda_restart()
+#define cuda_shutdown()
+#endif
+
+/* Same empty fallbacks when the PMU driver is not built */
+#ifndef CONFIG_ADB_PMU
+#define pmu_restart()
+#define pmu_shutdown()
+#endif
+
+/* Same empty fallbacks when the SMU driver is not built */
+#ifndef CONFIG_PMAC_SMU
+#define smu_restart()
+#define smu_shutdown()
+#endif
+
+/*
+ * Machine-description .restart callback: ask whichever system controller
+ * is in use to reset the machine, then spin forever. @cmd is not used.
+ */
+static void __noreturn pmac_restart(char *cmd)
+{
+	switch (sys_ctrler) {
+	case SYS_CTRLER_CUDA:
+		cuda_restart();
+		break;
+	case SYS_CTRLER_PMU:
+		pmu_restart();
+		break;
+	case SYS_CTRLER_SMU:
+		smu_restart();
+		break;
+	default:
+		break;
+	}
+
+	/* reached when the request was a no-op or returned */
+	for (;;)
+		;
+}
+
+/*
+ * Power-off handler: ask whichever system controller is in use to shut
+ * the machine down, then spin forever.
+ */
+static void __noreturn pmac_power_off(void)
+{
+	switch (sys_ctrler) {
+	case SYS_CTRLER_CUDA:
+		cuda_shutdown();
+		break;
+	case SYS_CTRLER_PMU:
+		pmu_shutdown();
+		break;
+	case SYS_CTRLER_SMU:
+		smu_shutdown();
+		break;
+	default:
+		break;
+	}
+
+	/* reached when the request was a no-op or returned */
+	for (;;)
+		;
+}
+
+/* Machine-description .halt callback: halting is handled as a power-off. */
+static void __noreturn pmac_halt(void)
+{
+	pmac_power_off();
+}
+
+/*
+ * Early initialization, called from pmac_probe().
+ *
+ * Optionally brings up the early btext debug console, probes the
+ * motherboard chipset, initialises the udbg back-ends, sets up the DART
+ * iommu on 64-bit and performs the SMP setup when CONFIG_SMP is enabled.
+ */
+static void __init pmac_init(void)
+{
+ /* Enable early btext debug if requested */
+ if (strstr(boot_command_line, "btextdbg")) {
+ udbg_adb_init_early();
+ register_early_udbg_console();
+ }
+
+ /* Probe motherboard chipset */
+ pmac_feature_init();
+
+ /* Initialize debug stuff */
+ /* the argument is 1 when the keyword is on the command line, else 0 */
+ udbg_scc_init(!!strstr(boot_command_line, "sccdbg"));
+ udbg_adb_init(!!strstr(boot_command_line, "btextdbg"));
+
+#ifdef CONFIG_PPC64
+ iommu_init_early_dart(&pmac_pci_controller_ops);
+#endif
+
+ /* SMP Init has to be done early as we need to patch up
+ * cpu_possible_mask before interrupt stacks are allocated
+ * or kaboom...
+ */
+#ifdef CONFIG_SMP
+ pmac_setup_smp();
+#endif
+}
+
+/*
+ * Create a platform device called @bus_id for @np and drop the caller's
+ * reference on the node. Does nothing when @np is NULL.
+ */
+static void __init pmac_create_of_device(struct device_node *np,
+					 const char *bus_id)
+{
+	if (!np)
+		return;
+	of_platform_device_create(np, bus_id, NULL);
+	of_node_put(np);
+}
+
+/*
+ * Device initcall: create platform devices for the "valkyrie", "platinum",
+ * "smu" and fan-control ("fcu") nodes that exist in the device tree.
+ * Always returns 0.
+ */
+static int __init pmac_declare_of_platform_devices(void)
+{
+	struct device_node *fcu;
+
+	pmac_create_of_device(of_find_node_by_name(NULL, "valkyrie"),
+			      "valkyrie");
+	pmac_create_of_device(of_find_node_by_name(NULL, "platinum"),
+			      "platinum");
+	pmac_create_of_device(of_find_node_by_type(NULL, "smu"), "smu");
+
+	fcu = of_find_node_by_type(NULL, "fcu");
+	if (!fcu) {
+		/* Some machines have strangely broken device-tree */
+		fcu = of_find_node_by_path("/u3@0,f8000000/i2c@f8001000/fan@15e");
+	}
+	pmac_create_of_device(fcu, "temperature");
+
+	return 0;
+}
+machine_device_initcall(powermac, pmac_declare_of_platform_devices);
+
+#ifdef CONFIG_SERIAL_PMACZILOG_CONSOLE
+/*
+ * This is called very early, as part of console_init() (typically just after
+ * time_init()). This function is responsible for trying to find a good
+ * default console on serial ports. It tries to match the open firmware
+ * default output with one of the available serial console drivers.
+ *
+ * Returns -EBUSY when "console=" is already on the command line, -ENODEV
+ * when no usable stdout node is found, otherwise the result of
+ * add_preferred_console().
+ */
+static int __init check_pmac_serial_console(void)
+{
+ struct device_node *prom_stdout = NULL;
+ int offset = 0;
+ const char *name;
+#ifdef CONFIG_SERIAL_PMACZILOG_TTYS
+ char *devname = "ttyS";
+#else
+ char *devname = "ttyPZ";
+#endif
+
+ pr_debug(" -> check_pmac_serial_console()\n");
+
+ /* The user has requested a console so this is already set up. */
+ if (strstr(boot_command_line, "console=")) {
+ pr_debug(" console was specified !\n");
+ return -EBUSY;
+ }
+
+ if (!of_chosen) {
+ pr_debug(" of_chosen is NULL !\n");
+ return -ENODEV;
+ }
+
+ /* We are getting a weird phandle from OF ... */
+ /* ... So use the full path instead */
+ name = of_get_property(of_chosen, "linux,stdout-path", NULL);
+ if (name == NULL) {
+ pr_debug(" no linux,stdout-path !\n");
+ return -ENODEV;
+ }
+ prom_stdout = of_find_node_by_path(name);
+ if (!prom_stdout) {
+ pr_debug(" can't find stdout package %s !\n", name);
+ return -ENODEV;
+ }
+ pr_debug("stdout is %pOF\n", prom_stdout);
+
+ /* channel A maps to port index 0, channel B to port index 1 */
+ if (of_node_name_eq(prom_stdout, "ch-a"))
+ offset = 0;
+ else if (of_node_name_eq(prom_stdout, "ch-b"))
+ offset = 1;
+ else
+ goto not_found;
+ of_node_put(prom_stdout);
+
+ pr_debug("Found serial console at %s%d\n", devname, offset);
+
+ return add_preferred_console(devname, offset, NULL);
+
+ not_found:
+ pr_debug("No preferred console found !\n");
+ of_node_put(prom_stdout);
+ return -ENODEV;
+}
+console_initcall(check_pmac_serial_console);
+
+#endif /* CONFIG_SERIAL_PMACZILOG_CONSOLE */
+
+/*
+ * Called very early, MMU is off, device-tree isn't unflattened
+ *
+ * NOTE(review): the body relies on of_machine_is_compatible(); confirm
+ * that the statement above about the device tree still describes the
+ * current call site.
+ *
+ * Machine-description .probe callback. Returns 0 when the machine is
+ * neither "Power Macintosh" nor "MacRISC" compatible; otherwise installs
+ * the power-off handler, runs pmac_init() and returns 1.
+ */
+static int __init pmac_probe(void)
+{
+ if (!of_machine_is_compatible("Power Macintosh") &&
+ !of_machine_is_compatible("MacRISC"))
+ return 0;
+
+#ifdef CONFIG_PPC32
+ /* isa_io_base gets set in pmac_pci_init */
+ DMA_MODE_READ = 1;
+ DMA_MODE_WRITE = 2;
+#endif /* CONFIG_PPC32 */
+
+ pm_power_off = pmac_power_off;
+
+ pmac_init();
+
+ return 1;
+}
+
+/*
+ * Machine description for PowerMac: wires the pmac_* implementations
+ * into the platform callbacks. Some entries are only present on 64-bit
+ * or only on 32-bit builds.
+ */
+define_machine(powermac) {
+ .name = "PowerMac",
+ .probe = pmac_probe,
+ .setup_arch = pmac_setup_arch,
+ .discover_phbs = pmac_pci_init,
+ .show_cpuinfo = pmac_show_cpuinfo,
+ .init_IRQ = pmac_pic_init,
+ .get_irq = NULL, /* changed later */
+ .pci_irq_fixup = pmac_pci_irq_fixup,
+ .restart = pmac_restart,
+ .halt = pmac_halt,
+ .time_init = pmac_time_init,
+ .get_boot_time = pmac_get_boot_time,
+ .set_rtc_time = pmac_set_rtc_time,
+ .get_rtc_time = pmac_get_rtc_time,
+ .calibrate_decr = pmac_calibrate_decr,
+ .feature_call = pmac_do_feature_call,
+ .progress = udbg_progress,
+#ifdef CONFIG_PPC64
+ .power_save = power4_idle,
+ .enable_pmcs = power4_enable_pmcs,
+#endif /* CONFIG_PPC64 */
+#ifdef CONFIG_PPC32
+ .pcibios_after_init = pmac_pcibios_after_init,
+ .phys_mem_access_prot = pci_phys_mem_access_prot,
+#endif
+};
diff --git a/arch/powerpc/platforms/powermac/sleep.S b/arch/powerpc/platforms/powermac/sleep.S
new file mode 100644
index 000000000..d497a6000
--- /dev/null
+++ b/arch/powerpc/platforms/powermac/sleep.S
@@ -0,0 +1,433 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * This file contains sleep low-level functions for PowerBook G3.
+ * Copyright (C) 1999 Benjamin Herrenschmidt (benh@kernel.crashing.org)
+ * and Paul Mackerras (paulus@samba.org).
+ */
+
+#include <asm/processor.h>
+#include <asm/page.h>
+#include <asm/ppc_asm.h>
+#include <asm/cputable.h>
+#include <asm/cache.h>
+#include <asm/thread_info.h>
+#include <asm/asm-offsets.h>
+#include <asm/mmu.h>
+#include <asm/feature-fixups.h>
+
+#define MAGIC 0x4c617273 /* 'Lars' */
+
+/*
+ * Structure for storing CPU registers on the stack.
+ */
+#define SL_SP 0
+#define SL_PC 4
+#define SL_MSR 8
+#define SL_SDR1 0xc
+#define SL_SPRG0 0x10 /* 4 sprg's */
+#define SL_DBAT0 0x20
+#define SL_IBAT0 0x28
+#define SL_DBAT1 0x30
+#define SL_IBAT1 0x38
+#define SL_DBAT2 0x40
+#define SL_IBAT2 0x48
+#define SL_DBAT3 0x50
+#define SL_IBAT3 0x58
+#define SL_DBAT4 0x60
+#define SL_IBAT4 0x68
+#define SL_DBAT5 0x70
+#define SL_IBAT5 0x78
+#define SL_DBAT6 0x80
+#define SL_IBAT6 0x88
+#define SL_DBAT7 0x90
+#define SL_IBAT7 0x98
+#define SL_TB 0xa0
+#define SL_R2 0xa8
+#define SL_CR 0xac
+#define SL_LR 0xb0
+#define SL_R12 0xb4 /* r12 to r31 */
+#define SL_SIZE (SL_R12 + 80)
+
+ .section .text
+ .align 5
+
+#if defined(CONFIG_PM) || defined(CONFIG_CPU_FREQ_PMAC) || \
+ (defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PPC32))
+
+/* This gets called by via-pmu.c late during the sleep process.
+ * The PMU has already been sent the sleep command and will shut us down
+ * soon. We need to save all that is needed and set up the wakeup
+ * vector that will be called by the ROM on wakeup.
+ *
+ * Everything is saved into sleep_storage (addressed through r11) using
+ * the SL_* offsets. The routine does not return: it falls through into
+ * low_cpu_offline_self below.
+ */
+_GLOBAL(low_sleep_handler)
+#ifndef CONFIG_PPC_BOOK3S_32
+ blr
+#else
+ mflr r0
+ /* r11 = &sleep_storage for the rest of the routine */
+ lis r11,sleep_storage@ha
+ addi r11,r11,sleep_storage@l
+ stw r0,SL_LR(r11)
+ mfcr r0
+ stw r0,SL_CR(r11)
+ stw r1,SL_SP(r11)
+ stw r2,SL_R2(r11)
+ stmw r12,SL_R12(r11)
+
+ /* Save MSR & SDR1 */
+ mfmsr r4
+ stw r4,SL_MSR(r11)
+ mfsdr1 r4
+ stw r4,SL_SDR1(r11)
+
+ /* Get a stable timebase and save it */
+ /* (retry when the upper half changed between the two reads) */
+1: mftbu r4
+ stw r4,SL_TB(r11)
+ mftb r5
+ stw r5,SL_TB+4(r11)
+ mftbu r3
+ cmpw r3,r4
+ bne 1b
+
+ /* Save SPRGs */
+ mfsprg r4,0
+ stw r4,SL_SPRG0(r11)
+ mfsprg r4,1
+ stw r4,SL_SPRG0+4(r11)
+ mfsprg r4,2
+ stw r4,SL_SPRG0+8(r11)
+ mfsprg r4,3
+ stw r4,SL_SPRG0+12(r11)
+
+ /* Save BATs */
+ /* (upper word at the SL_xBATn offset, lower word at offset + 4) */
+ mfdbatu r4,0
+ stw r4,SL_DBAT0(r11)
+ mfdbatl r4,0
+ stw r4,SL_DBAT0+4(r11)
+ mfdbatu r4,1
+ stw r4,SL_DBAT1(r11)
+ mfdbatl r4,1
+ stw r4,SL_DBAT1+4(r11)
+ mfdbatu r4,2
+ stw r4,SL_DBAT2(r11)
+ mfdbatl r4,2
+ stw r4,SL_DBAT2+4(r11)
+ mfdbatu r4,3
+ stw r4,SL_DBAT3(r11)
+ mfdbatl r4,3
+ stw r4,SL_DBAT3+4(r11)
+ mfibatu r4,0
+ stw r4,SL_IBAT0(r11)
+ mfibatl r4,0
+ stw r4,SL_IBAT0+4(r11)
+ mfibatu r4,1
+ stw r4,SL_IBAT1(r11)
+ mfibatl r4,1
+ stw r4,SL_IBAT1+4(r11)
+ mfibatu r4,2
+ stw r4,SL_IBAT2(r11)
+ mfibatl r4,2
+ stw r4,SL_IBAT2+4(r11)
+ mfibatu r4,3
+ stw r4,SL_IBAT3(r11)
+ mfibatl r4,3
+ stw r4,SL_IBAT3+4(r11)
+
+ /* BATs 4-7 are only saved when MMU_FTR_USE_HIGH_BATS is set */
+BEGIN_MMU_FTR_SECTION
+ mfspr r4,SPRN_DBAT4U
+ stw r4,SL_DBAT4(r11)
+ mfspr r4,SPRN_DBAT4L
+ stw r4,SL_DBAT4+4(r11)
+ mfspr r4,SPRN_DBAT5U
+ stw r4,SL_DBAT5(r11)
+ mfspr r4,SPRN_DBAT5L
+ stw r4,SL_DBAT5+4(r11)
+ mfspr r4,SPRN_DBAT6U
+ stw r4,SL_DBAT6(r11)
+ mfspr r4,SPRN_DBAT6L
+ stw r4,SL_DBAT6+4(r11)
+ mfspr r4,SPRN_DBAT7U
+ stw r4,SL_DBAT7(r11)
+ mfspr r4,SPRN_DBAT7L
+ stw r4,SL_DBAT7+4(r11)
+ mfspr r4,SPRN_IBAT4U
+ stw r4,SL_IBAT4(r11)
+ mfspr r4,SPRN_IBAT4L
+ stw r4,SL_IBAT4+4(r11)
+ mfspr r4,SPRN_IBAT5U
+ stw r4,SL_IBAT5(r11)
+ mfspr r4,SPRN_IBAT5L
+ stw r4,SL_IBAT5+4(r11)
+ mfspr r4,SPRN_IBAT6U
+ stw r4,SL_IBAT6(r11)
+ mfspr r4,SPRN_IBAT6L
+ stw r4,SL_IBAT6+4(r11)
+ mfspr r4,SPRN_IBAT7U
+ stw r4,SL_IBAT7(r11)
+ mfspr r4,SPRN_IBAT7L
+ stw r4,SL_IBAT7+4(r11)
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS)
+
+ /* Backup various CPU config stuffs */
+ bl __save_cpu_setup
+
+ /* The ROM can wake us up via 2 different vectors:
+ * - On wallstreet & lombard, we must write a magic
+ * value 'Lars' at address 4 and a pointer to a
+ * memory location containing the PC to resume from
+ * at address 0.
+ * - On Core99, we must store the wakeup vector at
+ * address 0x80 and possibly its parameters
+ * at address 0x84. I've had some trouble with those
+ * parameters however and I no longer use them.
+ */
+ lis r5,grackle_wake_up@ha
+ addi r5,r5,grackle_wake_up@l
+ tophys(r5,r5)
+ stw r5,SL_PC(r11)
+ /* r4 = KERNELBASE; the 0, 4, 0x80 and 0x84 offsets below are relative to it */
+ lis r4,KERNELBASE@h
+ /* r5 = physical address of the SL_PC slot in sleep_storage */
+ tophys(r5,r11)
+ addi r5,r5,SL_PC
+ lis r6,MAGIC@ha
+ addi r6,r6,MAGIC@l
+ stw r5,0(r4)
+ stw r6,4(r4)
+ /* Setup stuffs at 0x80-0x84 for Core99 */
+ lis r3,core99_wake_up@ha
+ addi r3,r3,core99_wake_up@l
+ tophys(r3,r3)
+ stw r3,0x80(r4)
+ stw r5,0x84(r4)
+
+ /*
+ * Also reached by falling through from low_sleep_handler above.
+ * Flushes and disables the caches, turns off data relocation, sets
+ * HID0 for sleep and then loops setting MSR_POW. Never returns.
+ */
+ .globl low_cpu_offline_self
+low_cpu_offline_self:
+ /* Flush & disable all caches */
+ bl flush_disable_caches
+
+ /* Turn off data relocation. */
+ mfmsr r3 /* Read MSR into r3 */
+ rlwinm r3,r3,0,28,26 /* Turn off DR bit */
+ sync
+ mtmsr r3
+ isync
+
+BEGIN_FTR_SECTION
+ /* Flush any pending L2 data prefetches to work around HW bug */
+ sync
+ lis r3,0xfff0
+ lwz r0,0(r3) /* perform cache-inhibited load to ROM */
+ sync /* (caches are disabled at this point) */
+END_FTR_SECTION_IFSET(CPU_FTR_SPEC7450)
+
+/*
+ * Set the HID0 and MSR for sleep.
+ */
+ mfspr r2,SPRN_HID0
+ rlwinm r2,r2,0,10,7 /* clear doze, nap */
+ oris r2,r2,HID0_SLEEP@h
+ sync
+ isync
+ mtspr SPRN_HID0,r2
+ sync
+
+/* This loop puts us back to sleep in case we have a spurious
+ * wakeup so that the host bridge properly stays asleep. The
+ * CPU will be turned off, either after a known time (about 1
+ * second) on wallstreet & lombard, or as soon as the CPU enters
+ * SLEEP mode on core99
+ */
+ mfmsr r2
+ oris r2,r2,MSR_POW@h
+1: sync
+ mtmsr r2
+ isync
+ b 1b
+_ASM_NOKPROBE_SYMBOL(low_cpu_offline_self)
+/*
+ * Here is the resume code.
+ */
+
+
+/*
+ * Core99 machines resume here
+ * r4 has the physical address of SL_PC(sp) (unused)
+ *
+ * Cleans up HID0 and the MSR, points r1 at the physical address of the
+ * SL_PC slot in sleep_storage and falls through into grackle_wake_up.
+ */
+_GLOBAL(core99_wake_up)
+ /* Make sure HID0 no longer contains any sleep bit and that data cache
+ * is disabled
+ */
+ mfspr r3,SPRN_HID0
+ rlwinm r3,r3,0,11,7 /* clear SLEEP, NAP, DOZE bits */
+ rlwinm r3,r3,0,18,15 /* clear DCE, ICE */
+ mtspr SPRN_HID0,r3
+ sync
+ isync
+
+ /* sanitize MSR */
+ /* (ori followed by xori of the same mask leaves both bits cleared) */
+ mfmsr r3
+ ori r3,r3,MSR_EE|MSR_IP
+ xori r3,r3,MSR_EE|MSR_IP
+ sync
+ isync
+ mtmsr r3
+ sync
+ isync
+
+ /* Recover sleep storage */
+ lis r3,sleep_storage@ha
+ addi r3,r3,sleep_storage@l
+ tophys(r3,r3)
+ addi r1,r3,SL_PC
+
+ /* Pass thru to older resume code ... */
+_ASM_NOKPROBE_SYMBOL(core99_wake_up)
+/*
+ * Here is the resume code for older machines.
+ * r1 has the physical address of SL_PC(sp).
+ *
+ * Restores everything low_sleep_handler saved in sleep_storage and
+ * returns, via rfi, to the caller of low_sleep_handler.
+ */
+
+grackle_wake_up:
+
+ /* Restore the kernel's segment registers before
+ * we do any r1 memory access as we are not sure they
+ * are in a sane state above the first 256Mb region
+ */
+ bl load_segment_registers
+ sync
+ isync
+
+ /* r1 now points at the start of sleep_storage (physical) */
+ subi r1,r1,SL_PC
+
+ /* Restore various CPU config stuffs */
+ bl __restore_cpu_setup
+
+ /* Make sure all FPRs have been initialized */
+ bl reloc_offset
+ bl __init_fpu_registers
+
+ /* Invalidate & enable L1 cache, we don't care about
+ * whatever the ROM may have tried to write to memory
+ */
+ bl __inval_enable_L1
+
+ /* Restore the BATs, and SDR1. Then we can turn on the MMU. */
+ lwz r4,SL_SDR1(r1)
+ mtsdr1 r4
+ lwz r4,SL_SPRG0(r1)
+ mtsprg 0,r4
+ lwz r4,SL_SPRG0+4(r1)
+ mtsprg 1,r4
+ lwz r4,SL_SPRG0+8(r1)
+ mtsprg 2,r4
+ lwz r4,SL_SPRG0+12(r1)
+ mtsprg 3,r4
+
+ lwz r4,SL_DBAT0(r1)
+ mtdbatu 0,r4
+ lwz r4,SL_DBAT0+4(r1)
+ mtdbatl 0,r4
+ lwz r4,SL_DBAT1(r1)
+ mtdbatu 1,r4
+ lwz r4,SL_DBAT1+4(r1)
+ mtdbatl 1,r4
+ lwz r4,SL_DBAT2(r1)
+ mtdbatu 2,r4
+ lwz r4,SL_DBAT2+4(r1)
+ mtdbatl 2,r4
+ lwz r4,SL_DBAT3(r1)
+ mtdbatu 3,r4
+ lwz r4,SL_DBAT3+4(r1)
+ mtdbatl 3,r4
+ lwz r4,SL_IBAT0(r1)
+ mtibatu 0,r4
+ lwz r4,SL_IBAT0+4(r1)
+ mtibatl 0,r4
+ lwz r4,SL_IBAT1(r1)
+ mtibatu 1,r4
+ lwz r4,SL_IBAT1+4(r1)
+ mtibatl 1,r4
+ lwz r4,SL_IBAT2(r1)
+ mtibatu 2,r4
+ lwz r4,SL_IBAT2+4(r1)
+ mtibatl 2,r4
+ lwz r4,SL_IBAT3(r1)
+ mtibatu 3,r4
+ lwz r4,SL_IBAT3+4(r1)
+ mtibatl 3,r4
+
+ /* BATs 4-7 are only restored when MMU_FTR_USE_HIGH_BATS is set */
+BEGIN_MMU_FTR_SECTION
+ lwz r4,SL_DBAT4(r1)
+ mtspr SPRN_DBAT4U,r4
+ lwz r4,SL_DBAT4+4(r1)
+ mtspr SPRN_DBAT4L,r4
+ lwz r4,SL_DBAT5(r1)
+ mtspr SPRN_DBAT5U,r4
+ lwz r4,SL_DBAT5+4(r1)
+ mtspr SPRN_DBAT5L,r4
+ lwz r4,SL_DBAT6(r1)
+ mtspr SPRN_DBAT6U,r4
+ lwz r4,SL_DBAT6+4(r1)
+ mtspr SPRN_DBAT6L,r4
+ lwz r4,SL_DBAT7(r1)
+ mtspr SPRN_DBAT7U,r4
+ lwz r4,SL_DBAT7+4(r1)
+ mtspr SPRN_DBAT7L,r4
+ lwz r4,SL_IBAT4(r1)
+ mtspr SPRN_IBAT4U,r4
+ lwz r4,SL_IBAT4+4(r1)
+ mtspr SPRN_IBAT4L,r4
+ lwz r4,SL_IBAT5(r1)
+ mtspr SPRN_IBAT5U,r4
+ lwz r4,SL_IBAT5+4(r1)
+ mtspr SPRN_IBAT5L,r4
+ lwz r4,SL_IBAT6(r1)
+ mtspr SPRN_IBAT6U,r4
+ lwz r4,SL_IBAT6+4(r1)
+ mtspr SPRN_IBAT6L,r4
+ lwz r4,SL_IBAT7(r1)
+ mtspr SPRN_IBAT7U,r4
+ lwz r4,SL_IBAT7+4(r1)
+ mtspr SPRN_IBAT7L,r4
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS)
+
+ /* Flush all TLBs: one tlbie per 4K step from 0x0ffff000 down to 0.
+ * addic. sets CR0 from the decremented address, which stays positive
+ * until it reaches 0, so the loop has to continue on "greater than".
+ */
+ lis r4,0x1000
+1: addic. r4,r4,-0x1000
+ tlbie r4
+ bgt 1b
+ sync
+
+ /* Restore TB */
+ /* (the lower half is zeroed first, presumably so that it cannot
+ * carry into the upper half while the two are written -- confirm)
+ */
+ li r3,0
+ mttbl r3
+ lwz r3,SL_TB(r1)
+ lwz r4,SL_TB+4(r1)
+ mttbu r3
+ mttbl r4
+
+ /* Restore the callee-saved registers and return */
+ lwz r0,SL_CR(r1)
+ mtcr r0
+ lwz r2,SL_R2(r1)
+ lmw r12,SL_R12(r1)
+
+ /* restore the MSR and SP and turn on the MMU and return */
+ /* (rfi loads the PC from SRR0 = saved LR and the MSR from SRR1) */
+ lwz r3,SL_MSR(r1)
+ lwz r4,SL_LR(r1)
+ lwz r1,SL_SP(r1)
+ mtsrr0 r4
+ mtsrr1 r3
+ sync
+ isync
+ rfi
+_ASM_NOKPROBE_SYMBOL(grackle_wake_up)
+
+#endif /* CONFIG_PPC_BOOK3S_32 */
+
+ .section .bss
+ .balign L1_CACHE_BYTES
+/* Save area written by low_sleep_handler; layout given by the SL_* offsets */
+sleep_storage:
+ .space SL_SIZE
+ .balign L1_CACHE_BYTES, 0
+
+#endif /* CONFIG_PM || CONFIG_CPU_FREQ_PMAC || (CONFIG_HOTPLUG_CPU && CONFIG_PPC32) */
+ .section .text
diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c
new file mode 100644
index 000000000..8be71920e
--- /dev/null
+++ b/arch/powerpc/platforms/powermac/smp.c
@@ -0,0 +1,1025 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * SMP support for power macintosh.
+ *
+ * We support both the old "powersurge" SMP architecture
+ * and the current Core99 (G4 PowerMac) machines.
+ *
+ * Note that we don't support the very first rev. of
+ * Apple/DayStar 2 CPUs board, the one with the funky
+ * watchdog. Hopefully, none of these should be there except
+ * maybe internally to Apple. I should probably still add some
+ * code to detect this card though and disable SMP. --BenH.
+ *
+ * Support Macintosh G4 SMP by Troy Benjegerdes (hozer@drgw.net)
+ * and Ben Herrenschmidt <benh@kernel.crashing.org>.
+ *
+ * Support for DayStar quad CPU cards
+ * Copyright (C) XLR8, Inc. 1994-2000
+ */
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/sched/hotplug.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/irqdomain.h>
+#include <linux/kernel_stat.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/errno.h>
+#include <linux/hardirq.h>
+#include <linux/cpu.h>
+#include <linux/compiler.h>
+#include <linux/pgtable.h>
+
+#include <asm/ptrace.h>
+#include <linux/atomic.h>
+#include <asm/code-patching.h>
+#include <asm/irq.h>
+#include <asm/page.h>
+#include <asm/sections.h>
+#include <asm/io.h>
+#include <asm/smp.h>
+#include <asm/machdep.h>
+#include <asm/pmac_feature.h>
+#include <asm/time.h>
+#include <asm/mpic.h>
+#include <asm/cacheflush.h>
+#include <asm/keylargo.h>
+#include <asm/pmac_low_i2c.h>
+#include <asm/pmac_pfunc.h>
+#include <asm/inst.h>
+
+#include "pmac.h"
+
+#undef DEBUG
+
+#ifdef DEBUG
+#define DBG(fmt...) udbg_printf(fmt)
+#else
+#define DBG(fmt...)
+#endif
+
+extern void __secondary_start_pmac_0(void);
+
+static void (*pmac_tb_freeze)(int freeze);
+static u64 timebase;
+static int tb_req;
+
+#ifdef CONFIG_PPC_PMAC32_PSURGE
+
+/*
+ * Powersurge (old powermac SMP) support.
+ */
+
+/* Addresses for powersurge registers */
+#define HAMMERHEAD_BASE 0xf8000000
+#define HHEAD_CONFIG 0x90
+#define HHEAD_SEC_INTR 0xc0
+
+/* register for interrupting the primary processor on the powersurge */
+/* N.B. this is actually the ethernet ROM! */
+#define PSURGE_PRI_INTR 0xf3019000
+
+/* register for storing the start address for the secondary processor */
+/* N.B. this is the PCI config space address register for the 1st bridge */
+#define PSURGE_START 0xf2800000
+
+/* Daystar/XLR8 4-CPU card */
+#define PSURGE_QUAD_REG_ADDR 0xf8800000
+
+#define PSURGE_QUAD_IRQ_SET 0
+#define PSURGE_QUAD_IRQ_CLR 1
+#define PSURGE_QUAD_IRQ_PRIMARY 2
+#define PSURGE_QUAD_CKSTOP_CTL 3
+#define PSURGE_QUAD_PRIMARY_ARB 4
+#define PSURGE_QUAD_BOARD_ID 6
+#define PSURGE_QUAD_WHICH_CPU 7
+#define PSURGE_QUAD_CKSTOP_RDBK 8
+#define PSURGE_QUAD_RESET_CTL 11
+
+#define PSURGE_QUAD_OUT(r, v) (out_8(quad_base + ((r) << 4) + 4, (v)))
+#define PSURGE_QUAD_IN(r) (in_8(quad_base + ((r) << 4) + 4) & 0x0f)
+#define PSURGE_QUAD_BIS(r, v) (PSURGE_QUAD_OUT((r), PSURGE_QUAD_IN(r) | (v)))
+#define PSURGE_QUAD_BIC(r, v) (PSURGE_QUAD_OUT((r), PSURGE_QUAD_IN(r) & ~(v)))
+
+/* virtual addresses for the above */
+static volatile u8 __iomem *hhead_base;
+static volatile u8 __iomem *quad_base;
+static volatile u32 __iomem *psurge_pri_intr;
+static volatile u8 __iomem *psurge_sec_intr;
+static volatile u32 __iomem *psurge_start;
+
+/* values for psurge_type */
+#define PSURGE_NONE -1
+#define PSURGE_DUAL 0
+#define PSURGE_QUAD_OKEE 1
+#define PSURGE_QUAD_COTTON 2
+#define PSURGE_QUAD_ICEGRASS 3
+
+/* what sort of powersurge board we have */
+static int psurge_type = PSURGE_NONE;
+
+/* irq for secondary cpus to report */
+static struct irq_domain *psurge_host;
+int psurge_secondary_virq;
+
+/*
+ * Set and clear IPIs for powersurge.
+ */
+static inline void psurge_set_ipi(int cpu)
+{
+	/* Nothing to poke when no powersurge board was detected */
+	if (psurge_type == PSURGE_NONE)
+		return;
+
+	/* CPU 0 is handled by a read of the psurge_pri_intr register */
+	if (cpu == 0) {
+		in_be32(psurge_pri_intr);
+		return;
+	}
+
+	/* Secondaries: one register on dual cards, a per-CPU bit on quads */
+	if (psurge_type == PSURGE_DUAL)
+		out_8(psurge_sec_intr, 0);
+	else
+		PSURGE_QUAD_OUT(PSURGE_QUAD_IRQ_SET, 1 << cpu);
+}
+
+static inline void psurge_clr_ipi(int cpu)
+{
+	/* Only secondary CPUs (cpu > 0) have anything to clear here */
+	if (cpu <= 0)
+		return;
+
+	if (psurge_type == PSURGE_NONE)
+		return;
+
+	if (psurge_type == PSURGE_DUAL)
+		out_8(psurge_sec_intr, ~0);
+	else
+		PSURGE_QUAD_OUT(PSURGE_QUAD_IRQ_CLR, 1 << cpu);
+}
+
+/*
+ * On powersurge (old SMP powermac architecture) we don't have
+ * separate IPIs for separate messages like openpic does. Instead
+ * use the generic demux helpers
+ * -- paulus.
+ */
+static irqreturn_t psurge_ipi_intr(int irq, void *d)
+{
+ /* Clear the IPI for the CPU we are running on, then demux the messages */
+ psurge_clr_ipi(smp_processor_id());
+ smp_ipi_demux();
+
+ return IRQ_HANDLED;
+}
+
+static void smp_psurge_cause_ipi(int cpu)
+{
+ psurge_set_ipi(cpu);
+}
+
+static int psurge_host_map(struct irq_domain *h, unsigned int virq,
+ irq_hw_number_t hw)
+{
+ irq_set_chip_and_handler(virq, &dummy_irq_chip, handle_percpu_irq);
+
+ return 0;
+}
+
+static const struct irq_domain_ops psurge_host_ops = {
+ .map = psurge_host_map,
+};
+
+/*
+ * Create a no-map irq domain with one direct mapping and install
+ * psurge_ipi_intr() on it as the per-CPU "IPI" handler.
+ *
+ * Returns 0 on success. rc starts as -ENOMEM and is only overwritten by
+ * the request_irq() result, so a failed domain or mapping creation is
+ * reported as -ENOMEM.
+ */
+static int __init psurge_secondary_ipi_init(void)
+{
+ int rc = -ENOMEM;
+
+ psurge_host = irq_domain_add_nomap(NULL, ~0, &psurge_host_ops, NULL);
+
+ if (psurge_host)
+ psurge_secondary_virq = irq_create_direct_mapping(psurge_host);
+
+ /* psurge_secondary_virq stays 0 if the steps above did not set it */
+ if (psurge_secondary_virq)
+ rc = request_irq(psurge_secondary_virq, psurge_ipi_intr,
+ IRQF_PERCPU | IRQF_NO_THREAD, "IPI", NULL);
+
+ if (rc)
+ pr_err("Failed to setup secondary cpu IPI\n");
+
+ return rc;
+}
+
+/*
+ * Determine a quad card presence. We read the board ID register, we
+ * force the data bus to change to something else, and we read it again.
+ * If it's stable, then the register probably exists (ugh !)
+ */
+/*
+ * Returns the board ID read from the quad card (one of the PSURGE_QUAD_*
+ * type values) when it is in range and stays stable, PSURGE_DUAL otherwise.
+ */
+static int __init psurge_quad_probe(void)
+{
+ int type;
+ unsigned int i;
+
+ /* The ID must be a known quad type and read back identically twice */
+ type = PSURGE_QUAD_IN(PSURGE_QUAD_BOARD_ID);
+ if (type < PSURGE_QUAD_OKEE || type > PSURGE_QUAD_ICEGRASS
+ || type != PSURGE_QUAD_IN(PSURGE_QUAD_BOARD_ID))
+ return PSURGE_DUAL;
+
+ /* looks OK, try a slightly more rigorous test */
+ /* bogus is not necessarily cacheline-aligned,
+ though I don't suppose that really matters. -- paulus */
+ for (i = 0; i < 100; i++) {
+ volatile u32 bogus[8];
+ /* Rotate the patterns through the array on every iteration */
+ bogus[(0+i)%8] = 0x00000000;
+ bogus[(1+i)%8] = 0x55555555;
+ bogus[(2+i)%8] = 0xFFFFFFFF;
+ bogus[(3+i)%8] = 0xAAAAAAAA;
+ bogus[(4+i)%8] = 0x33333333;
+ bogus[(5+i)%8] = 0xCCCCCCCC;
+ bogus[(6+i)%8] = 0xCCCCCCCC;
+ bogus[(7+i)%8] = 0x33333333;
+ wmb();
+ /* Flush the line holding bogus before re-reading the ID */
+ asm volatile("dcbf 0,%0" : : "r" (bogus) : "memory");
+ mb();
+ if (type != PSURGE_QUAD_IN(PSURGE_QUAD_BOARD_ID))
+ return PSURGE_DUAL;
+ }
+ return type;
+}
+
+/*
+ * Run the quad-card bring-up register sequence.
+ *
+ * procbits is the inverted PSURGE_QUAD_WHICH_CPU readback. The sequence
+ * sets/clears those bits in the reset, clock-stop and primary-arbitration
+ * control registers with 33 ms pauses in between; ICEGRASS boards skip
+ * the clock-stop accesses and get an extra reset-control write up front.
+ * NOTE(review): the hardware meaning of each step is not documented here;
+ * the statement order is presumably significant -- do not reorder.
+ */
+static void __init psurge_quad_init(void)
+{
+ int procbits;
+
+ if (ppc_md.progress) ppc_md.progress("psurge_quad_init", 0x351);
+ procbits = ~PSURGE_QUAD_IN(PSURGE_QUAD_WHICH_CPU);
+ if (psurge_type == PSURGE_QUAD_ICEGRASS)
+ PSURGE_QUAD_BIS(PSURGE_QUAD_RESET_CTL, procbits);
+ else
+ PSURGE_QUAD_BIC(PSURGE_QUAD_CKSTOP_CTL, procbits);
+ mdelay(33);
+ out_8(psurge_sec_intr, ~0);
+ PSURGE_QUAD_OUT(PSURGE_QUAD_IRQ_CLR, procbits);
+ PSURGE_QUAD_BIS(PSURGE_QUAD_RESET_CTL, procbits);
+ if (psurge_type != PSURGE_QUAD_ICEGRASS)
+ PSURGE_QUAD_BIS(PSURGE_QUAD_CKSTOP_CTL, procbits);
+ PSURGE_QUAD_BIC(PSURGE_QUAD_PRIMARY_ARB, procbits);
+ mdelay(33);
+ PSURGE_QUAD_BIC(PSURGE_QUAD_RESET_CTL, procbits);
+ mdelay(33);
+ PSURGE_QUAD_BIS(PSURGE_QUAD_PRIMARY_ARB, procbits);
+ mdelay(33);
+}
+
+/*
+ * Detect a powersurge dual or quad CPU card, set psurge_type, map the
+ * registers used for IPIs and CPU start, and mark the extra CPUs present.
+ * Leaves psurge_type at PSURGE_NONE when there is no "hammerhead" node or
+ * the card is not a dual-cpu one.
+ */
+static void __init smp_psurge_probe(void)
+{
+ int i, ncpus;
+ struct device_node *dn;
+
+ /*
+ * The powersurge cpu board can be used in the generation
+ * of powermacs that have a socket for an upgradeable cpu card,
+ * including the 7500, 8500, 9500, 9600.
+ * The device tree doesn't tell you if you have 2 cpus because
+ * OF doesn't know anything about the 2nd processor.
+ * Instead we look for magic bits in magic registers,
+ * in the hammerhead memory controller in the case of the
+ * dual-cpu powersurge board. -- paulus.
+ */
+ dn = of_find_node_by_name(NULL, "hammerhead");
+ if (dn == NULL)
+ return;
+ of_node_put(dn);
+
+ /* NOTE(review): the ioremap() results below are used unchecked */
+ hhead_base = ioremap(HAMMERHEAD_BASE, 0x800);
+ quad_base = ioremap(PSURGE_QUAD_REG_ADDR, 1024);
+ psurge_sec_intr = hhead_base + HHEAD_SEC_INTR;
+
+ psurge_type = psurge_quad_probe();
+ if (psurge_type != PSURGE_DUAL) {
+ psurge_quad_init();
+ /* All released cards using this HW design have 4 CPUs */
+ ncpus = 4;
+ /* Not sure how timebase sync works on those, let's use SW */
+ smp_ops->give_timebase = smp_generic_give_timebase;
+ smp_ops->take_timebase = smp_generic_take_timebase;
+ } else {
+ /* No quad card: its register window is no longer needed */
+ iounmap(quad_base);
+ if ((in_8(hhead_base + HHEAD_CONFIG) & 0x02) == 0) {
+ /* not a dual-cpu card */
+ iounmap(hhead_base);
+ psurge_type = PSURGE_NONE;
+ return;
+ }
+ ncpus = 2;
+ }
+
+ /* Without the secondary IPI no extra CPU is marked present */
+ if (psurge_secondary_ipi_init())
+ return;
+
+ psurge_start = ioremap(PSURGE_START, 4);
+ psurge_pri_intr = ioremap(PSURGE_PRI_INTR, 4);
+
+ /* This is necessary because OF doesn't know about the
+ * secondary cpu(s), and thus there aren't nodes in the
+ * device tree for them, and smp_setup_cpu_maps hasn't
+ * set their bits in cpu_present_mask.
+ */
+ if (ncpus > NR_CPUS)
+ ncpus = NR_CPUS;
+ for (i = 1; i < ncpus ; ++i)
+ set_cpu_present(i, true);
+
+ if (ppc_md.progress) ppc_md.progress("smp_psurge_probe - done", 0x352);
+}
+
+/*
+ * Start secondary CPU @nr on a powersurge board and, on dual cards,
+ * hand it the timebase.
+ *
+ * The start address is written to psurge_start, the secondary is
+ * interrupted, and we busy-wait (no udelay: the timebase is frozen) until
+ * it shows up in cpu_callin_map[]. Interrupts are disabled for the whole
+ * sequence and restored to their entry state before returning.
+ *
+ * Always returns 0, including when the CPU never called in.
+ */
+static int __init smp_psurge_kick_cpu(int nr)
+{
+	unsigned long start = __pa(__secondary_start_pmac_0) + nr * 8;
+	unsigned long a, flags;
+	int i, j;
+
+	/* Defining this here is evil ... but I prefer hiding that
+	 * crap to avoid giving people ideas that they can do the
+	 * same.
+	 */
+	extern volatile unsigned int cpu_callin_map[NR_CPUS];
+
+	/* may need to flush here if secondary bats aren't setup */
+	for (a = KERNELBASE; a < KERNELBASE + 0x800000; a += 32)
+		asm volatile("dcbf 0,%0" : : "r" (a) : "memory");
+	asm volatile("sync");
+
+	if (ppc_md.progress) ppc_md.progress("smp_psurge_kick_cpu", 0x353);
+
+	/* This is going to freeze the timebase, we disable interrupts */
+	local_irq_save(flags);
+
+	out_be32(psurge_start, start);
+	mb();
+
+	psurge_set_ipi(nr);
+
+	/*
+	 * We can't use udelay here because the timebase is now frozen.
+	 */
+	for (i = 0; i < 2000; ++i)
+		asm volatile("nop" : : : "memory");
+	psurge_clr_ipi(nr);
+
+	/*
+	 * Also, because the timebase is frozen, we must not return to the
+	 * caller which will try to do udelay's etc... Instead, we wait -here-
+	 * for the CPU to callin.
+	 */
+	for (i = 0; i < 100000 && !cpu_callin_map[nr]; ++i) {
+		for (j = 1; j < 10000; j++)
+			asm volatile("nop" : : : "memory");
+		asm volatile("sync" : : : "memory");
+	}
+	if (!cpu_callin_map[nr])
+		goto stuck;
+
+	/* And we do the TB sync here too for standard dual CPU cards */
+	if (psurge_type == PSURGE_DUAL) {
+		/* Wait for the secondary to ask for the timebase ... */
+		while(!tb_req)
+			barrier();
+		tb_req = 0;
+		mb();
+		/* ... publish it, then wait until the secondary zeroes it */
+		timebase = get_tb();
+		mb();
+		while (timebase)
+			barrier();
+		mb();
+	}
+ stuck:
+	/* now interrupt the secondary, restarting both TBs */
+	if (psurge_type == PSURGE_DUAL)
+		psurge_set_ipi(1);
+
+	/* Pair the local_irq_save() above: don't return with irqs off */
+	local_irq_restore(flags);
+
+	if (ppc_md.progress) ppc_md.progress("smp_psurge_kick_cpu - done", 0x354);
+
+	return 0;
+}
+
+/*
+ * Per-CPU setup hook. Only acts for CPU 0, and only once psurge_start
+ * has been mapped: resets the secondary entry point and installs the
+ * primary IPI handler on hardware irq 30.
+ */
+static void __init smp_psurge_setup_cpu(int cpu_nr)
+{
+	unsigned long flags = IRQF_PERCPU | IRQF_NO_THREAD;
+	int irq;
+
+	if (cpu_nr != 0 || !psurge_start)
+		return;
+
+	/* reset the entry point so if we get another intr we won't
+	 * try to startup again */
+	out_be32(psurge_start, 0x100);
+	irq = irq_create_mapping(NULL, 30);
+	/* irq_create_mapping() returns 0 when no mapping could be made */
+	if (!irq || request_irq(irq, psurge_ipi_intr, flags, "primary IPI", NULL))
+		printk(KERN_ERR "Couldn't get primary IPI interrupt\n");
+}
+
+/*
+ * Secondary-CPU side of the dual-card timebase handshake (the other side
+ * lives in smp_psurge_kick_cpu()): raise tb_req, wait for a non-zero
+ * value in 'timebase', load it, then zero 'timebase' as acknowledgement.
+ * Does nothing for anything but PSURGE_DUAL.
+ */
+void __init smp_psurge_take_timebase(void)
+{
+ if (psurge_type != PSURGE_DUAL)
+ return;
+
+ tb_req = 1;
+ mb();
+ while (!timebase)
+ barrier();
+ mb();
+ /* upper and lower 32 bits of the 64-bit value */
+ set_tb(timebase >> 32, timebase & 0xffffffff);
+ timebase = 0;
+ mb();
+ set_dec(tb_ticks_per_jiffy/2);
+}
+
+void __init smp_psurge_give_timebase(void)
+{
+ /* Nothing to do here */
+}
+
+/* PowerSurge-style Macs */
+struct smp_ops_t psurge_smp_ops = {
+ .message_pass = NULL, /* Use smp_muxed_ipi_message_pass */
+ .cause_ipi = smp_psurge_cause_ipi,
+ .cause_nmi_ipi = NULL,
+ .probe = smp_psurge_probe,
+ .kick_cpu = smp_psurge_kick_cpu,
+ .setup_cpu = smp_psurge_setup_cpu,
+ .give_timebase = smp_psurge_give_timebase,
+ .take_timebase = smp_psurge_take_timebase,
+};
+#endif /* CONFIG_PPC_PMAC32_PSURGE */
+
+/*
+ * Core 99 and later support
+ */
+
+
+/*
+ * Giving side of the Core99 timebase handshake: wait for tb_req, freeze
+ * the timebase through pmac_tb_freeze, publish the current value in
+ * 'timebase', wait until the taker zeroes it, then unfreeze. Runs with
+ * local interrupts disabled.
+ */
+static void smp_core99_give_timebase(void)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+
+ while(!tb_req)
+ barrier();
+ tb_req = 0;
+ (*pmac_tb_freeze)(1);
+ mb();
+ timebase = get_tb();
+ mb();
+ /* smp_core99_take_timebase() clears 'timebase' once it has loaded it */
+ while (timebase)
+ barrier();
+ mb();
+ (*pmac_tb_freeze)(0);
+ mb();
+
+ local_irq_restore(flags);
+}
+
+
+/*
+ * Taking side of the Core99 timebase handshake: raise tb_req, wait for a
+ * non-zero 'timebase', load it with set_tb() and zero 'timebase' to
+ * acknowledge. Runs with local interrupts disabled.
+ */
+static void smp_core99_take_timebase(void)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+
+ tb_req = 1;
+ mb();
+ while (!timebase)
+ barrier();
+ mb();
+ /* upper and lower 32 bits of the 64-bit value */
+ set_tb(timebase >> 32, timebase & 0xffffffff);
+ timebase = 0;
+ mb();
+
+ local_irq_restore(flags);
+}
+
+#ifdef CONFIG_PPC64
+/*
+ * G5s enable/disable the timebase via an i2c-connected clock chip.
+ */
+static struct pmac_i2c_bus *pmac_tb_clock_chip_host;
+static u8 pmac_tb_pulsar_addr;
+
+/*
+ * Freeze (@freeze non-zero) or unfreeze the timebase through the Cypress
+ * clock chip: read register 0x81 over i2c, rewrite the bits under mask
+ * 0x0c and write the byte back. Panics if either transfer fails.
+ */
+static void smp_core99_cypress_tb_freeze(int freeze)
+{
+	u8 data;
+	int rc;
+
+	/* Strangely, the device-tree says address is 0xd2, but darwin
+	 * accesses 0xd0 ...
+	 */
+	pmac_i2c_setmode(pmac_tb_clock_chip_host,
+			 pmac_i2c_mode_combined);
+	rc = pmac_i2c_xfer(pmac_tb_clock_chip_host,
+			   0xd0 | pmac_i2c_read,
+			   1, 0x81, &data, 1);
+	if (rc != 0)
+		goto bail;
+
+	/* Bits 0x0c cleared when freezing, set when unfreezing */
+	data = (data & 0xf3) | (freeze ? 0x00 : 0x0c);
+
+	pmac_i2c_setmode(pmac_tb_clock_chip_host, pmac_i2c_mode_stdsub);
+	rc = pmac_i2c_xfer(pmac_tb_clock_chip_host,
+			   0xd0 | pmac_i2c_write,
+			   1, 0x81, &data, 1);
+
+ bail:
+	if (rc != 0) {
+		/* Same log level as the Pulsar variant of this helper */
+		printk(KERN_ERR "Cypress Timebase %s rc: %d\n",
+		       freeze ? "freeze" : "unfreeze", rc);
+		panic("Timebase freeze failed !\n");
+	}
+}
+
+
+/*
+ * Freeze (@freeze non-zero) or unfreeze the timebase through the Pulsar
+ * clock chip at i2c address pmac_tb_pulsar_addr: read register 0x2e,
+ * keep the bits under mask 0x88, OR in 0x11 (freeze) or 0x22 (unfreeze)
+ * and write the byte back. Panics if either transfer fails.
+ */
+static void smp_core99_pulsar_tb_freeze(int freeze)
+{
+ u8 data;
+ int rc;
+
+ pmac_i2c_setmode(pmac_tb_clock_chip_host,
+ pmac_i2c_mode_combined);
+ rc = pmac_i2c_xfer(pmac_tb_clock_chip_host,
+ pmac_tb_pulsar_addr | pmac_i2c_read,
+ 1, 0x2e, &data, 1);
+ if (rc != 0)
+ goto bail;
+
+ data = (data & 0x88) | (freeze ? 0x11 : 0x22);
+
+ pmac_i2c_setmode(pmac_tb_clock_chip_host, pmac_i2c_mode_stdsub);
+ rc = pmac_i2c_xfer(pmac_tb_clock_chip_host,
+ pmac_tb_pulsar_addr | pmac_i2c_write,
+ 1, 0x2e, &data, 1);
+ bail:
+ /* Reached on both the success and the failure path; rc tells which */
+ if (rc != 0) {
+ printk(KERN_ERR "Pulsar Timebase %s rc: %d\n",
+ freeze ? "freeze" : "unfreeze", rc);
+ panic("Timebase freeze failed !\n");
+ }
+}
+
+/*
+ * Look for an "i2c-hwclock" clock chip below a "uni-n-i2c" bus and, when
+ * a supported one (Pulsar or Cypress) is found, select the matching
+ * pmac_tb_freeze implementation and open its i2c bus.
+ *
+ * On any failure pmac_tb_freeze and pmac_tb_clock_chip_host are both left
+ * NULL so the caller can fall back to another sync method.
+ * @ncpus is currently unused.
+ */
+static void __init smp_core99_setup_i2c_hwsync(int ncpus)
+{
+	struct device_node *cc = NULL;
+	struct device_node *p;
+	const char *name = NULL;
+	const u32 *reg;
+	int ok;
+
+	/* Look for the clock chip */
+	for_each_node_by_name(cc, "i2c-hwclock") {
+		p = of_get_parent(cc);
+		ok = p && of_device_is_compatible(p, "uni-n-i2c");
+		of_node_put(p);
+		if (!ok)
+			continue;
+
+		pmac_tb_clock_chip_host = pmac_i2c_find_bus(cc);
+		if (pmac_tb_clock_chip_host == NULL)
+			continue;
+		reg = of_get_property(cc, "reg", NULL);
+		if (reg == NULL)
+			continue;
+		switch (*reg) {
+		case 0xd2:
+			if (of_device_is_compatible(cc,"pulsar-legacy-slewing")) {
+				pmac_tb_freeze = smp_core99_pulsar_tb_freeze;
+				pmac_tb_pulsar_addr = 0xd2;
+				name = "Pulsar";
+			} else if (of_device_is_compatible(cc, "cy28508")) {
+				pmac_tb_freeze = smp_core99_cypress_tb_freeze;
+				name = "Cypress";
+			}
+			break;
+		case 0xd4:
+			pmac_tb_freeze = smp_core99_pulsar_tb_freeze;
+			pmac_tb_pulsar_addr = 0xd4;
+			name = "Pulsar";
+			break;
+		default:
+			break;
+		}
+		if (pmac_tb_freeze != NULL) {
+			/*
+			 * Leaving the iterator early: it will not drop the
+			 * reference it took on cc, so do that here.
+			 */
+			of_node_put(cc);
+			break;
+		}
+	}
+	if (pmac_tb_freeze != NULL) {
+		/* Open i2c bus for synchronous access */
+		if (pmac_i2c_open(pmac_tb_clock_chip_host, 1)) {
+			printk(KERN_ERR "Failed to open i2c bus for clock"
+			       " sync, fallback to software sync !\n");
+			goto no_i2c_sync;
+		}
+		printk(KERN_INFO "Processor timebase sync using %s i2c clock\n",
+		       name);
+		return;
+	}
+ no_i2c_sync:
+	pmac_tb_freeze = NULL;
+	pmac_tb_clock_chip_host = NULL;
+}
+
+
+
+/*
+ * Newer G5s use a platform function
+ */
+
+/*
+ * Freeze or unfreeze the timebase by calling the "cpu-timebase" platform
+ * function on the /cpus node. The argument passed is !freeze.
+ * The pmf_call_function() result is not checked.
+ */
+static void smp_core99_pfunc_tb_freeze(int freeze)
+{
+ struct device_node *cpus;
+ struct pmf_args args;
+
+ cpus = of_find_node_by_path("/cpus");
+ BUG_ON(cpus == NULL);
+ args.count = 1;
+ args.u[0].v = !freeze;
+ pmf_call_function(cpus, "cpu-timebase", &args);
+ of_node_put(cpus);
+}
+
+#else /* CONFIG_PPC64 */
+
+/*
+ * SMP G4s use a GPIO to enable/disable the timebase.
+ */
+
+static unsigned int core99_tb_gpio; /* Timebase freeze GPIO */
+
+static void smp_core99_gpio_tb_freeze(int freeze)
+{
+	/* Write 4 to the timebase GPIO to freeze, 0 to let it run */
+	pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, core99_tb_gpio,
+			  freeze ? 4 : 0);
+
+	/* Follow the write with a read of the same GPIO */
+	pmac_call_feature(PMAC_FTR_READ_GPIO, NULL, core99_tb_gpio, 0);
+}
+
+
+#endif /* !CONFIG_PPC64 */
+
+/*
+ * 32-bit only (empty on PPC64): on CPU 0 record the L2CR/L3CR values in
+ * function-static variables; on any other CPU write those recorded values
+ * into its own L2CR/L3CR, zeroing each register first. Each register is
+ * only touched when the corresponding CPU feature is present.
+ */
+static void core99_init_caches(int cpu)
+{
+#ifndef CONFIG_PPC64
+ /* L2 and L3 cache settings to pass from CPU0 to CPU1 on G4 cpus */
+ static long int core99_l2_cache;
+ static long int core99_l3_cache;
+
+ if (!cpu_has_feature(CPU_FTR_L2CR))
+ return;
+
+ if (cpu == 0) {
+ core99_l2_cache = _get_L2CR();
+ printk("CPU0: L2CR is %lx\n", core99_l2_cache);
+ } else {
+ printk("CPU%d: L2CR was %lx\n", cpu, _get_L2CR());
+ _set_L2CR(0);
+ _set_L2CR(core99_l2_cache);
+ printk("CPU%d: L2CR set to %lx\n", cpu, core99_l2_cache);
+ }
+
+ /* L3 is only considered when L2CR is present (early return above) */
+ if (!cpu_has_feature(CPU_FTR_L3CR))
+ return;
+
+ if (cpu == 0){
+ core99_l3_cache = _get_L3CR();
+ printk("CPU0: L3CR is %lx\n", core99_l3_cache);
+ } else {
+ printk("CPU%d: L3CR was %lx\n", cpu, _get_L3CR());
+ _set_L3CR(0);
+ _set_L3CR(core99_l3_cache);
+ printk("CPU%d: L3CR set to %lx\n", cpu, core99_l3_cache);
+ }
+#endif /* !CONFIG_PPC64 */
+}
+
+/*
+ * Choose the timebase synchronisation method for Core99 machines and
+ * store it in pmac_tb_freeze: i2c clock chip or platform function on
+ * PPC64, a GPIO on 32-bit. When none is found, smp_ops is switched to the
+ * generic software give/take_timebase. Also sets the hard CPU ids on
+ * 32-bit and clears powersave_nap on non-MacRISC4 machines.
+ */
+static void __init smp_core99_setup(int ncpus)
+{
+#ifdef CONFIG_PPC64
+
+ /* i2c based HW sync on some G5s */
+ if (of_machine_is_compatible("PowerMac7,2") ||
+ of_machine_is_compatible("PowerMac7,3") ||
+ of_machine_is_compatible("RackMac3,1"))
+ smp_core99_setup_i2c_hwsync(ncpus);
+
+ /* pfunc based HW sync on recent G5s */
+ if (pmac_tb_freeze == NULL) {
+ struct device_node *cpus =
+ of_find_node_by_path("/cpus");
+ if (cpus &&
+ of_property_read_bool(cpus, "platform-cpu-timebase")) {
+ pmac_tb_freeze = smp_core99_pfunc_tb_freeze;
+ printk(KERN_INFO "Processor timebase sync using"
+ " platform function\n");
+ }
+ of_node_put(cpus);
+ }
+
+#else /* CONFIG_PPC64 */
+
+ /* GPIO based HW sync on ppc32 Core99 */
+ if (pmac_tb_freeze == NULL && !of_machine_is_compatible("MacRISC4")) {
+ struct device_node *cpu;
+ const u32 *tbprop = NULL;
+
+ core99_tb_gpio = KL_GPIO_TB_ENABLE; /* default value */
+ cpu = of_find_node_by_type(NULL, "cpu");
+ if (cpu != NULL) {
+ /* A "timebase-enable" property overrides the default GPIO */
+ tbprop = of_get_property(cpu, "timebase-enable", NULL);
+ if (tbprop)
+ core99_tb_gpio = *tbprop;
+ of_node_put(cpu);
+ }
+ pmac_tb_freeze = smp_core99_gpio_tb_freeze;
+ printk(KERN_INFO "Processor timebase sync using"
+ " GPIO 0x%02x\n", core99_tb_gpio);
+ }
+
+#endif /* CONFIG_PPC64 */
+
+ /* No timebase sync, fallback to software */
+ if (pmac_tb_freeze == NULL) {
+ smp_ops->give_timebase = smp_generic_give_timebase;
+ smp_ops->take_timebase = smp_generic_take_timebase;
+ printk(KERN_INFO "Processor timebase sync using software\n");
+ }
+
+#ifndef CONFIG_PPC64
+ {
+ int i;
+
+ /* XXX should get this from reg properties */
+ for (i = 1; i < ncpus; ++i)
+ set_hard_smp_processor_id(i, i);
+ }
+#endif
+
+ /* 32 bits SMP can't NAP */
+ if (!of_machine_is_compatible("MacRISC4"))
+ powersave_nap = 0;
+}
+
+/*
+ * SMP probe hook for Core99 machines: count the "cpu" nodes in the
+ * device tree and, when there are at least two, initialise the platform
+ * function and i2c layers, pick the timebase sync method, request the
+ * MPIC IPIs and record CPU 0's cache settings.
+ */
+static void __init smp_core99_probe(void)
+{
+ struct device_node *cpus;
+ int ncpus = 0;
+
+ if (ppc_md.progress) ppc_md.progress("smp_core99_probe", 0x345);
+
+ /* Count CPUs in the device-tree */
+ for_each_node_by_type(cpus, "cpu")
+ ++ncpus;
+
+ printk(KERN_INFO "PowerMac SMP probe found %d cpus\n", ncpus);
+
+ /* Nothing more to do if less than 2 of them */
+ if (ncpus <= 1)
+ return;
+
+ /* We need to perform some early initialisations before we can start
+ * setting up SMP as we are running before initcalls
+ */
+ pmac_pfunc_base_install();
+ pmac_i2c_init();
+
+ /* Setup various bits like timebase sync method, ability to nap, ... */
+ smp_core99_setup(ncpus);
+
+ /* Install IPIs */
+ mpic_request_ipis();
+
+ /* Collect l2cr and l3cr values from CPU 0 */
+ core99_init_caches(0);
+}
+
+/*
+ * Start CPU @nr on a Core99 machine: temporarily patch the word at
+ * PAGE_OFFSET+0x100 with a branch to __secondary_start_pmac_0 + nr*8,
+ * reset the CPU through PMAC_FTR_RESET_CPU, wait 1 ms and put the
+ * original word back. Interrupts are disabled around the sequence.
+ *
+ * Returns -ENOENT for @nr outside 0..3, 0 otherwise. The results of
+ * patch_branch()/patch_instruction() are not checked.
+ */
+static int smp_core99_kick_cpu(int nr)
+{
+ unsigned int save_vector;
+ unsigned long target, flags;
+ unsigned int *vector = (unsigned int *)(PAGE_OFFSET+0x100);
+
+ if (nr < 0 || nr > 3)
+ return -ENOENT;
+
+ if (ppc_md.progress)
+ ppc_md.progress("smp_core99_kick_cpu", 0x346);
+
+ local_irq_save(flags);
+
+ /* Save reset vector */
+ save_vector = *vector;
+
+ /* Setup fake reset vector that does
+ * b __secondary_start_pmac_0 + nr*8
+ * NOTE(review): BRANCH_SET_LINK is passed below, so this is presumably
+ * encoded as a branch-and-link rather than a plain branch -- confirm.
+ */
+ target = (unsigned long) __secondary_start_pmac_0 + nr * 8;
+ patch_branch(vector, target, BRANCH_SET_LINK);
+
+ /* Put some life in our friend */
+ pmac_call_feature(PMAC_FTR_RESET_CPU, NULL, nr, 0);
+
+ /* FIXME: We wait a bit for the CPU to take the exception, I should
+ * instead wait for the entry code to set something for me. Well,
+ * ideally, all that crap will be done in prom.c and the CPU left
+ * in a RAM-based wait loop like CHRP.
+ */
+ mdelay(1);
+
+ /* Restore our exception vector */
+ patch_instruction(vector, ppc_inst(save_vector));
+
+ local_irq_restore(flags);
+ if (ppc_md.progress) ppc_md.progress("smp_core99_kick_cpu done", 0x347);
+
+ return 0;
+}
+
+/*
+ * Per-CPU setup hook: apply CPU 0's recorded cache settings on every
+ * other CPU, then set up the MPIC for the calling CPU.
+ */
+static void smp_core99_setup_cpu(int cpu_nr)
+{
+ /* Setup L2/L3 */
+ /* CPU 0's values were already recorded from smp_core99_probe() */
+ if (cpu_nr != 0)
+ core99_init_caches(cpu_nr);
+
+ /* Setup openpic */
+ mpic_setup_this_cpu();
+}
+
+#ifdef CONFIG_PPC64
+#ifdef CONFIG_HOTPLUG_CPU
+static unsigned int smp_core99_host_open;
+
+/*
+ * CPU hotplug "prepare" callback: re-open the i2c bus used for timebase
+ * sync (if any) before a CPU is brought up. smp_core99_host_open tracks
+ * whether the bus is currently held open by the hotplug path.
+ *
+ * Returns 0 on success or the pmac_i2c_open() error. This is a cpuhp
+ * state callback, so the error is returned as-is rather than wrapped in
+ * a notifier-chain code.
+ */
+static int smp_core99_cpu_prepare(unsigned int cpu)
+{
+	int rc;
+
+	/* Open i2c bus if it was used for tb sync */
+	if (pmac_tb_clock_chip_host && !smp_core99_host_open) {
+		rc = pmac_i2c_open(pmac_tb_clock_chip_host, 1);
+		if (rc) {
+			pr_err("Failed to open i2c bus for time sync\n");
+			return rc;
+		}
+		smp_core99_host_open = 1;
+	}
+	return 0;
+}
+
+/*
+ * CPU hotplug "online" callback: counterpart of smp_core99_cpu_prepare(),
+ * closes the timebase-sync i2c bus if the prepare step opened it.
+ * Always returns 0.
+ */
+static int smp_core99_cpu_online(unsigned int cpu)
+{
+ /* Close i2c bus if it was used for tb sync */
+ if (pmac_tb_clock_chip_host && smp_core99_host_open) {
+ pmac_i2c_close(pmac_tb_clock_chip_host);
+ smp_core99_host_open = 0;
+ }
+ return 0;
+}
+#endif /* CONFIG_HOTPLUG_CPU */
+
+/*
+ * Called once boot-time CPU bring-up is finished (PPC64 only): release
+ * the timebase-sync i2c bus, disable CPU 1 on MacRISC4 machines when
+ * fewer than two CPUs came online, and register the hotplug callbacks
+ * that re-open/close the i2c bus around later CPU bring-ups.
+ * The cpuhp_setup_state_nocalls() results are not checked.
+ */
+static void __init smp_core99_bringup_done(void)
+{
+ /* Close i2c bus if it was used for tb sync */
+ if (pmac_tb_clock_chip_host)
+ pmac_i2c_close(pmac_tb_clock_chip_host);
+
+ /* If we didn't start the second CPU, we must take
+ * it off the bus.
+ */
+ if (of_machine_is_compatible("MacRISC4") &&
+ num_online_cpus() < 2) {
+ set_cpu_present(1, false);
+ g5_phy_disable_cpu1();
+ }
+#ifdef CONFIG_HOTPLUG_CPU
+ cpuhp_setup_state_nocalls(CPUHP_POWERPC_PMAC_PREPARE,
+ "powerpc/pmac:prepare", smp_core99_cpu_prepare,
+ NULL);
+ cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "powerpc/pmac:online",
+ smp_core99_cpu_online, NULL);
+#endif
+
+ if (ppc_md.progress)
+ ppc_md.progress("smp_core99_bringup_done", 0x349);
+}
+#endif /* CONFIG_PPC64 */
+
+#ifdef CONFIG_HOTPLUG_CPU
+
+/*
+ * cpu_disable hook: run the generic disable step, then set this CPU's
+ * MPIC priority to 0xf and clean up its MMU context.
+ * Returns the generic_cpu_disable() error, or 0.
+ */
+static int smp_core99_cpu_disable(void)
+{
+ int rc = generic_cpu_disable();
+ if (rc)
+ return rc;
+
+ mpic_cpu_set_priority(0xf);
+
+ cleanup_cpu_mmu_context();
+
+ return 0;
+}
+
+#ifdef CONFIG_PPC32
+
+/*
+ * 32-bit variant: runs on the CPU going offline. Marks it dead with
+ * interrupts disabled and hands over to low_cpu_offline_self().
+ */
+static void pmac_cpu_offline_self(void)
+{
+ int cpu = smp_processor_id();
+
+ local_irq_disable();
+ idle_task_exit();
+ pr_debug("CPU%d offline\n", cpu);
+ generic_set_cpu_dead(cpu);
+ /* Barriers before entering the low-level offline code */
+ smp_wmb();
+ mb();
+ low_cpu_offline_self();
+}
+
+#else /* CONFIG_PPC32 */
+
+/*
+ * 64-bit variant: runs on the CPU going offline. Marks it dead, then
+ * loops forever in power4_idle() with the decrementer pushed far out.
+ * Never returns.
+ */
+static void pmac_cpu_offline_self(void)
+{
+ int cpu = smp_processor_id();
+
+ local_irq_disable();
+ idle_task_exit();
+
+ /*
+ * turn off as much as possible, we'll be
+ * kicked out as this will only be invoked
+ * on core99 platforms for now ...
+ */
+
+ printk(KERN_INFO "CPU#%d offline\n", cpu);
+ generic_set_cpu_dead(cpu);
+ smp_wmb();
+
+ /*
+ * Re-enable interrupts. The NAP code needs to enable them
+ * anyways, do it now so we deal with the case where one already
+ * happened while soft-disabled.
+ * We shouldn't get any external interrupts, only decrementer, and the
+ * decrementer handler is safe for use on offline CPUs
+ */
+ local_irq_enable();
+
+ while (1) {
+ /* let's not take timer interrupts too often ... */
+ set_dec(0x7fffffff);
+
+ /* Enter NAP mode */
+ power4_idle();
+ }
+}
+
+#endif /* else CONFIG_PPC32 */
+#endif /* CONFIG_HOTPLUG_CPU */
+
+/* Core99 Macs (dual G4s and G5s) */
+static struct smp_ops_t core99_smp_ops = {
+ .message_pass = smp_mpic_message_pass,
+ .probe = smp_core99_probe,
+#ifdef CONFIG_PPC64
+ .bringup_done = smp_core99_bringup_done,
+#endif
+ .kick_cpu = smp_core99_kick_cpu,
+ .setup_cpu = smp_core99_setup_cpu,
+ .give_timebase = smp_core99_give_timebase,
+ .take_timebase = smp_core99_take_timebase,
+#if defined(CONFIG_HOTPLUG_CPU)
+ .cpu_disable = smp_core99_cpu_disable,
+ .cpu_die = generic_cpu_die,
+#endif
+};
+
+/*
+ * Select the SMP operations for this PowerMac: core99_smp_ops when a
+ * "uni-n", "u3" or "u4" node exists, otherwise psurge_smp_ops when
+ * powersurge support is configured. When neither applies, smp_ops is
+ * left untouched.
+ */
+void __init pmac_setup_smp(void)
+{
+	struct device_node *np;
+
+	/* Check for Core99 */
+	np = of_find_node_by_name(NULL, "uni-n");
+	if (!np)
+		np = of_find_node_by_name(NULL, "u3");
+	if (!np)
+		np = of_find_node_by_name(NULL, "u4");
+	if (np) {
+		of_node_put(np);
+		smp_ops = &core99_smp_ops;
+	}
+#ifdef CONFIG_PPC_PMAC32_PSURGE
+	else {
+		/* We have to set bits in cpu_possible_mask here since the
+		 * secondary CPU(s) aren't in the device tree. Various
+		 * things won't be initialized for CPUs not in the possible
+		 * map, so we really need to fix it up here.
+		 */
+		int cpu;
+
+		for (cpu = 1; cpu < 4 && cpu < NR_CPUS; ++cpu)
+			set_cpu_possible(cpu, true);
+		smp_ops = &psurge_smp_ops;
+	}
+#endif /* CONFIG_PPC_PMAC32_PSURGE */
+
+#ifdef CONFIG_HOTPLUG_CPU
+	/*
+	 * Neither branch above may have run (no Core99 node and powersurge
+	 * support not built in), so don't assume smp_ops is set.
+	 */
+	if (smp_ops)
+		smp_ops->cpu_offline_self = pmac_cpu_offline_self;
+#endif
+}
+
+
diff --git a/arch/powerpc/platforms/powermac/time.c b/arch/powerpc/platforms/powermac/time.c
new file mode 100644
index 000000000..8633891b7
--- /dev/null
+++ b/arch/powerpc/platforms/powermac/time.c
@@ -0,0 +1,243 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Support for periodic interrupts (100 per second) and for getting
+ * the current time from the RTC on Power Macintoshes.
+ *
+ * We use the decrementer register for our periodic interrupts.
+ *
+ * Paul Mackerras August 1996.
+ * Copyright (C) 1996 Paul Mackerras.
+ * Copyright (C) 2003-2005 Benjamin Herrenschmidt.
+ *
+ */
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/param.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/init.h>
+#include <linux/time.h>
+#include <linux/adb.h>
+#include <linux/cuda.h>
+#include <linux/pmu.h>
+#include <linux/interrupt.h>
+#include <linux/hardirq.h>
+#include <linux/rtc.h>
+#include <linux/of_address.h>
+
+#include <asm/early_ioremap.h>
+#include <asm/sections.h>
+#include <asm/machdep.h>
+#include <asm/time.h>
+#include <asm/nvram.h>
+#include <asm/smu.h>
+
+#include "pmac.h"
+
+#undef DEBUG
+
+#ifdef DEBUG
+#define DBG(x...) printk(x)
+#else
+#define DBG(x...)
+#endif
+
+/*
+ * Calibrate the decrementer frequency with the VIA timer 1.
+ */
+#define VIA_TIMER_FREQ_6 4700000 /* time 1 frequency * 6 */
+
+/* VIA registers */
+#define RS 0x200 /* skip between registers */
+#define T1CL (4*RS) /* Timer 1 ctr/latch (low 8 bits) */
+#define T1CH (5*RS) /* Timer 1 counter (high 8 bits) */
+#define T1LL (6*RS) /* Timer 1 latch (low 8 bits) */
+#define T1LH (7*RS) /* Timer 1 latch (high 8 bits) */
+#define ACR (11*RS) /* Auxiliary control register */
+#define IFR (13*RS) /* Interrupt flag register */
+
+/* Bits in ACR */
+#define T1MODE 0xc0 /* Timer 1 mode */
+#define T1MODE_CONT 0x40 /* continuous interrupts */
+
+/* Bits in IFR and IER */
+#define T1_INT 0x40 /* Timer 1 interrupt */
+
+/*
+ * Return the GMT delta stored in XPRAM, or 0 when NVRAM support is not
+ * built in or this is not a 32-bit kernel.
+ *
+ * The delta is assembled from the three bytes at offsets 0x9..0xb of
+ * PMAC_XPRAM_MACHINE_LOC as a 24-bit value and sign-extended to 32 bits.
+ * It is printed as delta/60 "minutes", so it is presumably in seconds.
+ */
+long __init pmac_time_init(void)
+{
+ s32 delta = 0;
+#if defined(CONFIG_NVRAM) && defined(CONFIG_PPC32)
+ int dst;
+
+ delta = ((s32)pmac_xpram_read(PMAC_XPRAM_MACHINE_LOC + 0x9)) << 16;
+ delta |= ((s32)pmac_xpram_read(PMAC_XPRAM_MACHINE_LOC + 0xa)) << 8;
+ delta |= pmac_xpram_read(PMAC_XPRAM_MACHINE_LOC + 0xb);
+ /* Bit 23 is the sign bit of the 24-bit value: extend it */
+ if (delta & 0x00800000UL)
+ delta |= 0xFF000000UL;
+ /* The DST flag is the top bit of the byte at offset 0x8 */
+ dst = ((pmac_xpram_read(PMAC_XPRAM_MACHINE_LOC + 0x8) & 0x80) != 0);
+ printk("GMT Delta read from XPRAM: %d minutes, DST: %s\n", delta/60,
+ dst ? "on" : "off");
+#endif
+ return delta;
+}
+
+#ifdef CONFIG_PMAC_SMU
+/*
+ * Read the RTC through the SMU and convert it to a time64_t.
+ * Returns 0 when smu_get_rtc_time() reports an error.
+ */
+static time64_t smu_get_time(void)
+{
+ struct rtc_time tm;
+
+ if (smu_get_rtc_time(&tm, 1))
+ return 0;
+ return rtc_tm_to_time64(&tm);
+}
+#endif
+
+/* Can't be __init, it's called when suspending and resuming */
+/*
+ * Return the current RTC time from whichever system controller
+ * (Cuda, PMU or SMU) sys_ctrler selects, or 0 when that controller is
+ * unknown or its driver is not built in.
+ */
+time64_t pmac_get_boot_time(void)
+{
+ /* Get the time from the RTC */
+ switch (sys_ctrler) {
+#ifdef CONFIG_ADB_CUDA
+ case SYS_CTRLER_CUDA:
+ return cuda_get_time();
+#endif
+#ifdef CONFIG_ADB_PMU
+ case SYS_CTRLER_PMU:
+ return pmu_get_time();
+#endif
+#ifdef CONFIG_PMAC_SMU
+ case SYS_CTRLER_SMU:
+ return smu_get_time();
+#endif
+ default:
+ return 0;
+ }
+}
+
+/*
+ * Fill @tm with the current RTC time from the system controller that
+ * sys_ctrler selects. @tm is left untouched when the controller is
+ * unknown or its driver is not built in; the smu_get_rtc_time() result
+ * is not checked.
+ */
+void pmac_get_rtc_time(struct rtc_time *tm)
+{
+ /* Get the time from the RTC */
+ switch (sys_ctrler) {
+#ifdef CONFIG_ADB_CUDA
+ case SYS_CTRLER_CUDA:
+ rtc_time64_to_tm(cuda_get_time(), tm);
+ break;
+#endif
+#ifdef CONFIG_ADB_PMU
+ case SYS_CTRLER_PMU:
+ rtc_time64_to_tm(pmu_get_time(), tm);
+ break;
+#endif
+#ifdef CONFIG_PMAC_SMU
+ case SYS_CTRLER_SMU:
+ smu_get_rtc_time(tm, 1);
+ break;
+#endif
+ default:
+ ;
+ }
+}
+
+/*
+ * Program the RTC from @tm through the system controller that sys_ctrler
+ * selects. Returns that driver's result, or -ENODEV when the controller
+ * is unknown or its driver is not built in.
+ */
+int pmac_set_rtc_time(struct rtc_time *tm)
+{
+ switch (sys_ctrler) {
+#ifdef CONFIG_ADB_CUDA
+ case SYS_CTRLER_CUDA:
+ return cuda_set_rtc_time(tm);
+#endif
+#ifdef CONFIG_ADB_PMU
+ case SYS_CTRLER_PMU:
+ return pmu_set_rtc_time(tm);
+#endif
+#ifdef CONFIG_PMAC_SMU
+ case SYS_CTRLER_SMU:
+ return smu_set_rtc_time(tm, 1);
+#endif
+ default:
+ return -ENODEV;
+ }
+}
+
+#ifdef CONFIG_PPC32
+/*
+ * Calibrate the decrementer register using VIA timer 1.
+ * This is used both on powermacs and CHRP machines.
+ */
+/*
+ * Measure the decrementer rate against VIA timer 1 and store the result
+ * in ppc_tb_freq.
+ *
+ * Timer 1 is put in continuous mode with a latch of VIA_TIMER_FREQ_6/100
+ * ticks; the decrementer is sampled at two consecutive timer expiries and
+ * the difference scaled by 100/6.
+ *
+ * Returns 1 when ppc_tb_freq was set, 0 when no VIA node or address was
+ * found or the mapping failed.
+ */
+static int __init via_calibrate_decr(void)
+{
+	struct device_node *vias;
+	volatile unsigned char __iomem *via;
+	int count = VIA_TIMER_FREQ_6 / 100;
+	unsigned int dstart, dend;
+	struct resource rsrc;
+
+	vias = of_find_node_by_name(NULL, "via-cuda");
+	if (vias == NULL)
+		vias = of_find_node_by_name(NULL, "via-pmu");
+	if (vias == NULL)
+		vias = of_find_node_by_name(NULL, "via");
+	if (vias == NULL || of_address_to_resource(vias, 0, &rsrc)) {
+		of_node_put(vias);
+		return 0;
+	}
+	of_node_put(vias);
+	via = early_ioremap(rsrc.start, resource_size(&rsrc));
+	if (via == NULL) {
+		printk(KERN_ERR "Failed to map VIA for timer calibration !\n");
+		return 0;
+	}
+
+	/* set timer 1 for continuous interrupts */
+	/* read ACR through in_8() like every other register access here */
+	out_8(&via[ACR], (in_8(&via[ACR]) & ~T1MODE) | T1MODE_CONT);
+	/* set the counter to a small value */
+	out_8(&via[T1CH], 2);
+	/* set the latch to `count' */
+	out_8(&via[T1LL], count);
+	out_8(&via[T1LH], count >> 8);
+	/* wait until it hits 0 */
+	while ((in_8(&via[IFR]) & T1_INT) == 0)
+		;
+	dstart = get_dec();
+	/* clear the interrupt & wait until it hits 0 again */
+	in_8(&via[T1CL]);
+	while ((in_8(&via[IFR]) & T1_INT) == 0)
+		;
+	dend = get_dec();
+
+	/* the decrementer counts down, hence dstart - dend */
+	ppc_tb_freq = (dstart - dend) * 100 / 6;
+
+	early_iounmap((void *)via, resource_size(&rsrc));
+
+	return 1;
+}
+#endif
+
+/*
+ * Query the OF and get the decr frequency.
+ */
+/*
+ * Always runs the generic calibration first. On 32-bit kernels the VIA
+ * based calibration then overrides it on machines that are none of
+ * MacRISC2/3/4, and on "PowerMac3,5" machines.
+ */
+void __init pmac_calibrate_decr(void)
+{
+ generic_calibrate_decr();
+
+#ifdef CONFIG_PPC32
+ /* We assume MacRISC2 machines have correct device-tree
+ * calibration. That's better since the VIA itself seems
+ * to be slightly off. --BenH
+ */
+ if (!of_machine_is_compatible("MacRISC2") &&
+ !of_machine_is_compatible("MacRISC3") &&
+ !of_machine_is_compatible("MacRISC4"))
+ if (via_calibrate_decr())
+ return;
+
+ /* Special case: QuickSilver G4s seem to have a badly calibrated
+ * timebase-frequency in OF, VIA is much better on these. We should
+ * probably implement calibration based on the KL timer on these
+ * machines anyway... -BenH
+ */
+ if (of_machine_is_compatible("PowerMac3,5"))
+ if (via_calibrate_decr())
+ return;
+#endif
+}
diff --git a/arch/powerpc/platforms/powermac/udbg_adb.c b/arch/powerpc/platforms/powermac/udbg_adb.c
new file mode 100644
index 000000000..b4756defd
--- /dev/null
+++ b/arch/powerpc/platforms/powermac/udbg_adb.c
@@ -0,0 +1,220 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/bitops.h>
+#include <linux/ptrace.h>
+#include <linux/adb.h>
+#include <linux/pmu.h>
+#include <linux/cuda.h>
+#include <linux/of.h>
+#include <asm/machdep.h>
+#include <asm/io.h>
+#include <asm/page.h>
+#include <asm/xmon.h>
+#include <asm/bootx.h>
+#include <asm/errno.h>
+#include <asm/pmac_feature.h>
+#include <asm/processor.h>
+#include <asm/delay.h>
+#include <asm/btext.h>
+#include <asm/time.h>
+#include <asm/udbg.h>
+
+/*
+ * This implementation is "special", it can "patch" the current
+ * udbg implementation and work on top of it. It must thus be
+ * initialized last
+ */
+
+/* udbg callbacks that were installed before udbg_adb_init() replaced them (may be NULL) */
+static void (*udbg_adb_old_putc)(char c);
+static int (*udbg_adb_old_getc)(void);
+static int (*udbg_adb_old_getc_poll)(void);
+
+/* ADB controller polled for keyboard input; selected in udbg_adb_init() */
+static enum {
+ input_adb_none,
+ input_adb_pmu,
+ input_adb_cuda,
+} input_type = input_adb_none;
+
+/*
+ * xmon_wants_key is set while udbg_adb_local_getc() is waiting for a key.
+ * xmon_adb_keycode is reset to -1 before each wait; it is presumably
+ * filled in by the ADB drivers -- TODO confirm against the writers.
+ */
+int xmon_wants_key, xmon_adb_keycode;
+
+/*
+ * Poll whichever low-level ADB driver was selected as the input source.
+ * Does nothing while no input source has been selected.
+ */
+static inline void udbg_adb_poll(void)
+{
+	switch (input_type) {
+#ifdef CONFIG_ADB_PMU
+	case input_adb_pmu:
+		pmu_poll_adb();
+		break;
+#endif /* CONFIG_ADB_PMU */
+#ifdef CONFIG_ADB_CUDA
+	case input_adb_cuda:
+		cuda_poll();
+		break;
+#endif /* CONFIG_ADB_CUDA */
+	default:
+		break;
+	}
+}
+
+#ifdef CONFIG_BOOTX_TEXT
+
+static int udbg_adb_use_btext;
+static int xmon_adb_shiftstate;
+
+/*
+ * Keycode -> character table used by udbg_adb_local_getc() when shift is
+ * not held. Indexed by the key-down keycode (0x00-0x7f); a 0 entry means
+ * the key produces no character and is ignored.
+ */
+static unsigned char xmon_keytab[128] =
+ "asdfhgzxcv\000bqwer" /* 0x00 - 0x0f */
+ "yt123465=97-80]o" /* 0x10 - 0x1f */
+ "u[ip\rlj'k;\\,/nm." /* 0x20 - 0x2f */
+ "\t `\177\0\033\0\0\0\0\0\0\0\0\0\0" /* 0x30 - 0x3f */
+ "\0.\0*\0+\0\0\0\0\0/\r\0-\0" /* 0x40 - 0x4f */
+ "\0\0000123456789\0\0\0"; /* 0x50 - 0x5f */
+
+/* Same layout as xmon_keytab, used while the shift state is set */
+static unsigned char xmon_shift_keytab[128] =
+ "ASDFHGZXCV\000BQWER" /* 0x00 - 0x0f */
+ "YT!@#$^%+(&_*)}O" /* 0x10 - 0x1f */
+ "U{IP\rLJ\"K:|<?NM>" /* 0x20 - 0x2f */
+ "\t ~\177\0\033\0\0\0\0\0\0\0\0\0\0" /* 0x30 - 0x3f */
+ "\0.\0*\0+\0\0\0\0\0/\r\0-\0" /* 0x40 - 0x4f */
+ "\0\0000123456789\0\0\0"; /* 0x50 - 0x5f */
+
+/*
+ * Blocking read of one character for the btext console.
+ *
+ * Busy-waits, blinking a cursor on the btext display, until either the
+ * previously installed getc_poll callback yields a character or an ADB
+ * keycode shows up in xmon_adb_keycode. ADB keycodes are translated
+ * through the (shifted) key table; shift transitions, key releases and
+ * keys without a mapping are swallowed and the wait restarts.
+ *
+ * Returns the character read.
+ */
+static int udbg_adb_local_getc(void)
+{
+ int k, t, on;
+
+ xmon_wants_key = 1;
+ for (;;) {
+ xmon_adb_keycode = -1;
+ t = 0;
+ on = 0;
+ k = -1;
+ do {
+ /* toggle the cursor block every 200000 iterations (and on entry) */
+ if (--t < 0) {
+ on = 1 - on;
+ btext_drawchar(on? 0xdb: 0x20);
+ btext_drawchar('\b');
+ t = 200000;
+ }
+ udbg_adb_poll();
+ if (udbg_adb_old_getc_poll)
+ k = udbg_adb_old_getc_poll();
+ } while (k == -1 && xmon_adb_keycode == -1);
+ /* erase the cursor if it was left drawn */
+ if (on)
+ btext_drawstring(" \b");
+ /*
+ * A character from the previous input method wins.
+ * NOTE(review): xmon_wants_key is left at 1 on this path -- confirm
+ * that is intended.
+ */
+ if (k != -1)
+ return k;
+ k = xmon_adb_keycode;
+
+ /* test for shift keys */
+ if ((k & 0x7f) == 0x38 || (k & 0x7f) == 0x7b) {
+ /* bit 0x80 clear means key down, i.e. shift is now held */
+ xmon_adb_shiftstate = (k & 0x80) == 0;
+ continue;
+ }
+ if (k >= 0x80)
+ continue; /* ignore up transitions */
+ k = (xmon_adb_shiftstate? xmon_shift_keytab: xmon_keytab)[k];
+ if (k != 0)
+ break;
+ }
+ xmon_wants_key = 0;
+ return k;
+}
+#endif /* CONFIG_BOOTX_TEXT */
+
+/*
+ * Blocking getc: read from the ADB keyboard when btext output is active
+ * and an ADB input source exists, otherwise defer to the getc callback
+ * that was installed before us. Returns -1 when neither is available.
+ */
+static int udbg_adb_getc(void)
+{
+#ifdef CONFIG_BOOTX_TEXT
+	if (input_type != input_adb_none && udbg_adb_use_btext)
+		return udbg_adb_local_getc();
+#endif
+	return udbg_adb_old_getc ? udbg_adb_old_getc() : -1;
+}
+
+/*
+ * getc_poll() only matters for the xmon-over-modem hack, which is of no
+ * real concern here. All we do is poll the low-level ADB driver so it
+ * does not time out, then hand over to the original poll routine.
+ * Returns -1 when there was no original routine.
+ */
+static int udbg_adb_getc_poll(void)
+{
+	udbg_adb_poll();
+
+	return udbg_adb_old_getc_poll ? udbg_adb_old_getc_poll() : -1;
+}
+
+/*
+ * Output one character: draw it on the btext display when that is in
+ * use, then forward it to the putc callback that was installed before
+ * us, if any.
+ */
+static void udbg_adb_putc(char c)
+{
+#ifdef CONFIG_BOOTX_TEXT
+	if (udbg_adb_use_btext)
+		btext_drawchar(c);
+#endif
+	/* plain call: a void function must not "return" an expression */
+	if (udbg_adb_old_putc)
+		udbg_adb_old_putc(c);
+}
+
+/*
+ * Early hook: when a btext display can be found, route udbg output
+ * through udbg_adb_putc(). Input is left alone at this stage.
+ */
+void __init udbg_adb_init_early(void)
+{
+#ifdef CONFIG_BOOTX_TEXT
+	if (btext_find_display(1) != 0)
+		return;
+
+	udbg_adb_use_btext = 1;
+	udbg_putc = udbg_adb_putc;
+#endif
+}
+
+/*
+ * Install the ADB/btext udbg backend on top of whatever is currently set.
+ *
+ * @force_btext: passed through to btext_find_display().
+ *
+ * The existing udbg callbacks are saved and chained to. Returns 0 when an
+ * ADB keyboard and a PMU or CUDA input source were found, -ENODEV
+ * otherwise; the output hooks remain installed in both cases.
+ */
+int __init udbg_adb_init(int force_btext)
+{
+ struct device_node *np;
+
+ /* Capture existing callbacks */
+ udbg_adb_old_putc = udbg_putc;
+ udbg_adb_old_getc = udbg_getc;
+ udbg_adb_old_getc_poll = udbg_getc_poll;
+
+ /* Check if our early init was already called */
+ /* (chaining to ourselves or to btext directly would duplicate or recurse) */
+ if (udbg_adb_old_putc == udbg_adb_putc)
+ udbg_adb_old_putc = NULL;
+#ifdef CONFIG_BOOTX_TEXT
+ if (udbg_adb_old_putc == btext_drawchar)
+ udbg_adb_old_putc = NULL;
+#endif
+
+ /* Set ours as output */
+ udbg_putc = udbg_adb_putc;
+ udbg_getc = udbg_adb_getc;
+ udbg_getc_poll = udbg_adb_getc_poll;
+
+#ifdef CONFIG_BOOTX_TEXT
+ /* Check if we should use btext output */
+ if (btext_find_display(force_btext) == 0)
+ udbg_adb_use_btext = 1;
+#endif
+
+ /* See if there is a keyboard in the device tree with a parent
+ * of type "adb". If not, we return a failure, but we keep the
+ * btext output set for now
+ */
+ for_each_node_by_name(np, "keyboard") {
+ struct device_node *parent = of_get_parent(np);
+ int found = of_node_is_type(parent, "adb");
+ of_node_put(parent);
+ if (found)
+ break;
+ }
+ if (np == NULL)
+ return -ENODEV;
+ /* only the node's existence matters; release it */
+ of_node_put(np);
+
+ /* if both controllers are found, CUDA takes precedence (assigned last) */
+#ifdef CONFIG_ADB_PMU
+ if (find_via_pmu())
+ input_type = input_adb_pmu;
+#endif
+#ifdef CONFIG_ADB_CUDA
+ if (find_via_cuda())
+ input_type = input_adb_cuda;
+#endif
+
+ /* Same as above: nothing found, keep btext set for output */
+ if (input_type == input_adb_none)
+ return -ENODEV;
+
+ return 0;
+}
diff --git a/arch/powerpc/platforms/powermac/udbg_scc.c b/arch/powerpc/platforms/powermac/udbg_scc.c
new file mode 100644
index 000000000..1b7c39e84
--- /dev/null
+++ b/arch/powerpc/platforms/powermac/udbg_scc.c
@@ -0,0 +1,184 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * udbg for zilog scc ports as found on Apple PowerMacs
+ *
+ * Copyright (C) 2001-2005 PPC 64 Team, IBM Corp
+ */
+#include <linux/types.h>
+#include <linux/of.h>
+#include <asm/udbg.h>
+#include <asm/processor.h>
+#include <asm/io.h>
+#include <asm/pmac_feature.h>
+
+extern u8 real_readb(volatile u8 __iomem *addr);
+extern void real_writeb(u8 data, volatile u8 __iomem *addr);
+
+#define SCC_TXRDY 4
+#define SCC_RXRDY 1
+
+static volatile u8 __iomem *sccc;
+static volatile u8 __iomem *sccd;
+
+/*
+ * Write one character to the SCC, spinning until the transmitter is
+ * ready. A '\n' is followed by a '\r'. No-op while the port is unmapped.
+ */
+static void udbg_scc_putc(char c)
+{
+	if (!sccc)
+		return;
+
+	while (!(in_8(sccc) & SCC_TXRDY))
+		;
+	out_8(sccd, c);
+	if (c == '\n')
+		udbg_scc_putc('\r');
+}
+
+/*
+ * Non-blocking read: return the pending character, or -1 when nothing
+ * has been received or the port is not mapped.
+ */
+static int udbg_scc_getc_poll(void)
+{
+	if (sccc && (in_8(sccc) & SCC_RXRDY) != 0)
+		return in_8(sccd);
+
+	return -1;
+}
+
+/*
+ * Blocking read: spin until a character has been received and return
+ * it. Returns -1 immediately when the port is not mapped.
+ */
+static int udbg_scc_getc(void)
+{
+	if (!sccc)
+		return -1;
+
+	while (!(in_8(sccc) & SCC_RXRDY))
+		;
+	return in_8(sccd);
+}
+
+/*
+ * Initialisation sequence written byte by byte to the SCC control port
+ * by udbg_scc_init(). Laid out as (register number, value) pairs; the
+ * values at index 1 and 3 (baud rate divisor) are patched at run time
+ * before the table is sent.
+ */
+static unsigned char scc_inittab[] = {
+ 13, 0, /* set baud rate divisor */
+ 12, 0,
+ 14, 1, /* baud rate gen enable, src=rtxc */
+ 11, 0x50, /* clocks = br gen */
+ 5, 0xea, /* tx 8 bits, assert DTR & RTS */
+ 4, 0x46, /* x16 clock, 1 stop */
+ 3, 0xc1, /* rx enable, 8 bits */
+};
+
+/*
+ * Set up the Zilog SCC as the udbg console.
+ *
+ * @force_scc: when non-zero, use the SCC even if it is not the firmware
+ *             stdout device (channel "ch-a" is used in that case).
+ *
+ * Looks up the "escc" node and its parent, picks the channel matching
+ * "linux,stdout-path" (or ch-a when forced), maps the channel registers,
+ * resets and programs the channel, then installs the udbg callbacks.
+ * On any failure the function returns with the udbg callbacks untouched.
+ */
+void __init udbg_scc_init(int force_scc)
+{
+	const u32 *reg;
+	unsigned long addr;
+	struct device_node *stdout = NULL, *escc = NULL, *macio = NULL;
+	struct device_node *ch, *ch_def = NULL, *ch_a = NULL;
+	const char *path;
+	int i;
+
+	escc = of_find_node_by_name(NULL, "escc");
+	if (escc == NULL)
+		goto bail;
+	macio = of_get_parent(escc);
+	if (macio == NULL)
+		goto bail;
+	path = of_get_property(of_chosen, "linux,stdout-path", NULL);
+	if (path != NULL)
+		stdout = of_find_node_by_path(path);
+	/* remember the stdout channel and channel A, each with its own ref */
+	for_each_child_of_node(escc, ch) {
+		if (ch == stdout) {
+			of_node_put(ch_def);
+			ch_def = of_node_get(ch);
+		}
+		if (of_node_name_eq(ch, "ch-a")) {
+			of_node_put(ch_a);
+			ch_a = of_node_get(ch);
+		}
+	}
+	if (ch_def == NULL && !force_scc)
+		goto bail;
+
+	ch = ch_def ? ch_def : ch_a;
+
+	/* Get address within mac-io ASIC */
+	reg = of_get_property(escc, "reg", NULL);
+	if (reg == NULL)
+		goto bail;
+	addr = reg[0];
+
+	/* Get address of mac-io PCI itself */
+	reg = of_get_property(macio, "assigned-addresses", NULL);
+	if (reg == NULL)
+		goto bail;
+	addr += reg[2];
+
+	/* Lock the serial port */
+	pmac_call_feature(PMAC_FTR_SCC_ENABLE, ch,
+			  PMAC_SCC_ASYNC | PMAC_SCC_FLAG_XMON, 1);
+
+	if (ch == ch_a)
+		addr += 0x20;
+	sccc = ioremap(addr & PAGE_MASK, PAGE_SIZE);
+	/*
+	 * Without this check a failed mapping would be turned into a
+	 * non-NULL bogus pointer by the offset addition below, defeating
+	 * the "if (sccc)" guards in the I/O routines.
+	 */
+	if (sccc == NULL)
+		goto bail;
+	sccc += addr & ~PAGE_MASK;
+	sccd = sccc + 0x10;
+
+	mb();
+
+	for (i = 20000; i != 0; --i)
+		in_8(sccc);
+	out_8(sccc, 0x09);		/* reset A or B side */
+	out_8(sccc, 0xc0);
+
+	/* If SCC was the OF output port, read the BRG value, else
+	 * Setup for 38400 or 57600 8N1 depending on the machine
+	 */
+	if (ch_def != NULL) {
+		out_8(sccc, 13);
+		scc_inittab[1] = in_8(sccc);
+		out_8(sccc, 12);
+		scc_inittab[3] = in_8(sccc);
+	} else if (of_machine_is_compatible("RackMac1,1")
+		   || of_machine_is_compatible("RackMac1,2")
+		   || of_machine_is_compatible("MacRISC4")) {
+		/* Xserves and G5s default to 57600 */
+		scc_inittab[1] = 0;
+		scc_inittab[3] = 0;
+	} else {
+		/* Others default to 38400 */
+		scc_inittab[1] = 0;
+		scc_inittab[3] = 1;
+	}
+
+	for (i = 0; i < sizeof(scc_inittab); ++i)
+		out_8(sccc, scc_inittab[i]);
+
+
+	udbg_putc = udbg_scc_putc;
+	udbg_getc = udbg_scc_getc;
+	udbg_getc_poll = udbg_scc_getc_poll;
+
+	udbg_puts("Hello World !\n");
+
+ bail:
+	/* of_node_put() is called with NULL for nodes that were never found */
+	of_node_put(macio);
+	of_node_put(escc);
+	of_node_put(stdout);
+	of_node_put(ch_def);
+	of_node_put(ch_a);
+}
+
+#ifdef CONFIG_PPC64
+/*
+ * putc variant going through the real_readb()/real_writeb() accessors:
+ * spin until the transmitter is ready, send the character, and follow
+ * a '\n' with a '\r'.
+ */
+static void udbg_real_scc_putc(char c)
+{
+	while (!(real_readb(sccc) & SCC_TXRDY))
+		;
+	real_writeb(c, sccd);
+	if (c != '\n')
+		return;
+	udbg_real_scc_putc('\r');
+}
+
+/*
+ * Install an output-only udbg console that talks to the SCC through
+ * hard-coded addresses. No input callbacks are provided.
+ */
+void __init udbg_init_pmac_realmode(void)
+{
+ /* fixed control/data register addresses; the data port is control + 0x10 */
+ sccc = (volatile u8 __iomem *)0x80013020ul;
+ sccd = (volatile u8 __iomem *)0x80013030ul;
+
+ udbg_putc = udbg_real_scc_putc;
+ udbg_getc = NULL;
+ udbg_getc_poll = NULL;
+}
+#endif /* CONFIG_PPC64 */
diff --git a/arch/powerpc/platforms/powernv/Kconfig b/arch/powerpc/platforms/powernv/Kconfig
new file mode 100644
index 000000000..70a46acc7
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/Kconfig
@@ -0,0 +1,38 @@
+# SPDX-License-Identifier: GPL-2.0
+config PPC_POWERNV
+ depends on PPC64 && PPC_BOOK3S
+ bool "IBM PowerNV (Non-Virtualized) platform support"
+ select PPC_HASH_MMU_NATIVE if PPC_64S_HASH_MMU
+ select PPC_XICS
+ select PPC_ICP_NATIVE
+ select PPC_XIVE_NATIVE
+ select PPC_P7_NAP
+ select FORCE_PCI
+ select PCI_MSI
+ select EPAPR_BOOT
+ select PPC_INDIRECT_PIO
+ select PPC_UDBG_16550
+ select CPU_FREQ
+ select PPC_DOORBELL
+ select MMU_NOTIFIER
+ select FORCE_SMP
+ select ARCH_SUPPORTS_PER_VMA_LOCK
+ default y
+
+config OPAL_PRD
+ tristate "OPAL PRD driver"
+ depends on PPC_POWERNV
+ help
+ This enables the opal-prd driver, a facility to run processor
+ recovery diagnostics on OpenPower machines
+
+config PPC_MEMTRACE
+ bool "Enable runtime allocation of RAM for tracing"
+ depends on PPC_POWERNV && MEMORY_HOTPLUG && CONTIG_ALLOC
+ help
+ Enabling this option allows for runtime allocation of memory (RAM)
+ for hardware tracing.
+
+config SCOM_DEBUGFS
+ bool "Expose SCOM controllers via debugfs"
+ depends on DEBUG_FS
diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile
new file mode 100644
index 000000000..19f0fc5c6
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/Makefile
@@ -0,0 +1,33 @@
+# SPDX-License-Identifier: GPL-2.0
+
+# nothing that deals with real mode is safe to KASAN
+# in particular, idle code runs a bunch of things in real mode
+KASAN_SANITIZE_idle.o := n
+KASAN_SANITIZE_pci-ioda.o := n
+KASAN_SANITIZE_pci-ioda-tce.o := n
+# pnv_machine_check_early
+KASAN_SANITIZE_setup.o := n
+
+obj-y += setup.o opal-call.o opal-wrappers.o opal.o opal-async.o
+obj-y += idle.o opal-rtc.o opal-nvram.o opal-lpc.o opal-flash.o
+obj-y += rng.o opal-elog.o opal-dump.o opal-sysparam.o opal-sensor.o
+obj-y += opal-msglog.o opal-hmi.o opal-power.o opal-irqchip.o
+obj-y += opal-kmsg.o opal-powercap.o opal-psr.o opal-sensor-groups.o
+obj-y += ultravisor.o
+
+obj-$(CONFIG_SMP) += smp.o subcore.o subcore-asm.o
+obj-$(CONFIG_FA_DUMP) += opal-fadump.o
+obj-$(CONFIG_PRESERVE_FA_DUMP) += opal-fadump.o
+obj-$(CONFIG_OPAL_CORE) += opal-core.o
+obj-$(CONFIG_PCI) += pci.o pci-ioda.o pci-ioda-tce.o
+obj-$(CONFIG_PCI_IOV) += pci-sriov.o
+obj-$(CONFIG_CXL_BASE) += pci-cxl.o
+obj-$(CONFIG_EEH) += eeh-powernv.o
+obj-$(CONFIG_MEMORY_FAILURE) += opal-memory-errors.o
+obj-$(CONFIG_OPAL_PRD) += opal-prd.o
+obj-$(CONFIG_PERF_EVENTS) += opal-imc.o
+obj-$(CONFIG_PPC_MEMTRACE) += memtrace.o
+obj-$(CONFIG_PPC_VAS) += vas.o vas-window.o vas-debug.o vas-fault.o
+obj-$(CONFIG_OCXL_BASE) += ocxl.o
+obj-$(CONFIG_SCOM_DEBUGFS) += opal-xscom.o
+obj-$(CONFIG_PPC_SECURE_BOOT) += opal-secvar.o
diff --git a/arch/powerpc/platforms/powernv/copy-paste.h b/arch/powerpc/platforms/powernv/copy-paste.h
new file mode 100644
index 000000000..f063807ed
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/copy-paste.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright 2016-17 IBM Corp.
+ */
+#include <asm/ppc-opcode.h>
+#include <asm/reg.h>
+
+/*
+ * Copy/paste instructions:
+ *
+ * copy RA,RB
+ * Copy contents of address (RA) + effective_address(RB)
+ * to internal copy-buffer.
+ *
+ * paste RA,RB
+ * Paste contents of internal copy-buffer to the address
+ * (RA) + effective_address(RB)
+ */
+/*
+ * Issue a copy instruction with RA = @offset and RB = @crb.
+ *
+ * The "memory" clobber keeps the compiler from moving memory accesses
+ * across the instruction. Always returns 0.
+ */
+static inline int vas_copy(void *crb, int offset)
+{
+ asm volatile(PPC_COPY(%0, %1)";"
+ :
+ : "b" (offset), "b" (crb)
+ : "memory");
+
+ return 0;
+}
+
+/*
+ * Issue a paste instruction with RA = @offset and RB = @paste_address,
+ * then read back the condition register.
+ *
+ * Returns the CR0 field of the condition register with the SO bit
+ * masked off.
+ */
+static inline int vas_paste(void *paste_address, int offset)
+{
+ u32 cr;
+
+ cr = 0;
+ /* operand %0 is cr; %1/%2 are the paste operands */
+ asm volatile(PPC_PASTE(%1, %2)";"
+ "mfocrf %0, 0x80;"
+ : "=r" (cr)
+ : "b" (offset), "b" (paste_address)
+ : "memory", "cr0");
+
+ /* We mask with 0xE to ignore SO */
+ return (cr >> CR0_SHIFT) & 0xE;
+}
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c
new file mode 100644
index 000000000..af3a5d37a
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -0,0 +1,1696 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * PowerNV Platform dependent EEH operations
+ *
+ * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2013.
+ */
+
+#include <linux/atomic.h>
+#include <linux/debugfs.h>
+#include <linux/delay.h>
+#include <linux/export.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/irqdomain.h>
+#include <linux/list.h>
+#include <linux/msi.h>
+#include <linux/of.h>
+#include <linux/pci.h>
+#include <linux/proc_fs.h>
+#include <linux/rbtree.h>
+#include <linux/sched.h>
+#include <linux/seq_file.h>
+#include <linux/spinlock.h>
+
+#include <asm/eeh.h>
+#include <asm/eeh_event.h>
+#include <asm/firmware.h>
+#include <asm/io.h>
+#include <asm/iommu.h>
+#include <asm/machdep.h>
+#include <asm/msi_bitmap.h>
+#include <asm/opal.h>
+#include <asm/ppc-pci.h>
+#include <asm/pnv-pci.h>
+
+#include "powernv.h"
+#include "pci.h"
+#include "../../../../drivers/pci/pci.h"
+
+static int eeh_event_irq = -EINVAL;
+
+/* Hand a PCI device to the EEH core for probing. */
+static void pnv_pcibios_bus_add_device(struct pci_dev *pdev)
+{
+ dev_dbg(&pdev->dev, "EEH: Setting up device\n");
+ eeh_probe_device(pdev);
+}
+
+/*
+ * Interrupt handler for the OPAL PCI error event.
+ *
+ * The interrupt is masked right away: further EEH events are of no
+ * interest until the outstanding ones have been processed, and event
+ * processing gets unmasked again in next_error() if EEH is enabled.
+ * When EEH is enabled a special (NULL) failure event is queued.
+ */
+static irqreturn_t pnv_eeh_event(int irq, void *data)
+{
+	disable_irq_nosync(irq);
+
+	if (!eeh_enabled())
+		return IRQ_HANDLED;
+
+	eeh_send_failure_event(NULL);
+	return IRQ_HANDLED;
+}
+
+#ifdef CONFIG_DEBUG_FS
+/*
+ * debugfs write handler for error injection.
+ *
+ * Expects "pe_no:type:func:addr:mask" (all hexadecimal) and forwards the
+ * request to eeh_ops->err_inject() for the PE on this file's PHB.
+ *
+ * Returns @count on success, or a negative errno: -ENXIO when injection
+ * is unsupported, -EFAULT when nothing was copied, -EINVAL on a
+ * malformed request, -ENODEV for an unknown PE, or the error reported by
+ * the copy or the injection itself.
+ */
+static ssize_t pnv_eeh_ei_write(struct file *filp,
+				const char __user *user_buf,
+				size_t count, loff_t *ppos)
+{
+	struct pci_controller *hose = filp->private_data;
+	struct eeh_pe *pe;
+	int pe_no, type, func;
+	unsigned long addr, mask;
+	/* zero-filled so that the text is NUL-terminated for sscanf() */
+	char buf[50] = { 0 };
+	int ret;
+
+	if (!eeh_ops || !eeh_ops->err_inject)
+		return -ENXIO;
+
+	/* Copy over argument buffer, keeping the last byte as terminator */
+	ret = simple_write_to_buffer(buf, sizeof(buf) - 1, ppos,
+				     user_buf, count);
+	if (ret < 0)
+		return ret;
+	if (!ret)
+		return -EFAULT;
+
+	/* Retrieve parameters */
+	ret = sscanf(buf, "%x:%x:%x:%lx:%lx",
+		     &pe_no, &type, &func, &addr, &mask);
+	if (ret != 5)
+		return -EINVAL;
+
+	/* Retrieve PE */
+	pe = eeh_pe_get(hose, pe_no);
+	if (!pe)
+		return -ENODEV;
+
+	/* Do error injection */
+	ret = eeh_ops->err_inject(pe, type, func, addr, mask);
+	return ret < 0 ? ret : count;
+}
+
+/* Write-only, non-seekable file operations for the error injection debugfs entry */
+static const struct file_operations pnv_eeh_ei_fops = {
+ .open = simple_open,
+ .llseek = no_llseek,
+ .write = pnv_eeh_ei_write,
+};
+
+/*
+ * Store @val as a big-endian 64-bit value at byte @offset in the
+ * register block of the PHB behind @data (a struct pci_controller).
+ * Always returns 0.
+ */
+static int pnv_eeh_dbgfs_set(void *data, int offset, u64 val)
+{
+	struct pci_controller *ctrl = data;
+	struct pnv_phb *pnv = ctrl->private_data;
+
+	out_be64(pnv->regs + offset, val);
+
+	return 0;
+}
+
+/*
+ * Load the big-endian 64-bit value at byte @offset in the register
+ * block of the PHB behind @data (a struct pci_controller) into *@val.
+ * Always returns 0.
+ */
+static int pnv_eeh_dbgfs_get(void *data, int offset, u64 *val)
+{
+	struct pci_controller *ctrl = data;
+	struct pnv_phb *pnv = ctrl->private_data;
+
+	*val = in_be64(pnv->regs + offset);
+
+	return 0;
+}
+
+/*
+ * Generate a debugfs attribute named pnv_eeh_dbgfs_ops_<name> whose
+ * get/set handlers access the PHB register at offset <reg> through
+ * pnv_eeh_dbgfs_get()/pnv_eeh_dbgfs_set(). Values are shown as "0x%llx".
+ */
+#define PNV_EEH_DBGFS_ENTRY(name, reg) \
+static int pnv_eeh_dbgfs_set_##name(void *data, u64 val) \
+{ \
+ return pnv_eeh_dbgfs_set(data, reg, val); \
+} \
+ \
+static int pnv_eeh_dbgfs_get_##name(void *data, u64 *val) \
+{ \
+ return pnv_eeh_dbgfs_get(data, reg, val); \
+} \
+ \
+DEFINE_SIMPLE_ATTRIBUTE(pnv_eeh_dbgfs_ops_##name, \
+ pnv_eeh_dbgfs_get_##name, \
+ pnv_eeh_dbgfs_set_##name, \
+ "0x%llx\n")
+
+/* Attributes backing the err_injct_outbound/inboundA/inboundB debugfs files */
+PNV_EEH_DBGFS_ENTRY(outb, 0xD10);
+PNV_EEH_DBGFS_ENTRY(inbA, 0xD90);
+PNV_EEH_DBGFS_ENTRY(inbB, 0xE10);
+
+#endif /* CONFIG_DEBUG_FS */
+
+/*
+ * Bring the PNV_PHB_FLAG_EEH flag of every PHB in line with the global
+ * EEH state. With EEH enabled we rely on it; otherwise the flag is
+ * cleared so the conventional mechanism clears frozen PEs during PCI
+ * config access.
+ */
+static void pnv_eeh_enable_phbs(void)
+{
+	struct pci_controller *ctrl;
+
+	list_for_each_entry(ctrl, &hose_list, list_node) {
+		struct pnv_phb *pnv = ctrl->private_data;
+
+		if (!eeh_enabled()) {
+			pnv->flags &= ~PNV_PHB_FLAG_EEH;
+			continue;
+		}
+		pnv->flags |= PNV_PHB_FLAG_EEH;
+	}
+}
+
+/**
+ * pnv_eeh_post_init - EEH platform dependent post initialization
+ *
+ * EEH platform dependent post initialization on powernv. When
+ * the function is called, the EEH PEs and devices should have
+ * been built. If the I/O cache stuff has been built, EEH is
+ * ready to supply service.
+ *
+ * Requests the OPAL PCI error event interrupt, syncs the per-PHB EEH
+ * flag and creates the error injection debugfs files.
+ *
+ * Return: 0 on success, a negative value if the event interrupt could
+ * not be obtained or requested.
+ */
+int pnv_eeh_post_init(void)
+{
+ struct pci_controller *hose;
+ struct pnv_phb *phb;
+ int ret = 0;
+
+ eeh_show_enabled();
+
+ /* Register OPAL event notifier */
+ eeh_event_irq = opal_event_request(ilog2(OPAL_EVENT_PCI_ERROR));
+ if (eeh_event_irq < 0) {
+ pr_err("%s: Can't register OPAL event interrupt (%d)\n",
+ __func__, eeh_event_irq);
+ return eeh_event_irq;
+ }
+
+ ret = request_irq(eeh_event_irq, pnv_eeh_event,
+ IRQ_TYPE_LEVEL_HIGH, "opal-eeh", NULL);
+ if (ret < 0) {
+ irq_dispose_mapping(eeh_event_irq);
+ /* NOTE(review): this prints the irq number, not the error in ret */
+ pr_err("%s: Can't request OPAL event interrupt (%d)\n",
+ __func__, eeh_event_irq);
+ return ret;
+ }
+
+ /* keep the interrupt masked until EEH gets enabled (see pnv_eeh_probe()) */
+ if (!eeh_enabled())
+ disable_irq(eeh_event_irq);
+
+ pnv_eeh_enable_phbs();
+
+ list_for_each_entry(hose, &hose_list, list_node) {
+ phb = hose->private_data;
+
+ /* Create debugfs entries */
+#ifdef CONFIG_DEBUG_FS
+ /* skip PHBs already populated or without a debugfs directory */
+ if (phb->has_dbgfs || !phb->dbgfs)
+ continue;
+
+ phb->has_dbgfs = 1;
+ debugfs_create_file("err_injct", 0200,
+ phb->dbgfs, hose,
+ &pnv_eeh_ei_fops);
+
+ debugfs_create_file("err_injct_outbound", 0600,
+ phb->dbgfs, hose,
+ &pnv_eeh_dbgfs_ops_outb);
+ debugfs_create_file("err_injct_inboundA", 0600,
+ phb->dbgfs, hose,
+ &pnv_eeh_dbgfs_ops_inbA);
+ debugfs_create_file("err_injct_inboundB", 0600,
+ phb->dbgfs, hose,
+ &pnv_eeh_dbgfs_ops_inbB);
+#endif /* CONFIG_DEBUG_FS */
+ }
+
+ return ret;
+}
+
+/*
+ * Walk the conventional PCI capability list of @pdn looking for
+ * capability id @cap.
+ *
+ * Returns the config space offset of the capability, or 0 when @pdn is
+ * NULL, the device has no capability list, or @cap is not present.
+ */
+static int pnv_eeh_find_cap(struct pci_dn *pdn, int cap)
+{
+ int pos = PCI_CAPABILITY_LIST;
+ int cnt = 48; /* Maximal number of capabilities */
+ u32 status, id;
+
+ if (!pdn)
+ return 0;
+
+ /* Check if the device supports capabilities */
+ /* NOTE(review): the read's return value is ignored; status is not initialised */
+ pnv_pci_cfg_read(pdn, PCI_STATUS, 2, &status);
+ if (!(status & PCI_STATUS_CAP_LIST))
+ return 0;
+
+ while (cnt--) {
+ /*
+ * pos is overwritten with the next-capability pointer.
+ * NOTE(review): pos is an int while the other calls pass u32
+ * pointers -- confirm the expected pointer type.
+ */
+ pnv_pci_cfg_read(pdn, pos, 1, &pos);
+ /* pointers below 0x40 end the list */
+ if (pos < 0x40)
+ break;
+
+ pos &= ~3;
+ pnv_pci_cfg_read(pdn, pos + PCI_CAP_LIST_ID, 1, &id);
+ if (id == 0xff)
+ break;
+
+ /* Found */
+ if (id == cap)
+ return pos;
+
+ /* Next one */
+ pos += PCI_CAP_LIST_NEXT;
+ }
+
+ return 0;
+}
+
+/*
+ * Search the PCIe extended capability list of @pdn, which starts at
+ * config offset 256, for extended capability @cap.
+ *
+ * Returns the config space offset of the capability, or 0 when the
+ * device has no eeh_dev or no PCIe capability, when a config read fails,
+ * or when @cap is not in the list.
+ */
+static int pnv_eeh_find_ecap(struct pci_dn *pdn, int cap)
+{
+	struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
+	int offset = 256;
+	int budget;
+	u32 header;
+
+	if (!edev || !edev->pcie_cap)
+		return 0;
+
+	/* an unreadable or all-zero first header means no extended caps */
+	if (pnv_pci_cfg_read(pdn, offset, 4, &header) != PCIBIOS_SUCCESSFUL ||
+	    !header)
+		return 0;
+
+	/* bound the walk so that a looping list cannot hang us */
+	for (budget = (4096 - 256) / 8; budget > 0; budget--) {
+		if (offset && PCI_EXT_CAP_ID(header) == cap)
+			return offset;
+
+		offset = PCI_EXT_CAP_NEXT(header);
+		if (offset < 256)
+			break;
+
+		if (pnv_pci_cfg_read(pdn, offset, 4, &header) !=
+		    PCIBIOS_SUCCESSFUL)
+			break;
+	}
+
+	return 0;
+}
+
+/*
+ * Find the EEH PE that sits upstream of @pdev: the PE of the physical
+ * function for a VF, otherwise the PE of the parent bridge.
+ *
+ * Returns NULL when the device has no parent bridge (and is not a VF).
+ */
+static struct eeh_pe *pnv_eeh_get_upstream_pe(struct pci_dev *pdev)
+{
+ struct pci_controller *hose = pdev->bus->sysdata;
+ struct pnv_phb *phb = hose->private_data;
+ struct pci_dev *parent = pdev->bus->self;
+
+#ifdef CONFIG_PCI_IOV
+ /* for VFs we use the PF's PE as the upstream PE */
+ if (pdev->is_virtfn)
+ parent = pdev->physfn;
+#endif
+
+ /* otherwise use the PE of our parent bridge */
+ if (parent) {
+ struct pnv_ioda_pe *ioda_pe = pnv_ioda_get_pe(parent);
+
+ /*
+ * NOTE(review): ioda_pe is dereferenced without a NULL check --
+ * confirm pnv_ioda_get_pe() cannot return NULL for this device.
+ */
+ return eeh_pe_get(phb->hose, ioda_pe->pe_number);
+ }
+
+ return NULL;
+}
+
+/**
+ * pnv_eeh_probe - Do probe on PCI device
+ * @pdev: pci_dev to probe
+ *
+ * Create, or find the existing, eeh_dev for this pci_dev.
+ *
+ * Return: the eeh_dev on success or when it was already configured;
+ * NULL when there is no eeh_dev, it already belongs to a PE, the device
+ * is a PCI-ISA bridge, or it could not be added to a PE.
+ */
+static struct eeh_dev *pnv_eeh_probe(struct pci_dev *pdev)
+{
+ /*
+ * NOTE(review): pdn is dereferenced in the initialisers below before
+ * any NULL check -- confirm pci_get_pdn() cannot return NULL here.
+ */
+ struct pci_dn *pdn = pci_get_pdn(pdev);
+ struct pci_controller *hose = pdn->phb;
+ struct pnv_phb *phb = hose->private_data;
+ struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
+ struct eeh_pe *upstream_pe;
+ uint32_t pcie_flags;
+ int ret;
+ /* bus number in bits 15:8, devfn in bits 7:0 */
+ int config_addr = (pdn->busno << 8) | (pdn->devfn);
+
+ /*
+ * When probing the root bridge, which doesn't have any
+ * subordinate PCI devices. We don't have OF node for
+ * the root bridge. So it's not reasonable to continue
+ * the probing.
+ */
+ if (!edev || edev->pe)
+ return NULL;
+
+ /* already configured? */
+ if (edev->pdev) {
+ pr_debug("%s: found existing edev for %04x:%02x:%02x.%01x\n",
+ __func__, hose->global_number, config_addr >> 8,
+ PCI_SLOT(config_addr), PCI_FUNC(config_addr));
+ return edev;
+ }
+
+ /* Skip for PCI-ISA bridge */
+ if ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
+ return NULL;
+
+ eeh_edev_dbg(edev, "Probing device\n");
+
+ /* Initialize eeh device */
+ /* clear the low 8 mode bits, then record capability offsets */
+ edev->mode &= 0xFFFFFF00;
+ edev->pcix_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_PCIX);
+ edev->pcie_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_EXP);
+ edev->af_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_AF);
+ edev->aer_cap = pnv_eeh_find_ecap(pdn, PCI_EXT_CAP_ID_ERR);
+ if ((pdev->class >> 8) == PCI_CLASS_BRIDGE_PCI) {
+ edev->mode |= EEH_DEV_BRIDGE;
+ if (edev->pcie_cap) {
+ /* classify the bridge by its PCIe port type */
+ pnv_pci_cfg_read(pdn, edev->pcie_cap + PCI_EXP_FLAGS,
+ 2, &pcie_flags);
+ pcie_flags = (pcie_flags & PCI_EXP_FLAGS_TYPE) >> 4;
+ if (pcie_flags == PCI_EXP_TYPE_ROOT_PORT)
+ edev->mode |= EEH_DEV_ROOT_PORT;
+ else if (pcie_flags == PCI_EXP_TYPE_DOWNSTREAM)
+ edev->mode |= EEH_DEV_DS_PORT;
+ }
+ }
+
+ /* look up the PE number for this bus/devfn in the PHB's reverse map */
+ edev->pe_config_addr = phb->ioda.pe_rmap[config_addr];
+
+ upstream_pe = pnv_eeh_get_upstream_pe(pdev);
+
+ /* Create PE */
+ ret = eeh_pe_tree_insert(edev, upstream_pe);
+ if (ret) {
+ eeh_edev_warn(edev, "Failed to add device to PE (code %d)\n", ret);
+ return NULL;
+ }
+
+ /*
+ * If the PE contains any one of following adapters, the
+ * PCI config space can't be accessed when dumping EEH log.
+ * Otherwise, we will run into fenced PHB caused by shortage
+ * of outbound credits in the adapter. The PCI config access
+ * should be blocked until PE reset. MMIO access is dropped
+ * by hardware certainly. In order to drop PCI config requests,
+ * one more flag (EEH_PE_CFG_RESTRICTED) is introduced, which
+ * will be checked in the backend for PE state retrieval. If
+ * the PE becomes frozen for the first time and the flag has
+ * been set for the PE, we will set EEH_PE_CFG_BLOCKED for
+ * that PE to block its config space.
+ *
+ * Broadcom BCM5718 2-ports NICs (14e4:1656)
+ * Broadcom Austin 4-ports NICs (14e4:1657)
+ * Broadcom Shiner 4-ports 1G NICs (14e4:168a)
+ * Broadcom Shiner 2-ports 10G NICs (14e4:168e)
+ */
+ if ((pdn->vendor_id == PCI_VENDOR_ID_BROADCOM &&
+ pdn->device_id == 0x1656) ||
+ (pdn->vendor_id == PCI_VENDOR_ID_BROADCOM &&
+ pdn->device_id == 0x1657) ||
+ (pdn->vendor_id == PCI_VENDOR_ID_BROADCOM &&
+ pdn->device_id == 0x168a) ||
+ (pdn->vendor_id == PCI_VENDOR_ID_BROADCOM &&
+ pdn->device_id == 0x168e))
+ edev->pe->state |= EEH_PE_CFG_RESTRICTED;
+
+ /*
+ * Cache the PE primary bus, which can't be fetched when
+ * full hotplug is in progress. In that case, all child
+ * PCI devices of the PE are expected to be removed prior
+ * to PE reset.
+ */
+ if (!(edev->pe->state & EEH_PE_PRI_BUS)) {
+ edev->pe->bus = pci_find_bus(hose->global_number,
+ pdn->busno);
+ if (edev->pe->bus)
+ edev->pe->state |= EEH_PE_PRI_BUS;
+ }
+
+ /*
+ * Enable EEH explicitly so that we will do EEH check
+ * while accessing I/O stuff
+ */
+ if (!eeh_has_flag(EEH_ENABLED)) {
+ enable_irq(eeh_event_irq);
+ pnv_eeh_enable_phbs();
+ eeh_add_flag(EEH_ENABLED);
+ }
+
+ /* Save memory bars */
+ eeh_save_bars(edev);
+
+ eeh_edev_dbg(edev, "EEH enabled on device\n");
+
+ return edev;
+}
+
+/**
+ * pnv_eeh_set_option - Initialize EEH or MMIO/DMA reenable
+ * @pe: EEH PE
+ * @option: operation to be issued
+ *
+ * Controls the EEH functionality of a PE. Disabling EEH is refused,
+ * enabling is a no-op, and the remaining options thaw MMIO, thaw DMA
+ * or freeze the PE. PHB-specific freeze/unfreeze hooks are preferred
+ * over the plain OPAL calls when the PHB provides them.
+ *
+ * Return: 0 on success, -EPERM for EEH_OPT_DISABLE, -EINVAL for an
+ * unknown option, -EIO when OPAL reports a failure, or whatever the
+ * PHB's unfreeze hook returns.
+ */
+static int pnv_eeh_set_option(struct eeh_pe *pe, int option)
+{
+	struct pci_controller *hose = pe->phb;
+	struct pnv_phb *phb = hose->private_data;
+	int opt;
+	s64 rc;
+
+	switch (option) {
+	case EEH_OPT_DISABLE:
+		return -EPERM;
+	case EEH_OPT_ENABLE:
+		return 0;
+	case EEH_OPT_FREEZE_PE:
+		/* Freeze master and slave PEs if PHB supports compound PEs */
+		if (phb->freeze_pe) {
+			phb->freeze_pe(phb, pe->addr);
+			return 0;
+		}
+
+		rc = opal_pci_eeh_freeze_set(phb->opal_id, pe->addr,
+					     OPAL_EEH_ACTION_SET_FREEZE_ALL);
+		if (rc == OPAL_SUCCESS)
+			return 0;
+
+		pr_warn("%s: Failure %lld freezing PHB#%x-PE#%x\n",
+			__func__, rc, phb->hose->global_number,
+			pe->addr);
+		return -EIO;
+	case EEH_OPT_THAW_MMIO:
+		opt = OPAL_EEH_ACTION_CLEAR_FREEZE_MMIO;
+		break;
+	case EEH_OPT_THAW_DMA:
+		opt = OPAL_EEH_ACTION_CLEAR_FREEZE_DMA;
+		break;
+	default:
+		pr_warn("%s: Invalid option %d\n", __func__, option);
+		return -EINVAL;
+	}
+
+	/* Only the thaw requests get here */
+
+	/* Unfreeze master and slave PEs if PHB supports */
+	if (phb->unfreeze_pe)
+		return phb->unfreeze_pe(phb, pe->addr, opt);
+
+	rc = opal_pci_eeh_freeze_clear(phb->opal_id, pe->addr, opt);
+	if (rc == OPAL_SUCCESS)
+		return 0;
+
+	pr_warn("%s: Failure %lld enable %d for PHB#%x-PE#%x\n",
+		__func__, rc, option, phb->hose->global_number,
+		pe->addr);
+	return -EIO;
+}
+
+/*
+ * Fetch the PHB diag-data from OPAL into pe->data. A failure is only
+ * reported through a warning.
+ */
+static void pnv_eeh_get_phb_diag(struct eeh_pe *pe)
+{
+	struct pnv_phb *phb = pe->phb->private_data;
+	s64 rc = opal_pci_get_phb_diag_data2(phb->opal_id, pe->data,
+					     phb->diag_data_size);
+
+	if (rc == OPAL_SUCCESS)
+		return;
+
+	pr_warn("%s: Failure %lld getting PHB#%x diag-data\n",
+		__func__, rc, pe->phb->global_number);
+}
+
+/*
+ * Query the state of a PHB-type PE.
+ *
+ * Returns EEH_STATE_NOT_SUPPORT when the firmware query fails, the
+ * fully-operational state mask when no PHB error is reported, and 0
+ * when the PHB is in error. The first time a PHB error is seen, the PE
+ * is marked isolated and the diag-data is collected (and dumped if
+ * early log dumping is enabled).
+ */
+static int pnv_eeh_get_phb_state(struct eeh_pe *pe)
+{
+	struct pnv_phb *phb = pe->phb->private_data;
+	u8 fstate = 0;
+	__be16 pcierr = 0;
+	s64 rc;
+
+	rc = opal_pci_eeh_freeze_status(phb->opal_id,
+					pe->addr,
+					&fstate,
+					&pcierr,
+					NULL);
+	if (rc != OPAL_SUCCESS) {
+		pr_warn("%s: Failure %lld getting PHB#%x state\n",
+			__func__, rc, phb->hose->global_number);
+		return EEH_STATE_NOT_SUPPORT;
+	}
+
+	/* No PHB error reported: everything is active and enabled */
+	if (be16_to_cpu(pcierr) != OPAL_EEH_PHB_ERROR)
+		return (EEH_STATE_MMIO_ACTIVE |
+			EEH_STATE_DMA_ACTIVE |
+			EEH_STATE_MMIO_ENABLED |
+			EEH_STATE_DMA_ENABLED);
+
+	/* The error was already handled on an earlier call */
+	if (pe->state & EEH_PE_ISOLATED)
+		return 0;
+
+	/* First time the PHB is seen frozen: isolate it, grab diag-data */
+	eeh_pe_mark_isolated(pe);
+	pnv_eeh_get_phb_diag(pe);
+
+	if (eeh_has_flag(EEH_EARLY_DUMP_LOG))
+		pnv_pci_dump_phb_diag_data(pe->phb, pe->data);
+
+	return 0;
+}
+
+/*
+ * Query the state of a non-PHB PE and translate the firmware freeze
+ * state into an EEH_STATE_* mask.
+ *
+ * While the PE is under reset the operational state is reported without
+ * consulting the hardware. The first time the PE is found frozen for both
+ * MMIO and DMA it is marked isolated and the PHB diag-data is collected.
+ */
+static int pnv_eeh_get_pe_state(struct eeh_pe *pe)
+{
+ struct pnv_phb *phb = pe->phb->private_data;
+ u8 fstate = 0;
+ __be16 pcierr = 0;
+ s64 rc;
+ int result;
+
+ /*
+ * We don't clobber hardware frozen state until PE
+ * reset is completed. In order to keep EEH core
+ * moving forward, we have to return operational
+ * state during PE reset.
+ */
+ if (pe->state & EEH_PE_RESET) {
+ result = (EEH_STATE_MMIO_ACTIVE |
+ EEH_STATE_DMA_ACTIVE |
+ EEH_STATE_MMIO_ENABLED |
+ EEH_STATE_DMA_ENABLED);
+ return result;
+ }
+
+ /*
+ * Fetch PE state from hardware. If the PHB
+ * supports compound PE, let it handle that.
+ */
+ if (phb->get_pe_state) {
+ fstate = phb->get_pe_state(phb, pe->addr);
+ } else {
+ rc = opal_pci_eeh_freeze_status(phb->opal_id,
+ pe->addr,
+ &fstate,
+ &pcierr,
+ NULL);
+ if (rc != OPAL_SUCCESS) {
+ pr_warn("%s: Failure %lld getting PHB#%x-PE%x state\n",
+ __func__, rc, phb->hose->global_number,
+ pe->addr);
+ return EEH_STATE_NOT_SUPPORT;
+ }
+ }
+
+ /* Figure out state */
+ switch (fstate) {
+ case OPAL_EEH_STOPPED_NOT_FROZEN:
+ result = (EEH_STATE_MMIO_ACTIVE |
+ EEH_STATE_DMA_ACTIVE |
+ EEH_STATE_MMIO_ENABLED |
+ EEH_STATE_DMA_ENABLED);
+ break;
+ case OPAL_EEH_STOPPED_MMIO_FREEZE:
+ result = (EEH_STATE_DMA_ACTIVE |
+ EEH_STATE_DMA_ENABLED);
+ break;
+ case OPAL_EEH_STOPPED_DMA_FREEZE:
+ result = (EEH_STATE_MMIO_ACTIVE |
+ EEH_STATE_MMIO_ENABLED);
+ break;
+ case OPAL_EEH_STOPPED_MMIO_DMA_FREEZE:
+ result = 0;
+ break;
+ case OPAL_EEH_STOPPED_RESET:
+ result = EEH_STATE_RESET_ACTIVE;
+ break;
+ case OPAL_EEH_STOPPED_TEMP_UNAVAIL:
+ result = EEH_STATE_UNAVAILABLE;
+ break;
+ case OPAL_EEH_STOPPED_PERM_UNAVAIL:
+ result = EEH_STATE_NOT_SUPPORT;
+ break;
+ default:
+ result = EEH_STATE_NOT_SUPPORT;
+ pr_warn("%s: Invalid PHB#%x-PE#%x state %x\n",
+ __func__, phb->hose->global_number,
+ pe->addr, fstate);
+ }
+
+ /*
+ * If the PHB supports compound PEs, freeze all
+ * slave PEs for consistency.
+ *
+ * If the PE is switching to the frozen state for the
+ * first time, dump the PHB diag-data.
+ */
+ if (!(result & EEH_STATE_NOT_SUPPORT) &&
+ !(result & EEH_STATE_UNAVAILABLE) &&
+ !(result & EEH_STATE_MMIO_ACTIVE) &&
+ !(result & EEH_STATE_DMA_ACTIVE) &&
+ !(pe->state & EEH_PE_ISOLATED)) {
+ if (phb->freeze_pe)
+ phb->freeze_pe(phb, pe->addr);
+
+ eeh_pe_mark_isolated(pe);
+ pnv_eeh_get_phb_diag(pe);
+
+ if (eeh_has_flag(EEH_EARLY_DUMP_LOG))
+ pnv_pci_dump_phb_diag_data(pe->phb, pe->data);
+ }
+
+ return result;
+}
+
+/**
+ * pnv_eeh_get_state - Retrieve PE state
+ * @pe: EEH PE
+ * @delay: optional; receives the delay (in ms) to wait while the PE
+ *         state is temporarily unavailable
+ *
+ * Retrieve the state of the specified PE. On IODA-compatible platforms
+ * the state comes from the IODA table, so the query is passed down to
+ * the PHB- or PE-specific implementation.
+ *
+ * Return: the EEH_STATE_* mask of the PE.
+ */
+static int pnv_eeh_get_state(struct eeh_pe *pe, int *delay)
+{
+	int state = (pe->type & EEH_PE_PHB) ? pnv_eeh_get_phb_state(pe)
+					    : pnv_eeh_get_pe_state(pe);
+
+	/*
+	 * A temporarily unavailable PE makes the EEH core wait for the
+	 * default period (1 second); otherwise no delay is requested.
+	 */
+	if (delay)
+		*delay = (state & EEH_STATE_UNAVAILABLE) ? 1000 : 0;
+
+	return state;
+}
+
+/*
+ * Keep polling OPAL for PHB @id for as long as it returns a positive
+ * value, waiting that many milliseconds between polls. Before the
+ * system is running the wait is a busy delay, afterwards a sleep.
+ *
+ * Returns the first non-positive value reported by opal_pci_poll().
+ */
+static s64 pnv_eeh_poll(unsigned long id)
+{
+	s64 rc;
+
+	while ((rc = opal_pci_poll(id)) > 0) {
+		if (system_state < SYSTEM_RUNNING)
+			udelay(1000 * rc);
+		else
+			msleep(rc);
+	}
+
+	return rc;
+}
+
+/*
+ * Assert or deassert a complete reset of the PHB behind @hose.
+ *
+ * @option: EEH_RESET_FUNDAMENTAL or EEH_RESET_HOT asserts the reset,
+ *          EEH_RESET_DEACTIVATE deasserts it. Any other value fails.
+ *
+ * Returns 0 on success, -EIO otherwise.
+ */
+int pnv_eeh_phb_reset(struct pci_controller *hose, int option)
+{
+	struct pnv_phb *phb = hose->private_data;
+	s64 rc = OPAL_HARDWARE;
+
+	pr_debug("%s: Reset PHB#%x, option=%d\n",
+		 __func__, hose->global_number, option);
+
+	/* Issue PHB complete reset request */
+	if (option == EEH_RESET_FUNDAMENTAL ||
+	    option == EEH_RESET_HOT)
+		rc = opal_pci_reset(phb->opal_id,
+				    OPAL_RESET_PHB_COMPLETE,
+				    OPAL_ASSERT_RESET);
+	else if (option == EEH_RESET_DEACTIVATE)
+		rc = opal_pci_reset(phb->opal_id,
+				    OPAL_RESET_PHB_COMPLETE,
+				    OPAL_DEASSERT_RESET);
+
+	if (rc >= 0) {
+		/*
+		 * Poll the PHB until the request has completed. A PHB
+		 * reset is usually a complete reset followed by a hot
+		 * reset on the root bus, so the PCI bus settlement delay
+		 * is needed as well.
+		 */
+		if (rc > 0)
+			rc = pnv_eeh_poll(phb->opal_id);
+		if (option == EEH_RESET_DEACTIVATE) {
+			if (system_state < SYSTEM_RUNNING)
+				udelay(1000 * EEH_PE_RST_SETTLE_TIME);
+			else
+				msleep(EEH_PE_RST_SETTLE_TIME);
+		}
+	}
+
+	return (rc != OPAL_SUCCESS) ? -EIO : 0;
+}
+
+/*
+ * Assert or de-assert a reset on the root port of the PHB behind @hose.
+ *
+ * EEH_RESET_FUNDAMENTAL asserts OPAL_RESET_PCI_FUNDAMENTAL and
+ * EEH_RESET_HOT asserts OPAL_RESET_PCI_HOT. EEH_RESET_DEACTIVATE always
+ * de-asserts OPAL_RESET_PCI_HOT. For any other @option no firmware call
+ * is made and the status stays at OPAL_HARDWARE.
+ *
+ * Returns 0 when the final OPAL status is OPAL_SUCCESS, -EIO otherwise.
+ */
+static int pnv_eeh_root_reset(struct pci_controller *hose, int option)
+{
+	struct pnv_phb *phb = hose->private_data;
+	s64 status;
+
+	pr_debug("%s: Reset PHB#%x, option=%d\n",
+		 __func__, hose->global_number, option);
+
+	/*
+	 * During the reset deassert time, we needn't care
+	 * the reset scope because the firmware does nothing
+	 * for fundamental or hot reset during deassert phase.
+	 */
+	switch (option) {
+	case EEH_RESET_FUNDAMENTAL:
+		status = opal_pci_reset(phb->opal_id,
+					OPAL_RESET_PCI_FUNDAMENTAL,
+					OPAL_ASSERT_RESET);
+		break;
+	case EEH_RESET_HOT:
+		status = opal_pci_reset(phb->opal_id,
+					OPAL_RESET_PCI_HOT,
+					OPAL_ASSERT_RESET);
+		break;
+	case EEH_RESET_DEACTIVATE:
+		status = opal_pci_reset(phb->opal_id,
+					OPAL_RESET_PCI_HOT,
+					OPAL_DEASSERT_RESET);
+		break;
+	default:
+		status = OPAL_HARDWARE;
+		break;
+	}
+
+	if (status >= 0) {
+		/* Poll state of the PHB until the request is done */
+		if (status > 0)
+			status = pnv_eeh_poll(phb->opal_id);
+
+		/* Let the bus settle once the reset has been released */
+		if (option == EEH_RESET_DEACTIVATE)
+			msleep(EEH_PE_RST_SETTLE_TIME);
+	}
+
+	return (status == OPAL_SUCCESS) ? 0 : -EIO;
+}
+
+/*
+ * Reset the secondary bus of bridge @dev through its own config space.
+ *
+ * For EEH_RESET_FUNDAMENTAL/EEH_RESET_HOT the surprise-down error is masked
+ * in the AER uncorrectable mask (when the bridge has an AER capability),
+ * PCI_BRIDGE_CTL_BUS_RESET is set and the reset is held for
+ * EEH_PE_RST_HOLD_TIME. For EEH_RESET_DEACTIVATE the bit is cleared, the bus
+ * is given EEH_PE_RST_SETTLE_TIME to settle and the surprise-down error is
+ * unmasked again. Other options do nothing.
+ *
+ * Always returns 0; config access results are not checked.
+ */
+static int __pnv_eeh_bridge_reset(struct pci_dev *dev, int option)
+{
+	struct pci_dn *pdn = pci_get_pdn_by_devfn(dev->bus, dev->devfn);
+	struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
+	int aer_pos = 0;
+	u32 val;
+
+	if (edev)
+		aer_pos = edev->aer_cap;
+
+	pr_debug("%s: Secondary Reset PCI bus %04x:%02x with option %d\n",
+		 __func__, pci_domain_nr(dev->bus),
+		 dev->bus->number, option);
+
+	if (option == EEH_RESET_FUNDAMENTAL || option == EEH_RESET_HOT) {
+		/* Don't report linkDown event */
+		if (aer_pos) {
+			eeh_ops->read_config(edev, aer_pos + PCI_ERR_UNCOR_MASK,
+					     4, &val);
+			val |= PCI_ERR_UNC_SURPDN;
+			eeh_ops->write_config(edev, aer_pos + PCI_ERR_UNCOR_MASK,
+					      4, val);
+		}
+
+		/* Assert the secondary bus reset and hold it */
+		eeh_ops->read_config(edev, PCI_BRIDGE_CONTROL, 2, &val);
+		val |= PCI_BRIDGE_CTL_BUS_RESET;
+		eeh_ops->write_config(edev, PCI_BRIDGE_CONTROL, 2, val);
+
+		msleep(EEH_PE_RST_HOLD_TIME);
+	} else if (option == EEH_RESET_DEACTIVATE) {
+		/* Release the secondary bus reset and let the bus settle */
+		eeh_ops->read_config(edev, PCI_BRIDGE_CONTROL, 2, &val);
+		val &= ~PCI_BRIDGE_CTL_BUS_RESET;
+		eeh_ops->write_config(edev, PCI_BRIDGE_CONTROL, 2, val);
+
+		msleep(EEH_PE_RST_SETTLE_TIME);
+
+		/* Continue reporting linkDown event */
+		if (aer_pos) {
+			eeh_ops->read_config(edev, aer_pos + PCI_ERR_UNCOR_MASK,
+					     4, &val);
+			val &= ~PCI_ERR_UNC_SURPDN;
+			eeh_ops->write_config(edev, aer_pos + PCI_ERR_UNCOR_MASK,
+					      4, val);
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Reset the secondary bus below bridge @pdev.
+ *
+ * When the bridge has no device-tree node, or the node lacks the
+ * "ibm,reset-by-firmware" property, the reset is done through the bridge's
+ * config space by __pnv_eeh_bridge_reset(). Otherwise firmware is asked to
+ * assert the reset; in that case EEH_RESET_DEACTIVATE is a no-op and an
+ * unknown @option yields -EINVAL.
+ *
+ * Returns 0 on success, -EIO when firmware reports a failure.
+ */
+static int pnv_eeh_bridge_reset(struct pci_dev *pdev, int option)
+{
+	struct pci_controller *hose = pci_bus_to_host(pdev->bus);
+	struct pnv_phb *phb = hose->private_data;
+	struct device_node *np = pci_device_to_OF_node(pdev);
+	uint64_t slot_id = PCI_SLOT_ID(phb->opal_id, pci_dev_id(pdev));
+	uint8_t reset_scope;
+	int64_t status;
+
+	/* Hot reset to the bus if firmware cannot handle */
+	if (!np || !of_get_property(np, "ibm,reset-by-firmware", NULL))
+		return __pnv_eeh_bridge_reset(pdev, option);
+
+	pr_debug("%s: FW reset PCI bus %04x:%02x with option %d\n",
+		 __func__, pci_domain_nr(pdev->bus),
+		 pdev->bus->number, option);
+
+	if (option == EEH_RESET_DEACTIVATE)
+		return 0;
+
+	if (option == EEH_RESET_FUNDAMENTAL) {
+		reset_scope = OPAL_RESET_PCI_FUNDAMENTAL;
+	} else if (option == EEH_RESET_HOT) {
+		reset_scope = OPAL_RESET_PCI_HOT;
+	} else {
+		dev_dbg(&pdev->dev, "%s: Unsupported reset %d\n",
+			__func__, option);
+		return -EINVAL;
+	}
+
+	/* A status above OPAL_SUCCESS means the request must be polled */
+	status = opal_pci_reset(slot_id, reset_scope, OPAL_ASSERT_RESET);
+	if (status > OPAL_SUCCESS)
+		status = pnv_eeh_poll(slot_id);
+
+	if (status != OPAL_SUCCESS)
+		return -EIO;
+
+	return 0;
+}
+
+/*
+ * Hot-reset the secondary bus of @dev: assert the reset, then release it.
+ * A device sitting on the root bus is reset through the root port of its
+ * PHB, any other device through pnv_eeh_bridge_reset(). Failures of the
+ * individual steps are not reported to the caller.
+ */
+void pnv_pci_reset_secondary_bus(struct pci_dev *dev)
+{
+	struct pci_controller *hose;
+
+	if (!pci_is_root_bus(dev->bus)) {
+		pnv_eeh_bridge_reset(dev, EEH_RESET_HOT);
+		pnv_eeh_bridge_reset(dev, EEH_RESET_DEACTIVATE);
+		return;
+	}
+
+	hose = pci_bus_to_host(dev->bus);
+	pnv_eeh_root_reset(hose, EEH_RESET_HOT);
+	pnv_eeh_root_reset(hose, EEH_RESET_DEACTIVATE);
+}
+
+/*
+ * Wait for the bits in @mask to clear in the 2-byte config register at
+ * offset @pos of the device described by @pdn.
+ *
+ * The register is sampled up to four times, sleeping 100, 200, 400 and
+ * 800 ms after each sample that still shows a pending bit. If the bits
+ * never clear a warning is logged; @type ("" or "AF") only selects the
+ * wording of that warning. The caller proceeds either way.
+ */
+static void pnv_eeh_wait_for_pending(struct pci_dn *pdn, const char *type,
+				     int pos, u16 mask)
+{
+	struct eeh_dev *edev = pdn->edev;
+	/* read_config() stores through a u32 pointer, so this must be u32 */
+	u32 status = 0;
+	int i;
+
+	/* Wait for Transaction Pending bit to be cleared */
+	for (i = 0; i < 4; i++) {
+		eeh_ops->read_config(edev, pos, 2, &status);
+		if (!(status & mask))
+			return;
+
+		msleep((1 << i) * 100);
+	}
+
+	pr_warn("%s: Pending transaction while issuing %sFLR to %04x:%02x:%02x.%01x\n",
+		__func__, type,
+		pdn->phb->global_number, pdn->busno,
+		PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
+}
+
+/*
+ * Assert or de-assert a PCIe Function Level Reset on the device of @pdn.
+ *
+ * EEH_RESET_HOT/EEH_RESET_FUNDAMENTAL wait for pending transactions, set
+ * PCI_EXP_DEVCTL_BCR_FLR and hold for EEH_PE_RST_HOLD_TIME.
+ * EEH_RESET_DEACTIVATE clears the bit and waits EEH_PE_RST_SETTLE_TIME.
+ * Other options do nothing.
+ *
+ * Returns -ENOTTY when the device has no PCIe capability or does not
+ * advertise PCI_EXP_DEVCAP_FLR, 0 otherwise (config access results are
+ * not checked).
+ */
+static int pnv_eeh_do_flr(struct pci_dn *pdn, int option)
+{
+	struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
+	u32 devcap = 0;
+	u32 devctl = 0;
+
+	if (WARN_ON(!edev->pcie_cap))
+		return -ENOTTY;
+
+	eeh_ops->read_config(edev, edev->pcie_cap + PCI_EXP_DEVCAP, 4, &devcap);
+	if (!(devcap & PCI_EXP_DEVCAP_FLR))
+		return -ENOTTY;
+
+	/*
+	 * Device Control is a 16-bit register that is directly followed by
+	 * Device Status. It is accessed as 2 bytes so that the
+	 * read-modify-write below never writes the status value back into
+	 * Device Status, whose error bits are cleared by writing 1.
+	 */
+	switch (option) {
+	case EEH_RESET_HOT:
+	case EEH_RESET_FUNDAMENTAL:
+		pnv_eeh_wait_for_pending(pdn, "",
+					 edev->pcie_cap + PCI_EXP_DEVSTA,
+					 PCI_EXP_DEVSTA_TRPND);
+		eeh_ops->read_config(edev, edev->pcie_cap + PCI_EXP_DEVCTL,
+				     2, &devctl);
+		devctl |= PCI_EXP_DEVCTL_BCR_FLR;
+		eeh_ops->write_config(edev, edev->pcie_cap + PCI_EXP_DEVCTL,
+				      2, devctl);
+		msleep(EEH_PE_RST_HOLD_TIME);
+		break;
+	case EEH_RESET_DEACTIVATE:
+		eeh_ops->read_config(edev, edev->pcie_cap + PCI_EXP_DEVCTL,
+				     2, &devctl);
+		devctl &= ~PCI_EXP_DEVCTL_BCR_FLR;
+		eeh_ops->write_config(edev, edev->pcie_cap + PCI_EXP_DEVCTL,
+				      2, devctl);
+		msleep(EEH_PE_RST_SETTLE_TIME);
+		break;
+	}
+
+	return 0;
+}
+
+/*
+ * Assert or de-assert a Function Level Reset through the PCI Advanced
+ * Features capability of the device of @pdn.
+ *
+ * Returns -ENOTTY when the device has no AF capability or the capability
+ * lacks either PCI_AF_CAP_TP or PCI_AF_CAP_FLR, 0 otherwise (config access
+ * results are not checked).
+ */
+static int pnv_eeh_do_af_flr(struct pci_dn *pdn, int option)
+{
+	struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
+	u32 af_caps = 0;
+
+	if (WARN_ON(!edev->af_cap))
+		return -ENOTTY;
+
+	eeh_ops->read_config(edev, edev->af_cap + PCI_AF_CAP, 1, &af_caps);
+	if (!((af_caps & PCI_AF_CAP_TP) && (af_caps & PCI_AF_CAP_FLR)))
+		return -ENOTTY;
+
+	if (option == EEH_RESET_HOT || option == EEH_RESET_FUNDAMENTAL) {
+		/*
+		 * Wait for Transaction Pending bit to clear. A word-aligned
+		 * test is used, so we use the control offset rather than status
+		 * and shift the test bit to match.
+		 */
+		pnv_eeh_wait_for_pending(pdn, "AF",
+					 edev->af_cap + PCI_AF_CTRL,
+					 PCI_AF_STATUS_TP << 8);
+		eeh_ops->write_config(edev, edev->af_cap + PCI_AF_CTRL,
+				      1, PCI_AF_CTRL_FLR);
+		msleep(EEH_PE_RST_HOLD_TIME);
+	} else if (option == EEH_RESET_DEACTIVATE) {
+		eeh_ops->write_config(edev, edev->af_cap + PCI_AF_CTRL, 1, 0);
+		msleep(EEH_PE_RST_SETTLE_TIME);
+	}
+
+	return 0;
+}
+
+/*
+ * Reset the single device of a VF PE: try a PCIe FLR first and fall back
+ * to an Advanced Features FLR when the PCIe FLR returns an error.
+ *
+ * Returns -ENXIO when the PE has no device with a pci_dn, otherwise the
+ * result of the reset helper that was used last.
+ */
+static int pnv_eeh_reset_vf_pe(struct eeh_pe *pe, int option)
+{
+	struct eeh_dev *vf_edev;
+	struct pci_dn *vf_pdn;
+
+	/* The VF PE should have only one child device */
+	vf_edev = list_first_entry_or_null(&pe->edevs, struct eeh_dev, entry);
+	vf_pdn = eeh_dev_to_pdn(vf_edev);
+	if (!vf_pdn)
+		return -ENXIO;
+
+	if (pnv_eeh_do_flr(vf_pdn, option) == 0)
+		return 0;
+
+	return pnv_eeh_do_af_flr(vf_pdn, option);
+}
+
+/**
+ * pnv_eeh_reset - Reset the specified PE
+ * @pe: EEH PE
+ * @option: reset option (EEH_RESET_HOT, EEH_RESET_FUNDAMENTAL or
+ * EEH_RESET_DEACTIVATE)
+ *
+ * Do reset on the indicated PE. A PHB PE gets a complete PHB reset.
+ * A VF PE is reset through FLR (or AF FLR) on its device. For every
+ * other PE the reset is applied to the PE's primary bus: through the
+ * root port when that bus, or its parent, is the root bus, and through
+ * the upstream p2p bridge otherwise. A hot reset of a non-root bus is
+ * first attempted with pci_bus_error_reset(); the bridge/root-port
+ * path is only used when that fails or a fundamental reset is wanted.
+ *
+ * Return: 0 on success, a negative errno on failure.
+ */
+static int pnv_eeh_reset(struct eeh_pe *pe, int option)
+{
+ struct pci_controller *hose = pe->phb;
+ struct pnv_phb *phb;
+ struct pci_bus *bus;
+ int64_t rc;
+
+ /*
+ * For PHB reset, we always have complete reset. For those PEs whose
+ * primary bus derived from root complex (root bus) or root port
+ * (usually bus#1), we apply hot or fundamental reset on the root port.
+ * For other PEs, we always have hot reset on the PE primary bus.
+ *
+ * Here, we have different design to pHyp, which always clear the
+ * frozen state during PE reset. However, the good idea here from
+ * benh is to keep frozen state before we get PE reset done completely
+ * (until BAR restore). With the frozen state, HW drops illegal IO
+ * or MMIO access, which can incur recursive frozen PE during PE
+ * reset. The side effect is that EEH core has to clear the frozen
+ * state explicitly after BAR restore.
+ */
+ if (pe->type & EEH_PE_PHB)
+ return pnv_eeh_phb_reset(hose, option);
+
+ /*
+ * The frozen PE might be caused by PAPR error injection
+ * registers, which are expected to be cleared after hitting
+ * frozen PE as stated in the hardware spec. Unfortunately,
+ * that's not true on P7IOC. So we have to clear it manually
+ * to avoid recursive EEH errors during recovery.
+ */
+ phb = hose->private_data;
+ if (phb->model == PNV_PHB_MODEL_P7IOC &&
+ (option == EEH_RESET_HOT ||
+ option == EEH_RESET_FUNDAMENTAL)) {
+ rc = opal_pci_reset(phb->opal_id,
+ OPAL_RESET_PHB_ERROR,
+ OPAL_ASSERT_RESET);
+ if (rc != OPAL_SUCCESS) {
+ pr_warn("%s: Failure %lld clearing error injection registers\n",
+ __func__, rc);
+ return -EIO;
+ }
+ }
+
+ if (pe->type & EEH_PE_VF)
+ return pnv_eeh_reset_vf_pe(pe, option);
+
+ bus = eeh_pe_bus_get(pe);
+ if (!bus) {
+ pr_err("%s: Cannot find PCI bus for PHB#%x-PE#%x\n",
+ __func__, pe->phb->global_number, pe->addr);
+ return -EIO;
+ }
+
+ if (pci_is_root_bus(bus))
+ return pnv_eeh_root_reset(hose, option);
+
+ /*
+ * For hot resets try to use the generic PCI error recovery reset
+ * functions. These correctly handle the case where the secondary
+ * bus is behind a hotplug slot and it will use the slot provided
+ * reset methods to prevent spurious hotplug events during the reset.
+ *
+ * Fundamental resets need to be handled internally to EEH since the
+ * PCI core doesn't really have a concept of a fundamental reset,
+ * mainly because there's no standard way to generate one. Only a
+ * few devices require an FRESET so it should be fine.
+ */
+ if (option != EEH_RESET_FUNDAMENTAL) {
+ /*
+ * NB: Skiboot and pnv_eeh_bridge_reset() also no-op the
+ * de-assert step. It's like the OPAL reset API was
+ * poorly designed or something...
+ */
+ if (option == EEH_RESET_DEACTIVATE)
+ return 0;
+
+ rc = pci_bus_error_reset(bus->self);
+ if (!rc)
+ return 0;
+ }
+
+ /* otherwise, use the generic bridge reset. this might call into FW */
+ if (pci_is_root_bus(bus->parent))
+ return pnv_eeh_root_reset(hose, option);
+ return pnv_eeh_bridge_reset(bus->self, option);
+}
+
+/**
+ * pnv_eeh_get_log - Retrieve error log
+ * @pe: EEH PE
+ * @severity: temporary or permanent error log (not used here)
+ * @drv_log: driver log to be combined with retrieved error log (not used here)
+ * @len: length of driver log (not used here)
+ *
+ * Dump the PHB diag-data held in @pe->data, unless EEH_EARLY_DUMP_LOG
+ * is set.
+ *
+ * Return: always 0.
+ */
+static int pnv_eeh_get_log(struct eeh_pe *pe, int severity,
+			   char *drv_log, unsigned long len)
+{
+	if (eeh_has_flag(EEH_EARLY_DUMP_LOG))
+		return 0;
+
+	pnv_pci_dump_phb_diag_data(pe->phb, pe->data);
+
+	return 0;
+}
+
+/**
+ * pnv_eeh_configure_bridge - Configure PCI bridges in the indicated PE
+ * @pe: EEH PE
+ *
+ * The function will be called to reconfigure the bridges included
+ * in the specified PE so that the malfunctioning PE can be recovered
+ * again. This implementation performs no work and always returns 0.
+ */
+static int pnv_eeh_configure_bridge(struct eeh_pe *pe)
+{
+ return 0;
+}
+
+/**
+ * pnv_eeh_err_inject - Inject specified error to the indicated PE
+ * @pe: the indicated PE
+ * @type: error type
+ * @func: specific error type
+ * @addr: address
+ * @mask: address mask
+ *
+ * Ask firmware to inject the error selected by @type and @func into
+ * @pe, for testing purposes.
+ *
+ * Return: 0 on success, -ERANGE for an unsupported @type or @func,
+ * -ENXIO when firmware lacks OPAL_PCI_ERR_INJECT, -EIO when the
+ * firmware call fails.
+ */
+static int pnv_eeh_err_inject(struct eeh_pe *pe, int type, int func,
+			      unsigned long addr, unsigned long mask)
+{
+	struct pci_controller *hose = pe->phb;
+	struct pnv_phb *phb = hose->private_data;
+	s64 status;
+
+	/* Only the two IOA bus error types are accepted */
+	if (!(type == OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR ||
+	      type == OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR64)) {
+		pr_warn("%s: Invalid error type %d\n",
+			__func__, type);
+		return -ERANGE;
+	}
+
+	/* The function must lie within the supported IOA range */
+	if (!(func >= OPAL_ERR_INJECT_FUNC_IOA_LD_MEM_ADDR &&
+	      func <= OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_TARGET)) {
+		pr_warn("%s: Invalid error function %d\n",
+			__func__, func);
+		return -ERANGE;
+	}
+
+	/* Firmware supports error injection ? */
+	if (!opal_check_token(OPAL_PCI_ERR_INJECT)) {
+		pr_warn("%s: Firmware doesn't support error injection\n",
+			__func__);
+		return -ENXIO;
+	}
+
+	/* Do error injection */
+	status = opal_pci_err_inject(phb->opal_id, pe->addr,
+				     type, func, addr, mask);
+	if (status == OPAL_SUCCESS)
+		return 0;
+
+	pr_warn("%s: Failure %lld injecting error %d-%d to PHB#%x-PE#%x\n",
+		__func__, status, type, func,
+		hose->global_number, pe->addr);
+
+	return -EIO;
+}
+
+/*
+ * Tell whether config space access to the device of @pdn must be refused.
+ *
+ * Access is blocked only when the device belongs to a PE that has
+ * EEH_PE_CFG_BLOCKED set, with one exception: a VF (edev->physfn set) whose
+ * PE is in EEH_PE_RESET stays accessible.
+ */
+static inline bool pnv_eeh_cfg_blocked(struct pci_dn *pdn)
+{
+	struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
+	struct eeh_pe *owner = edev ? edev->pe : NULL;
+
+	if (!owner)
+		return false;
+
+	/*
+	 * We will issue FLR or AF FLR to all VFs, which are contained
+	 * in VF PE. It relies on the EEH PCI config accessors. So we
+	 * can't block them during the window.
+	 */
+	if (edev->physfn && (owner->state & EEH_PE_RESET))
+		return false;
+
+	return (owner->state & EEH_PE_CFG_BLOCKED) != 0;
+}
+
+/*
+ * EEH config space read accessor.
+ *
+ * Returns PCIBIOS_DEVICE_NOT_FOUND when @edev has no pci_dn (*@val is left
+ * untouched), PCIBIOS_SET_FAILED with *@val set to all ones when access is
+ * currently blocked, and the result of pnv_pci_cfg_read() otherwise.
+ */
+static int pnv_eeh_read_config(struct eeh_dev *edev,
+			       int where, int size, u32 *val)
+{
+	struct pci_dn *pdn = eeh_dev_to_pdn(edev);
+
+	if (!pdn)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	if (!pnv_eeh_cfg_blocked(pdn))
+		return pnv_pci_cfg_read(pdn, where, size, val);
+
+	*val = 0xFFFFFFFF;
+
+	return PCIBIOS_SET_FAILED;
+}
+
+/*
+ * EEH config space write accessor.
+ *
+ * Returns PCIBIOS_DEVICE_NOT_FOUND when @edev has no pci_dn,
+ * PCIBIOS_SET_FAILED when access is currently blocked, and the result of
+ * pnv_pci_cfg_write() otherwise.
+ */
+static int pnv_eeh_write_config(struct eeh_dev *edev,
+				int where, int size, u32 val)
+{
+	struct pci_dn *pdn = eeh_dev_to_pdn(edev);
+
+	if (!pdn)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	if (!pnv_eeh_cfg_blocked(pdn))
+		return pnv_pci_cfg_write(pdn, where, size, val);
+
+	return PCIBIOS_SET_FAILED;
+}
+
+/*
+ * Print the GEM and LEM register groups of P7IOC hub diag-data @data.
+ * A group is printed only when at least one of its registers is non-zero;
+ * values are converted from big-endian before printing.
+ */
+static void pnv_eeh_dump_hub_diag_common(struct OpalIoP7IOCErrorData *data)
+{
+	bool show;
+
+	/* GEM */
+	show = data->gemXfir || data->gemRfir ||
+	       data->gemRirqfir || data->gemMask || data->gemRwof;
+	if (show)
+		pr_info(" GEM: %016llx %016llx %016llx %016llx %016llx\n",
+			be64_to_cpu(data->gemXfir),
+			be64_to_cpu(data->gemRfir),
+			be64_to_cpu(data->gemRirqfir),
+			be64_to_cpu(data->gemMask),
+			be64_to_cpu(data->gemRwof));
+
+	/* LEM */
+	show = data->lemFir || data->lemErrMask ||
+	       data->lemAction0 || data->lemAction1 || data->lemWof;
+	if (show)
+		pr_info(" LEM: %016llx %016llx %016llx %016llx %016llx\n",
+			be64_to_cpu(data->lemFir),
+			be64_to_cpu(data->lemErrMask),
+			be64_to_cpu(data->lemAction0),
+			be64_to_cpu(data->lemAction1),
+			be64_to_cpu(data->lemWof));
+}
+
+/*
+ * Fetch the P7IOC hub diag-data for the hub of @hose into phb->diag_data
+ * and print it. The common GEM/LEM registers are printed for every known
+ * type, followed by the type specific registers when any of them is
+ * non-zero. A firmware failure or an unknown type only produces a warning.
+ */
+static void pnv_eeh_get_and_dump_hub_diag(struct pci_controller *hose)
+{
+	struct pnv_phb *phb = hose->private_data;
+	struct OpalIoP7IOCErrorData *data =
+			(struct OpalIoP7IOCErrorData*)phb->diag_data;
+	u16 type;
+	long rc;
+
+	rc = opal_pci_get_hub_diag_data(phb->hub_id, data, sizeof(*data));
+	if (rc != OPAL_SUCCESS) {
+		pr_warn("%s: Failed to get HUB#%llx diag-data (%ld)\n",
+			__func__, phb->hub_id, rc);
+		return;
+	}
+
+	/*
+	 * The type field is big-endian. Convert it once so that the switch
+	 * and the warning for an unknown type use the same CPU-endian value.
+	 */
+	type = be16_to_cpu(data->type);
+
+	switch (type) {
+	case OPAL_P7IOC_DIAG_TYPE_RGC:
+		pr_info("P7IOC diag-data for RGC\n\n");
+		pnv_eeh_dump_hub_diag_common(data);
+		if (data->rgc.rgcStatus || data->rgc.rgcLdcp)
+			pr_info(" RGC: %016llx %016llx\n",
+				be64_to_cpu(data->rgc.rgcStatus),
+				be64_to_cpu(data->rgc.rgcLdcp));
+		break;
+	case OPAL_P7IOC_DIAG_TYPE_BI:
+		pr_info("P7IOC diag-data for BI %s\n\n",
+			data->bi.biDownbound ? "Downbound" : "Upbound");
+		pnv_eeh_dump_hub_diag_common(data);
+		if (data->bi.biLdcp0 || data->bi.biLdcp1 ||
+		    data->bi.biLdcp2 || data->bi.biFenceStatus)
+			pr_info(" BI: %016llx %016llx %016llx %016llx\n",
+				be64_to_cpu(data->bi.biLdcp0),
+				be64_to_cpu(data->bi.biLdcp1),
+				be64_to_cpu(data->bi.biLdcp2),
+				be64_to_cpu(data->bi.biFenceStatus));
+		break;
+	case OPAL_P7IOC_DIAG_TYPE_CI:
+		pr_info("P7IOC diag-data for CI Port %d\n\n",
+			data->ci.ciPort);
+		pnv_eeh_dump_hub_diag_common(data);
+		if (data->ci.ciPortStatus || data->ci.ciPortLdcp)
+			pr_info(" CI: %016llx %016llx\n",
+				be64_to_cpu(data->ci.ciPortStatus),
+				be64_to_cpu(data->ci.ciPortLdcp));
+		break;
+	case OPAL_P7IOC_DIAG_TYPE_MISC:
+		pr_info("P7IOC diag-data for MISC\n\n");
+		pnv_eeh_dump_hub_diag_common(data);
+		break;
+	case OPAL_P7IOC_DIAG_TYPE_I2C:
+		pr_info("P7IOC diag-data for I2C\n\n");
+		pnv_eeh_dump_hub_diag_common(data);
+		break;
+	default:
+		pr_warn("%s: Invalid type of HUB#%llx diag-data (%d)\n",
+			__func__, phb->hub_id, type);
+	}
+}
+
+/*
+ * Translate the frozen PE number @pe_no reported for @hose into an EEH PE.
+ *
+ * A slave PE number is first replaced by that of its master, because slave
+ * PEs are invisible to the EEH core. The PE, and every non-PHB ancestor
+ * that is found in a frozen state, is frozen in the PHB unless it is
+ * already marked EEH_PE_ISOLATED.
+ *
+ * Returns 0 with *@pe set to the topmost frozen PE that was found, or
+ * -EEXIST when no EEH PE matches @pe_no (or a slave PE has no master).
+ */
+static int pnv_eeh_get_pe(struct pci_controller *hose,
+			  u16 pe_no, struct eeh_pe **pe)
+{
+	struct pnv_phb *phb = hose->private_data;
+	struct pnv_ioda_pe *pnv_pe;
+	struct eeh_pe *dev_pe;
+
+	/*
+	 * If PHB supports compound PE, to fetch
+	 * the master PE because slave PE is invisible
+	 * to EEH core.
+	 */
+	pnv_pe = &phb->ioda.pe_array[pe_no];
+	if (pnv_pe->flags & PNV_IODA_PE_SLAVE) {
+		pnv_pe = pnv_pe->master;
+		WARN_ON(!pnv_pe ||
+			!(pnv_pe->flags & PNV_IODA_PE_MASTER));
+		/* Warn above, but never dereference a missing master */
+		if (!pnv_pe)
+			return -EEXIST;
+		pe_no = pnv_pe->pe_number;
+	}
+
+	/* Find the PE according to PE# */
+	dev_pe = eeh_pe_get(hose, pe_no);
+	if (!dev_pe)
+		return -EEXIST;
+
+	/*
+	 * Freeze the (compound) PE. The freeze_pe hook is optional, as in
+	 * pnv_eeh_get_pe_state(), so check it before calling.
+	 */
+	*pe = dev_pe;
+	if (phb->freeze_pe && !(dev_pe->state & EEH_PE_ISOLATED))
+		phb->freeze_pe(phb, pe_no);
+
+	/*
+	 * At this point, we're sure the (compound) PE should
+	 * have been frozen. However, we still need poke until
+	 * hitting the frozen PE on top level.
+	 */
+	dev_pe = dev_pe->parent;
+	while (dev_pe && !(dev_pe->type & EEH_PE_PHB)) {
+		int ret;
+
+		ret = eeh_ops->get_state(dev_pe, NULL);
+		if (ret <= 0 || eeh_state_active(ret)) {
+			dev_pe = dev_pe->parent;
+			continue;
+		}
+
+		/* Frozen parent PE */
+		*pe = dev_pe;
+		if (phb->freeze_pe && !(dev_pe->state & EEH_PE_ISOLATED))
+			phb->freeze_pe(phb, dev_pe->addr);
+
+		/* Next one */
+		dev_pe = dev_pe->parent;
+	}
+
+	return 0;
+}
+
+/**
+ * pnv_eeh_next_error - Retrieve next EEH error to handle
+ * @pe: Affected PE
+ *
+ * The function is expected to be called by EEH core while it gets
+ * special EEH event (without binding PE). The function calls to
+ * OPAL APIs for next error to handle. The informational error is
+ * handled internally by platform. However, the dead IOC, dead PHB,
+ * fenced PHB and frozen PE should be handled by EEH core eventually.
+ *
+ * PHBs whose PHB PE is missing or already isolated are skipped. The
+ * scan stops at the first PHB that reports an error above
+ * EEH_NEXT_ERR_INF. When nothing needs handling and EEH is enabled,
+ * the EEH event interrupt is unmasked again before returning.
+ *
+ * Return: one of the EEH_NEXT_ERR_* codes; for PHB and PE errors
+ * *@pe is set to the PE the EEH core should recover.
+ */
+static int pnv_eeh_next_error(struct eeh_pe **pe)
+{
+	struct pci_controller *hose;
+	struct pnv_phb *phb;
+	struct eeh_pe *phb_pe, *parent_pe;
+	__be64 frozen_pe_no;
+	__be16 err_type, severity;
+	long rc;
+	int state, ret = EEH_NEXT_ERR_NONE;
+
+	/*
+	 * While running here, it's safe to purge the event queue. The
+	 * event should still be masked.
+	 */
+	eeh_remove_event(NULL, false);
+
+	list_for_each_entry(hose, &hose_list, list_node) {
+		/*
+		 * If the subordinate PCI buses of the PHB has been
+		 * removed or is exactly under error recovery, we
+		 * needn't take care of it any more.
+		 */
+		phb = hose->private_data;
+		phb_pe = eeh_phb_pe_get(hose);
+		if (!phb_pe || (phb_pe->state & EEH_PE_ISOLATED))
+			continue;
+
+		rc = opal_pci_next_error(phb->opal_id,
+					 &frozen_pe_no, &err_type, &severity);
+		if (rc != OPAL_SUCCESS) {
+			/* Message is newline-terminated like every other one here */
+			pr_devel("%s: Invalid return value on "
+				 "PHB#%x (0x%lx) from opal_pci_next_error\n",
+				 __func__, hose->global_number, rc);
+			continue;
+		}
+
+		/* If the PHB doesn't have error, stop processing */
+		if (be16_to_cpu(err_type) == OPAL_EEH_NO_ERROR ||
+		    be16_to_cpu(severity) == OPAL_EEH_SEV_NO_ERROR) {
+			pr_devel("%s: No error found on PHB#%x\n",
+				 __func__, hose->global_number);
+			continue;
+		}
+
+		/*
+		 * Processing the error. We're expecting the error with
+		 * highest priority reported upon multiple errors on the
+		 * specific PHB.
+		 */
+		pr_devel("%s: Error (%d, %d, %llu) on PHB#%x\n",
+			 __func__, be16_to_cpu(err_type),
+			 be16_to_cpu(severity), be64_to_cpu(frozen_pe_no),
+			 hose->global_number);
+		switch (be16_to_cpu(err_type)) {
+		case OPAL_EEH_IOC_ERROR:
+			if (be16_to_cpu(severity) == OPAL_EEH_SEV_IOC_DEAD) {
+				pr_err("EEH: dead IOC detected\n");
+				ret = EEH_NEXT_ERR_DEAD_IOC;
+			} else if (be16_to_cpu(severity) == OPAL_EEH_SEV_INF) {
+				pr_info("EEH: IOC informative error "
+					"detected\n");
+				pnv_eeh_get_and_dump_hub_diag(hose);
+				ret = EEH_NEXT_ERR_NONE;
+			}
+
+			break;
+		case OPAL_EEH_PHB_ERROR:
+			if (be16_to_cpu(severity) == OPAL_EEH_SEV_PHB_DEAD) {
+				*pe = phb_pe;
+				pr_err("EEH: dead PHB#%x detected, "
+				       "location: %s\n",
+				       hose->global_number,
+				       eeh_pe_loc_get(phb_pe));
+				ret = EEH_NEXT_ERR_DEAD_PHB;
+			} else if (be16_to_cpu(severity) ==
+				   OPAL_EEH_SEV_PHB_FENCED) {
+				*pe = phb_pe;
+				pr_err("EEH: Fenced PHB#%x detected, "
+				       "location: %s\n",
+				       hose->global_number,
+				       eeh_pe_loc_get(phb_pe));
+				ret = EEH_NEXT_ERR_FENCED_PHB;
+			} else if (be16_to_cpu(severity) == OPAL_EEH_SEV_INF) {
+				pr_info("EEH: PHB#%x informative error "
+					"detected, location: %s\n",
+					hose->global_number,
+					eeh_pe_loc_get(phb_pe));
+				pnv_eeh_get_phb_diag(phb_pe);
+				pnv_pci_dump_phb_diag_data(hose, phb_pe->data);
+				ret = EEH_NEXT_ERR_NONE;
+			}
+
+			break;
+		case OPAL_EEH_PE_ERROR:
+			/*
+			 * If we can't find the corresponding PE, we
+			 * just try to unfreeze.
+			 */
+			if (pnv_eeh_get_pe(hose,
+					   be64_to_cpu(frozen_pe_no), pe)) {
+				pr_info("EEH: Clear non-existing PHB#%x-PE#%llx\n",
+					hose->global_number, be64_to_cpu(frozen_pe_no));
+				pr_info("EEH: PHB location: %s\n",
+					eeh_pe_loc_get(phb_pe));
+
+				/* Dump PHB diag-data */
+				rc = opal_pci_get_phb_diag_data2(phb->opal_id,
+					phb->diag_data, phb->diag_data_size);
+				if (rc == OPAL_SUCCESS)
+					pnv_pci_dump_phb_diag_data(hose,
+							phb->diag_data);
+
+				/* Try best to clear it */
+				opal_pci_eeh_freeze_clear(phb->opal_id,
+					be64_to_cpu(frozen_pe_no),
+					OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
+				ret = EEH_NEXT_ERR_NONE;
+			} else if ((*pe)->state & EEH_PE_ISOLATED ||
+				   eeh_pe_passed(*pe)) {
+				ret = EEH_NEXT_ERR_NONE;
+			} else {
+				pr_err("EEH: Frozen PE#%x "
+				       "on PHB#%x detected\n",
+				       (*pe)->addr,
+				       (*pe)->phb->global_number);
+				pr_err("EEH: PE location: %s, "
+				       "PHB location: %s\n",
+				       eeh_pe_loc_get(*pe),
+				       eeh_pe_loc_get(phb_pe));
+				ret = EEH_NEXT_ERR_FROZEN_PE;
+			}
+
+			break;
+		default:
+			pr_warn("%s: Unexpected error type %d\n",
+				__func__, be16_to_cpu(err_type));
+		}
+
+		/*
+		 * EEH core will try recover from fenced PHB or
+		 * frozen PE. In the time for frozen PE, EEH core
+		 * enable IO path for that before collecting logs,
+		 * but it ruins the site. So we have to dump the
+		 * log in advance here.
+		 */
+		if ((ret == EEH_NEXT_ERR_FROZEN_PE ||
+		    ret == EEH_NEXT_ERR_FENCED_PHB) &&
+		    !((*pe)->state & EEH_PE_ISOLATED)) {
+			eeh_pe_mark_isolated(*pe);
+			pnv_eeh_get_phb_diag(*pe);
+
+			if (eeh_has_flag(EEH_EARLY_DUMP_LOG))
+				pnv_pci_dump_phb_diag_data((*pe)->phb,
+							   (*pe)->data);
+		}
+
+		/*
+		 * We probably have the frozen parent PE out there and
+		 * we need to handle the frozen parent PE first.
+		 */
+		if (ret == EEH_NEXT_ERR_FROZEN_PE) {
+			parent_pe = (*pe)->parent;
+			while (parent_pe) {
+				/* Hit the ceiling ? */
+				if (parent_pe->type & EEH_PE_PHB)
+					break;
+
+				/* Frozen parent PE ? */
+				state = eeh_ops->get_state(parent_pe, NULL);
+				if (state > 0 && !eeh_state_active(state))
+					*pe = parent_pe;
+
+				/* Next parent level */
+				parent_pe = parent_pe->parent;
+			}
+
+			/* We possibly migrate to another PE */
+			eeh_pe_mark_isolated(*pe);
+		}
+
+		/*
+		 * If we have no errors on the specific PHB or only
+		 * informative error there, we continue poking it.
+		 * Otherwise, we need actions to be taken by upper
+		 * layer.
+		 */
+		if (ret > EEH_NEXT_ERR_INF)
+			break;
+	}
+
+	/* Unmask the event */
+	if (ret == EEH_NEXT_ERR_NONE && eeh_enabled())
+		enable_irq(eeh_event_irq);
+
+	return ret;
+}
+
+/*
+ * Ask firmware to re-initialise the PCI device behind @edev.
+ *
+ * Returns -EEXIST for a NULL @edev, 0 without touching firmware for a VF
+ * (edev->physfn set), 0 when opal_pci_reinit() succeeds and -EIO when it
+ * reports a failure.
+ */
+static int pnv_eeh_restore_config(struct eeh_dev *edev)
+{
+	struct pnv_phb *phb;
+	s64 status;
+
+	if (!edev)
+		return -EEXIST;
+
+	if (edev->physfn)
+		return 0;
+
+	phb = edev->controller->private_data;
+	status = opal_pci_reinit(phb->opal_id,
+				 OPAL_REINIT_PCI_DEV, edev->bdfn);
+	if (!status)
+		return 0;
+
+	pr_warn("%s: Can't reinit PCI dev 0x%x (%lld)\n",
+		__func__, edev->bdfn, status);
+
+	return -EIO;
+}
+
+static struct eeh_ops pnv_eeh_ops = { /* EEH callbacks for the "powernv" platform, handed to eeh_init() by eeh_powernv_init() */
+ .name = "powernv",
+ .probe = pnv_eeh_probe,
+ .set_option = pnv_eeh_set_option,
+ .get_state = pnv_eeh_get_state,
+ .reset = pnv_eeh_reset,
+ .get_log = pnv_eeh_get_log,
+ .configure_bridge = pnv_eeh_configure_bridge, /* no-op: always returns 0 */
+ .err_inject = pnv_eeh_err_inject,
+ .read_config = pnv_eeh_read_config,
+ .write_config = pnv_eeh_write_config,
+ .next_error = pnv_eeh_next_error,
+ .restore_config = pnv_eeh_restore_config,
+ .notify_resume = NULL /* no resume-notification callback is provided */
+};
+
+/**
+ * eeh_powernv_init - Register platform dependent EEH operations
+ *
+ * EEH initialization on powernv platform. This function should be
+ * called before any EEH related functions.
+ *
+ * Return: -EINVAL when OPAL firmware is not available, otherwise the
+ * result of eeh_init().
+ */
+static int __init eeh_powernv_init(void)
+{
+	int max_diag_size = PNV_PCI_DIAG_BUF_SIZE;
+	struct pci_controller *hose;
+	struct pnv_phb *phb;
+	int ret = -EINVAL;
+
+	if (!firmware_has_feature(FW_FEATURE_OPAL)) {
+		pr_warn("%s: OPAL is required !\n", __func__);
+		return -EINVAL;
+	}
+
+	/* Set probe mode */
+	eeh_add_flag(EEH_PROBE_MODE_DEV);
+
+	/*
+	 * P7IOC blocks PCI config access to frozen PE, but PHB3
+	 * doesn't do that. So we have to selectively enable I/O
+	 * prior to collecting error log.
+	 *
+	 * Every PHB is visited: the PE aux buffer must be able to hold
+	 * the diag-data of the PHB with the largest diag_data_size, not
+	 * just that of the first PHB on the list.
+	 */
+	list_for_each_entry(hose, &hose_list, list_node) {
+		phb = hose->private_data;
+
+		if (phb->model == PNV_PHB_MODEL_P7IOC)
+			eeh_add_flag(EEH_ENABLE_IO_FOR_LOG);
+
+		if (phb->diag_data_size > max_diag_size)
+			max_diag_size = phb->diag_data_size;
+	}
+
+	/*
+	 * eeh_init() allocates the eeh_pe and its aux data buf so the
+	 * size needs to be set before calling eeh_init().
+	 */
+	eeh_set_pe_aux_size(max_diag_size);
+	ppc_md.pcibios_bus_add_device = pnv_pcibios_bus_add_device;
+
+	ret = eeh_init(&pnv_eeh_ops);
+	if (!ret)
+		pr_info("EEH: PowerNV platform initialized\n");
+	else
+		pr_info("EEH: Failed to initialize PowerNV platform (%d)\n", ret);
+
+	return ret;
+}
+machine_arch_initcall(powernv, eeh_powernv_init);
diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
new file mode 100644
index 000000000..ad41dffe4
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/idle.c
@@ -0,0 +1,1507 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * PowerNV cpuidle code
+ *
+ * Copyright 2015 IBM Corp.
+ */
+
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/of.h>
+#include <linux/device.h>
+#include <linux/cpu.h>
+
+#include <asm/firmware.h>
+#include <asm/interrupt.h>
+#include <asm/machdep.h>
+#include <asm/opal.h>
+#include <asm/cputhreads.h>
+#include <asm/cpuidle.h>
+#include <asm/code-patching.h>
+#include <asm/smp.h>
+#include <asm/runlatch.h>
+#include <asm/dbell.h>
+
+#include "powernv.h"
+#include "subcore.h"
+
+/* Power ISA 3.0 allows for stop states 0x0 - 0xF */
+#define MAX_STOP_STATE 0xF
+
+/*
+ * Register identifiers passed to opal_slw_set_reg() for the MSR and PSSCR
+ * values on CPU_FTR_ARCH_300 CPUs (see pnv_save_sprs_for_deep_states()).
+ */
+#define P9_STOP_SPR_MSR 2000
+#define P9_STOP_SPR_PSSCR 855
+
+/*
+ * supported_cpuidle_states: OPAL_PM_* flag bits, handed out by
+ * pnv_get_supported_cpuidle_states().
+ * pnv_idle_states / nr_pnv_idle_states: array of idle state descriptors and
+ * its element count, as iterated by pnv_arch300_idle_init().
+ */
+static u32 supported_cpuidle_states;
+struct pnv_idle_states_t *pnv_idle_states;
+int nr_pnv_idle_states;
+
+/*
+ * The default stop state that will be used by ppc_md.power_save
+ * function on platforms that support stop instruction.
+ */
+static u64 pnv_default_stop_val;
+static u64 pnv_default_stop_mask;
+static bool default_stop_found;
+
+/*
+ * First stop state levels when SPR and TB loss can occur.
+ */
+static u64 pnv_first_tb_loss_level = MAX_STOP_STATE + 1;
+static u64 deep_spr_loss_state = MAX_STOP_STATE + 1;
+
+/*
+ * psscr value and mask of the deepest stop idle state.
+ * Used when a cpu is offlined.
+ */
+static u64 pnv_deepest_stop_psscr_val;
+static u64 pnv_deepest_stop_psscr_mask;
+static u64 pnv_deepest_stop_flag;
+static bool deepest_stop_found;
+
+/*
+ * Idle type that power7_offline() passes to power7_idle_insn(). While it is
+ * zero, pnv_cpu_offline() does not take the power7_offline() path.
+ */
+static unsigned long power7_offline_type;
+
+/*
+ * Hand the SPR values that must be in place for deep idle states to
+ * opal_slw_set_reg(), once for every present CPU.
+ *
+ * Per-thread values (HSPRG0, LPCR and, on CPU_FTR_ARCH_300, MSR and PSSCR)
+ * are programmed for each CPU; HMEER and the HID registers are programmed
+ * only for thread 0 of a core.
+ *
+ * Returns 0 on success, or the first non-zero value returned by
+ * opal_slw_set_reg().
+ */
+static int __init pnv_save_sprs_for_deep_states(void)
+{
+	/*
+	 * hid0, hid1, hid4, hid5, hmeer and lpcr values are symmetric across
+	 * all cpus at boot, so the values read on the current cpu are reused
+	 * for every cpu.
+	 */
+	uint64_t boot_lpcr = mfspr(SPRN_LPCR);
+	uint64_t boot_hid0 = mfspr(SPRN_HID0);
+	uint64_t boot_hmeer = mfspr(SPRN_HMEER);
+	uint64_t idle_msr = MSR_IDLE;
+	uint64_t deepest_psscr = pnv_deepest_stop_psscr_val;
+	int cpu;
+	int rc;
+
+	for_each_present_cpu(cpu) {
+		uint64_t pir = get_hard_smp_processor_id(cpu);
+		uint64_t hsprg0 = (uint64_t)paca_ptrs[cpu];
+
+		/* Per-thread registers */
+		rc = opal_slw_set_reg(pir, SPRN_HSPRG0, hsprg0);
+		if (rc)
+			return rc;
+
+		rc = opal_slw_set_reg(pir, SPRN_LPCR, boot_lpcr);
+		if (rc)
+			return rc;
+
+		if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+			rc = opal_slw_set_reg(pir, P9_STOP_SPR_MSR, idle_msr);
+			if (rc)
+				return rc;
+
+			rc = opal_slw_set_reg(pir, P9_STOP_SPR_PSSCR,
+					      deepest_psscr);
+			if (rc)
+				return rc;
+		}
+
+		/* HIDs are per core registers: only thread 0 programs them */
+		if (cpu_thread_in_core(cpu) != 0)
+			continue;
+
+		rc = opal_slw_set_reg(pir, SPRN_HMEER, boot_hmeer);
+		if (rc)
+			return rc;
+
+		rc = opal_slw_set_reg(pir, SPRN_HID0, boot_hid0);
+		if (rc)
+			return rc;
+
+		/* Only p8 needs to set extra HID registers */
+		if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
+			uint64_t boot_hid1 = mfspr(SPRN_HID1);
+			uint64_t boot_hid4 = mfspr(SPRN_HID4);
+			uint64_t boot_hid5 = mfspr(SPRN_HID5);
+
+			rc = opal_slw_set_reg(pir, SPRN_HID1, boot_hid1);
+			if (rc)
+				return rc;
+
+			rc = opal_slw_set_reg(pir, SPRN_HID4, boot_hid4);
+			if (rc)
+				return rc;
+
+			rc = opal_slw_set_reg(pir, SPRN_HID5, boot_hid5);
+			if (rc)
+				return rc;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Accessor for the file-local supported_cpuidle_states flag word.
+ * Exported for GPL modules.
+ */
+u32 pnv_get_supported_cpuidle_states(void)
+{
+	return supported_cpuidle_states;
+}
+EXPORT_SYMBOL_GPL(pnv_get_supported_cpuidle_states);
+
+/*
+ * Cross-call handler: on the first thread of each core, ask OPAL to apply
+ * the fastsleep workaround. Other threads return without doing anything.
+ *
+ * @info points to an int that is set to 1 when the OPAL call reports a
+ * non-zero result; it is never written otherwise.
+ */
+static void pnv_fastsleep_workaround_apply(void *info)
+{
+	int *failed = info;
+	int this_cpu = smp_processor_id();
+
+	if (this_cpu != cpu_first_thread_sibling(this_cpu))
+		return;
+
+	if (opal_config_cpu_idle_state(OPAL_CONFIG_IDLE_FASTSLEEP,
+				       OPAL_CONFIG_IDLE_APPLY))
+		*failed = 1;
+}
+
+/*
+ * Gates for the fastsleep workaround calls made by power7_idle_insn():
+ * _entry guards the OPAL_CONFIG_IDLE_APPLY call made before idling, _exit
+ * guards the OPAL_CONFIG_IDLE_UNDO call made after wakeup. Both are set to
+ * false by store_fastsleep_workaround_applyonce().
+ */
+static bool power7_fastsleep_workaround_entry = true;
+static bool power7_fastsleep_workaround_exit = true;
+
+/*
+ * Used to store fastsleep workaround state
+ * 0 - Workaround applied/undone at fastsleep entry/exit path (Default)
+ * 1 - Workaround applied once, never undone.
+ */
+static u8 fastsleep_workaround_applyonce;
+
+/*
+ * sysfs show handler: prints the current fastsleep_workaround_applyonce
+ * value followed by a newline.
+ *
+ * sysfs_emit() is used instead of sprintf() so the write is bounded to the
+ * sysfs buffer. Returns the number of bytes written to @buf.
+ */
+static ssize_t show_fastsleep_workaround_applyonce(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	return sysfs_emit(buf, "%u\n", fastsleep_workaround_applyonce);
+}
+
+/*
+ * sysfs store handler for fastsleep_workaround_applyonce.
+ *
+ * Only the value 1 is accepted. Writing 1 when the state is already 1 is a
+ * no-op that reports success.
+ *
+ * Returns @count on success, -EINVAL for unparsable input or any value
+ * other than 1, and -EIO when applying the workaround failed on some core.
+ */
+static ssize_t store_fastsleep_workaround_applyonce(struct device *dev,
+		struct device_attribute *attr, const char *buf,
+		size_t count)
+{
+	/*
+	 * Must start at 0: pnv_fastsleep_workaround_apply() only ever
+	 * writes 1 on failure and leaves the value untouched on success.
+	 */
+	int err = 0;
+	u8 val;
+
+	if (kstrtou8(buf, 0, &val) || val != 1)
+		return -EINVAL;
+
+	if (fastsleep_workaround_applyonce == 1)
+		return count;
+
+	/*
+	 * fastsleep_workaround_applyonce = 1 implies
+	 * fastsleep workaround needs to be left in 'applied' state on all
+	 * the cores. Do this by-
+	 * 1. Disable the 'undo' workaround in fastsleep exit path
+	 * 2. Send IPIs to all the cores which have at least one online thread
+	 * 3. Disable the 'apply' workaround in fastsleep entry path
+	 *
+	 * There is no need to send ipi to cores which have all threads
+	 * offlined, as last thread of the core entering fastsleep or deeper
+	 * state would have applied workaround.
+	 */
+	power7_fastsleep_workaround_exit = false;
+
+	cpus_read_lock();
+	on_each_cpu(pnv_fastsleep_workaround_apply, &err, 1);
+	cpus_read_unlock();
+	if (err) {
+		pr_err("fastsleep_workaround_applyonce change failed while running pnv_fastsleep_workaround_apply\n");
+		return -EIO;
+	}
+
+	power7_fastsleep_workaround_entry = false;
+
+	fastsleep_workaround_applyonce = 1;
+
+	return count;
+}
+
+/*
+ * Device attribute "fastsleep_workaround_applyonce" with mode 0600, wired to
+ * the show/store handlers defined above.
+ */
+static DEVICE_ATTR(fastsleep_workaround_applyonce, 0600,
+ show_fastsleep_workaround_applyonce,
+ store_fastsleep_workaround_applyonce);
+
+/*
+ * Atomically clear the calling thread's bit in the idle_state word that
+ * lives in the paca of the first thread of its core.
+ */
+static inline void atomic_start_thread_idle(void)
+{
+	int this_cpu = raw_smp_processor_id();
+	int core_first = cpu_first_thread_sibling(this_cpu);
+
+	clear_bit(cpu_thread_in_core(this_cpu),
+		  &paca_ptrs[core_first]->idle_state);
+}
+
+/*
+ * Atomically set the calling thread's bit in the idle_state word that
+ * lives in the paca of the first thread of its core.
+ */
+static inline void atomic_stop_thread_idle(void)
+{
+	int this_cpu = raw_smp_processor_id();
+	int core_first = cpu_first_thread_sibling(this_cpu);
+
+	set_bit(cpu_thread_in_core(this_cpu),
+		&paca_ptrs[core_first]->idle_state);
+}
+
+/*
+ * Spin until NR_PNV_CORE_IDLE_LOCK_BIT in the core's idle_lock word (kept
+ * in the paca of the first thread of the core) has been taken by the
+ * calling thread.
+ */
+static inline void atomic_lock_thread_idle(void)
+{
+	int this_cpu = raw_smp_processor_id();
+	int core_first = cpu_first_thread_sibling(this_cpu);
+	unsigned long *core_lock = &paca_ptrs[core_first]->idle_lock;
+
+	for (;;) {
+		if (likely(!test_and_set_bit_lock(NR_PNV_CORE_IDLE_LOCK_BIT,
+						  core_lock)))
+			break;
+		barrier();
+	}
+}
+
+/*
+ * Set the calling thread's bit in the core idle_state word with a cmpxchg
+ * retry loop, then release the core idle lock bit.
+ *
+ * BUGs if the lock bit is not set on entry or if the thread's bit is
+ * already set in idle_state.
+ */
+static inline void atomic_unlock_and_stop_thread_idle(void)
+{
+	int this_cpu = raw_smp_processor_id();
+	int core_first = cpu_first_thread_sibling(this_cpu);
+	unsigned long self_bit = 1UL << cpu_thread_in_core(this_cpu);
+	unsigned long *core_state = &paca_ptrs[core_first]->idle_state;
+	unsigned long *core_lock = &paca_ptrs[core_first]->idle_lock;
+	u64 expected = READ_ONCE(*core_state);
+	u64 observed;
+
+	BUG_ON(!(READ_ONCE(*core_lock) & PNV_CORE_IDLE_LOCK_BIT));
+	BUG_ON(expected & self_bit);
+
+	/* Retry with the freshly observed value until the swap succeeds */
+	for (;;) {
+		observed = cmpxchg(core_state, expected, expected | self_bit);
+		if (likely(observed == expected))
+			break;
+		expected = observed;
+	}
+
+	clear_bit_unlock(NR_PNV_CORE_IDLE_LOCK_BIT, core_lock);
+}
+
+/*
+ * Release the core idle lock bit. BUGs if the bit is not set on entry.
+ */
+static inline void atomic_unlock_thread_idle(void)
+{
+	int this_cpu = raw_smp_processor_id();
+	unsigned long *core_lock =
+		&paca_ptrs[cpu_first_thread_sibling(this_cpu)]->idle_lock;
+
+	BUG_ON(!test_bit(NR_PNV_CORE_IDLE_LOCK_BIT, core_lock));
+	clear_bit_unlock(NR_PNV_CORE_IDLE_LOCK_BIT, core_lock);
+}
+
+/*
+ * P7 and P8: SPR values that power7_idle_insn() reads before the idle
+ * instruction and writes back after wakeup, grouped by how widely each
+ * register is shared.
+ */
+struct p7_sprs {
+ /* per core */
+ u64 tscr;
+ u64 worc;
+
+ /* per subcore */
+ u64 sdr1;
+ u64 rpr;
+
+ /* per thread */
+ u64 lpcr;
+ u64 hfscr;
+ u64 fscr;
+ u64 purr;
+ u64 spurr;
+ u64 dscr;
+ u64 wort;
+
+ /* per thread SPRs that get lost in shallow states */
+ u64 amr;
+ u64 iamr;
+ u64 uamor;
+ /* amor is restored to constant ~0 */
+};
+
+/*
+ * power7_idle_insn - enter an ISA 2.06 idle state and recover on wakeup
+ * @type: idle type handed to isa206_idle_insn_mayloss(). The code
+ *        distinguishes PNV_THREAD_NAP, PNV_THREAD_WINKLE and "any type
+ *        that is not NAP".
+ *
+ * For non-NAP types the core-wide idle_state word (kept in the paca of the
+ * first thread of the core) is updated under the core idle lock before and
+ * after the idle instruction. After wakeup, the SRR1 value decides how much
+ * of the locally saved sprs copy has to be written back.
+ *
+ * Returns the SRR1 value produced by isa206_idle_insn_mayloss().
+ */
+static unsigned long power7_idle_insn(unsigned long type)
+{
+ int cpu = raw_smp_processor_id();
+ int first = cpu_first_thread_sibling(cpu);
+ unsigned long *state = &paca_ptrs[first]->idle_state;
+ unsigned long thread = 1UL << cpu_thread_in_core(cpu);
+ unsigned long core_thread_mask = (1UL << threads_per_core) - 1;
+ unsigned long srr1;
+ bool full_winkle;
+ struct p7_sprs sprs = {}; /* avoid false use-uninitialised */
+ bool sprs_saved = false;
+ int rc;
+
+ if (unlikely(type != PNV_THREAD_NAP)) {
+ atomic_lock_thread_idle();
+
+ /* This thread's bit must still be set; clear it before idling. */
+ BUG_ON(!(*state & thread));
+ *state &= ~thread;
+
+ if (power7_fastsleep_workaround_entry) {
+ /* No thread bit of this core is left set: apply the workaround. */
+ if ((*state & core_thread_mask) == 0) {
+ rc = opal_config_cpu_idle_state(
+ OPAL_CONFIG_IDLE_FASTSLEEP,
+ OPAL_CONFIG_IDLE_APPLY);
+ BUG_ON(rc);
+ }
+ }
+
+ if (type == PNV_THREAD_WINKLE) {
+ sprs.tscr = mfspr(SPRN_TSCR);
+ sprs.worc = mfspr(SPRN_WORC);
+
+ sprs.sdr1 = mfspr(SPRN_SDR1);
+ sprs.rpr = mfspr(SPRN_RPR);
+
+ sprs.lpcr = mfspr(SPRN_LPCR);
+ if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
+ sprs.hfscr = mfspr(SPRN_HFSCR);
+ sprs.fscr = mfspr(SPRN_FSCR);
+ }
+ sprs.purr = mfspr(SPRN_PURR);
+ sprs.spurr = mfspr(SPRN_SPURR);
+ sprs.dscr = mfspr(SPRN_DSCR);
+ sprs.wort = mfspr(SPRN_WORT);
+
+ sprs_saved = true;
+
+ /*
+ * Increment winkle counter and set all winkle bits if
+ * all threads are winkling. This allows wakeup side to
+ * distinguish between fast sleep and winkle state
+ * loss. Fast sleep still has to resync the timebase so
+ * this may not be a really big win.
+ */
+ *state += 1 << PNV_CORE_IDLE_WINKLE_COUNT_SHIFT;
+ if ((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS)
+ >> PNV_CORE_IDLE_WINKLE_COUNT_SHIFT
+ == threads_per_core)
+ *state |= PNV_CORE_IDLE_THREAD_WINKLE_BITS;
+ WARN_ON((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS) == 0);
+ }
+
+ atomic_unlock_thread_idle();
+ }
+
+ /* Saved for every idle type on ARCH_207S, including NAP. */
+ if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
+ sprs.amr = mfspr(SPRN_AMR);
+ sprs.iamr = mfspr(SPRN_IAMR);
+ sprs.uamor = mfspr(SPRN_UAMOR);
+ }
+
+ local_paca->thread_idle_state = type;
+ srr1 = isa206_idle_insn_mayloss(type); /* go idle */
+ local_paca->thread_idle_state = PNV_THREAD_RUNNING;
+
+ WARN_ON_ONCE(!srr1);
+ WARN_ON_ONCE(mfmsr() & (MSR_IR|MSR_DR));
+
+ if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
+ if ((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS) {
+ /*
+ * We don't need an isync after the mtsprs here because
+ * the upcoming mtmsrd is execution synchronizing.
+ */
+ mtspr(SPRN_AMR, sprs.amr);
+ mtspr(SPRN_IAMR, sprs.iamr);
+ mtspr(SPRN_AMOR, ~0);
+ mtspr(SPRN_UAMOR, sprs.uamor);
+ }
+ }
+
+ if (unlikely((srr1 & SRR1_WAKEMASK_P8) == SRR1_WAKEHMI))
+ hmi_exception_realmode(NULL);
+
+ /* No HV state loss reported: only undo the idle_state bookkeeping. */
+ if (likely((srr1 & SRR1_WAKESTATE) != SRR1_WS_HVLOSS)) {
+ if (unlikely(type != PNV_THREAD_NAP)) {
+ atomic_lock_thread_idle();
+ if (type == PNV_THREAD_WINKLE) {
+ WARN_ON((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS) == 0);
+ *state -= 1 << PNV_CORE_IDLE_WINKLE_COUNT_SHIFT;
+ *state &= ~(thread << PNV_CORE_IDLE_THREAD_WINKLE_BITS_SHIFT);
+ }
+ atomic_unlock_and_stop_thread_idle();
+ }
+ return srr1;
+ }
+
+ /* HV state loss */
+ BUG_ON(type == PNV_THREAD_NAP);
+
+ atomic_lock_thread_idle();
+
+ /*
+ * full_winkle is set only when this thread's winkle bit is still set
+ * in the core state word; the saved SPRs are restored only in that case.
+ */
+ full_winkle = false;
+ if (type == PNV_THREAD_WINKLE) {
+ WARN_ON((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS) == 0);
+ *state -= 1 << PNV_CORE_IDLE_WINKLE_COUNT_SHIFT;
+ if (*state & (thread << PNV_CORE_IDLE_THREAD_WINKLE_BITS_SHIFT)) {
+ *state &= ~(thread << PNV_CORE_IDLE_THREAD_WINKLE_BITS_SHIFT);
+ full_winkle = true;
+ BUG_ON(!sprs_saved);
+ }
+ }
+
+ WARN_ON(*state & thread);
+
+ /* Some thread bit of the core is already set: skip the per-core work. */
+ if ((*state & core_thread_mask) != 0)
+ goto core_woken;
+
+ /* Per-core SPRs */
+ if (full_winkle) {
+ mtspr(SPRN_TSCR, sprs.tscr);
+ mtspr(SPRN_WORC, sprs.worc);
+ }
+
+ if (power7_fastsleep_workaround_exit) {
+ rc = opal_config_cpu_idle_state(OPAL_CONFIG_IDLE_FASTSLEEP,
+ OPAL_CONFIG_IDLE_UNDO);
+ BUG_ON(rc);
+ }
+
+ /* TB */
+ if (opal_resync_timebase() != OPAL_SUCCESS)
+ BUG();
+
+core_woken:
+ if (!full_winkle)
+ goto subcore_woken;
+
+ /* Same test at subcore scope, using this thread's subcore_sibling_mask. */
+ if ((*state & local_paca->subcore_sibling_mask) != 0)
+ goto subcore_woken;
+
+ /* Per-subcore SPRs */
+ mtspr(SPRN_SDR1, sprs.sdr1);
+ mtspr(SPRN_RPR, sprs.rpr);
+
+subcore_woken:
+ /*
+ * isync after restoring shared SPRs and before unlocking. Unlock
+ * only contains hwsync which does not necessarily do the right
+ * thing for SPRs.
+ */
+ isync();
+ atomic_unlock_and_stop_thread_idle();
+
+ /* Fast sleep does not lose SPRs */
+ if (!full_winkle)
+ return srr1;
+
+ /* Per-thread SPRs */
+ mtspr(SPRN_LPCR, sprs.lpcr);
+ if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
+ mtspr(SPRN_HFSCR, sprs.hfscr);
+ mtspr(SPRN_FSCR, sprs.fscr);
+ }
+ mtspr(SPRN_PURR, sprs.purr);
+ mtspr(SPRN_SPURR, sprs.spurr);
+ mtspr(SPRN_DSCR, sprs.dscr);
+ mtspr(SPRN_WORT, sprs.wort);
+
+ mtspr(SPRN_SPRG3, local_paca->sprg_vdso);
+
+#ifdef CONFIG_PPC_64S_HASH_MMU
+ /*
+ * The SLB has to be restored here, but it sometimes still
+ * contains entries, so the __ variant must be used to prevent
+ * multi hits.
+ */
+ __slb_restore_bolted_realmode();
+#endif
+
+ return srr1;
+}
+
+extern unsigned long idle_kvm_start_guest(unsigned long srr1);
+
+#ifdef CONFIG_HOTPLUG_CPU
+/*
+ * power7_offline - idle entry used by pnv_cpu_offline() on pre-ISA-3.0 CPUs
+ *
+ * Switches the MSR to MSR_IDLE, enters the idle state selected by
+ * power7_offline_type through power7_idle_insn(), and switches back to
+ * MSR_KERNEL afterwards. With CONFIG_KVM_BOOK3S_HV_POSSIBLE the thread
+ * advertises itself as KVM_HWTHREAD_IN_IDLE while idle and, after wakeup,
+ * calls idle_kvm_start_guest() when hwthread_req is set.
+ *
+ * Returns the SRR1 value from power7_idle_insn(), or the value returned by
+ * idle_kvm_start_guest() when that path was taken.
+ */
+static unsigned long power7_offline(void)
+{
+ unsigned long srr1;
+
+ mtmsr(MSR_IDLE);
+
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ /* Tell KVM we're entering idle. */
+ /******************************************************/
+ /* N O T E W E L L ! ! ! N O T E W E L L */
+ /* The following store to HSTATE_HWTHREAD_STATE(r13) */
+ /* MUST occur in real mode, i.e. with the MMU off, */
+ /* and the MMU must stay off until we clear this flag */
+ /* and test HSTATE_HWTHREAD_REQ(r13) in */
+ /* pnv_powersave_wakeup in this file. */
+ /* The reason is that another thread can switch the */
+ /* MMU to a guest context whenever this flag is set */
+ /* to KVM_HWTHREAD_IN_IDLE, and if the MMU was on, */
+ /* that would potentially cause this thread to start */
+ /* executing instructions from guest memory in */
+ /* hypervisor mode, leading to a host crash or data */
+ /* corruption, or worse. */
+ /******************************************************/
+ local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_IDLE;
+#endif
+
+ __ppc64_runlatch_off();
+ srr1 = power7_idle_insn(power7_offline_type);
+ __ppc64_runlatch_on();
+
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_KERNEL;
+ /* Order setting hwthread_state vs. testing hwthread_req */
+ smp_mb();
+ if (local_paca->kvm_hstate.hwthread_req)
+ srr1 = idle_kvm_start_guest(srr1);
+#endif
+
+ mtmsr(MSR_KERNEL);
+
+ return srr1;
+}
+#endif
+
+/*
+ * power7_idle_type - enter the ISA 2.06 idle state @type
+ *
+ * Returns without idling when prep_irq_for_idle_irqsoff() fails. Otherwise
+ * the idle instruction runs with MSR_IDLE and the run latch off, and the
+ * SRR1 value it yields is passed to irq_set_pending_from_srr1().
+ */
+void power7_idle_type(unsigned long type)
+{
+	unsigned long wake_srr1;
+
+	if (!prep_irq_for_idle_irqsoff())
+		return;
+
+	mtmsr(MSR_IDLE);
+	__ppc64_runlatch_off();
+	wake_srr1 = power7_idle_insn(type);
+	__ppc64_runlatch_on();
+	mtmsr(MSR_KERNEL);
+
+	fini_irq_for_idle_irqsoff();
+	irq_set_pending_from_srr1(wake_srr1);
+}
+
+/*
+ * Parameterless idle entry: naps via power7_idle_type() when powersave_nap
+ * is non-zero, otherwise does nothing.
+ */
+static void power7_idle(void)
+{
+	if (powersave_nap)
+		power7_idle_type(PNV_THREAD_NAP);
+}
+
+/*
+ * SPR values that power9_idle_stop() reads before executing stop and writes
+ * back after wakeup, grouped by how widely each register is shared.
+ */
+struct p9_sprs {
+ /* per core */
+ u64 ptcr;
+ u64 rpr;
+ u64 tscr;
+ u64 ldbar;
+
+ /* per thread */
+ u64 lpcr;
+ u64 hfscr;
+ u64 fscr;
+ u64 pid;
+ u64 purr;
+ u64 spurr;
+ u64 dscr;
+ u64 ciabr;
+
+ u64 mmcra;
+ u32 mmcr0;
+ u32 mmcr1;
+ u64 mmcr2;
+
+ /* per thread SPRs that get lost in shallow states */
+ u64 amr;
+ u64 iamr;
+ u64 amor;
+ u64 uamor;
+};
+
+/*
+ * power9_idle_stop - execute stop with the given PSSCR and recover on wakeup
+ * @psscr: PSSCR value to stop with; its EC/ESL bits select the lossless
+ *         path, its RL field is compared against deep_spr_loss_state.
+ *
+ * Returns 0 when the EC=ESL=0 stop came back without an SRR1 value, or when
+ * another thread asked this one not to stop (dont_stop, only checked with
+ * CPU_FTR_P9_TM_XER_SO_BUG). Otherwise returns the SRR1 wakeup value, after
+ * restoring whatever state was lost and switching back to MSR_KERNEL.
+ */
+static unsigned long power9_idle_stop(unsigned long psscr)
+{
+ int cpu = raw_smp_processor_id();
+ int first = cpu_first_thread_sibling(cpu);
+ unsigned long *state = &paca_ptrs[first]->idle_state;
+ unsigned long core_thread_mask = (1UL << threads_per_core) - 1;
+ unsigned long srr1;
+ unsigned long pls;
+ unsigned long mmcr0 = 0;
+ unsigned long mmcra = 0;
+ struct p9_sprs sprs = {}; /* avoid false used-uninitialised */
+ bool sprs_saved = false;
+
+ if (!(psscr & (PSSCR_EC|PSSCR_ESL))) {
+ /* EC=ESL=0 case */
+
+ /*
+ * Wake synchronously. SRESET via xscom may still cause
+ * a 0x100 powersave wakeup with SRR1 reason!
+ */
+ srr1 = isa300_idle_stop_noloss(psscr); /* go idle */
+ if (likely(!srr1))
+ return 0;
+
+ /*
+ * Registers not saved, can't recover!
+ * This would be a hardware bug
+ */
+ BUG_ON((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS);
+
+ goto out;
+ }
+
+ /* EC=ESL=1 case */
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ /*
+ * Publish the requested PSSCR for pnv_power9_force_smt4_catch() and
+ * back out if a sibling has raised dont_stop in the meantime.
+ */
+ if (cpu_has_feature(CPU_FTR_P9_TM_XER_SO_BUG)) {
+ local_paca->requested_psscr = psscr;
+ /* order setting requested_psscr vs testing dont_stop */
+ smp_mb();
+ if (atomic_read(&local_paca->dont_stop)) {
+ local_paca->requested_psscr = 0;
+ return 0;
+ }
+ }
+#endif
+
+ if (!cpu_has_feature(CPU_FTR_POWER9_DD2_1)) {
+ /*
+ * POWER9 DD2 can incorrectly set PMAO when waking up
+ * after a state-loss idle. Saving and restoring MMCR0
+ * over idle is a workaround.
+ */
+ mmcr0 = mfspr(SPRN_MMCR0);
+ }
+
+ /*
+ * Requested level may lose SPRs: save them and clear this thread's
+ * bit in the core idle_state word.
+ */
+ if ((psscr & PSSCR_RL_MASK) >= deep_spr_loss_state) {
+ sprs.lpcr = mfspr(SPRN_LPCR);
+ sprs.hfscr = mfspr(SPRN_HFSCR);
+ sprs.fscr = mfspr(SPRN_FSCR);
+ sprs.pid = mfspr(SPRN_PID);
+ sprs.purr = mfspr(SPRN_PURR);
+ sprs.spurr = mfspr(SPRN_SPURR);
+ sprs.dscr = mfspr(SPRN_DSCR);
+ sprs.ciabr = mfspr(SPRN_CIABR);
+
+ sprs.mmcra = mfspr(SPRN_MMCRA);
+ sprs.mmcr0 = mfspr(SPRN_MMCR0);
+ sprs.mmcr1 = mfspr(SPRN_MMCR1);
+ sprs.mmcr2 = mfspr(SPRN_MMCR2);
+
+ sprs.ptcr = mfspr(SPRN_PTCR);
+ sprs.rpr = mfspr(SPRN_RPR);
+ sprs.tscr = mfspr(SPRN_TSCR);
+ if (!firmware_has_feature(FW_FEATURE_ULTRAVISOR))
+ sprs.ldbar = mfspr(SPRN_LDBAR);
+
+ sprs_saved = true;
+
+ atomic_start_thread_idle();
+ }
+
+ sprs.amr = mfspr(SPRN_AMR);
+ sprs.iamr = mfspr(SPRN_IAMR);
+ sprs.uamor = mfspr(SPRN_UAMOR);
+
+ srr1 = isa300_idle_stop_mayloss(psscr); /* go idle */
+
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ local_paca->requested_psscr = 0;
+#endif
+
+ /* From here on psscr holds the value read back after wakeup. */
+ psscr = mfspr(SPRN_PSSCR);
+
+ WARN_ON_ONCE(!srr1);
+ WARN_ON_ONCE(mfmsr() & (MSR_IR|MSR_DR));
+
+ if ((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS) {
+ /*
+ * We don't need an isync after the mtsprs here because the
+ * upcoming mtmsrd is execution synchronizing.
+ */
+ mtspr(SPRN_AMR, sprs.amr);
+ mtspr(SPRN_IAMR, sprs.iamr);
+ mtspr(SPRN_AMOR, ~0);
+ mtspr(SPRN_UAMOR, sprs.uamor);
+
+ /*
+ * Workaround for POWER9 DD2.0, if we lost resources, the ERAT
+ * might have been corrupted and needs flushing. We also need
+ * to reload MMCR0 (see mmcr0 comment above).
+ */
+ if (!cpu_has_feature(CPU_FTR_POWER9_DD2_1)) {
+ asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT);
+ mtspr(SPRN_MMCR0, mmcr0);
+ }
+
+ /*
+ * DD2.2 and earlier need to set then clear bit 60 in MMCRA
+ * to ensure the PMU starts running.
+ *
+ * Note that no CPU revision check guards this sequence: it
+ * runs on every wakeup that reports state loss.
+ */
+ mmcra = mfspr(SPRN_MMCRA);
+ mmcra |= PPC_BIT(60);
+ mtspr(SPRN_MMCRA, mmcra);
+ mmcra &= ~PPC_BIT(60);
+ mtspr(SPRN_MMCRA, mmcra);
+ }
+
+ if (unlikely((srr1 & SRR1_WAKEMASK_P8) == SRR1_WAKEHMI))
+ hmi_exception_realmode(NULL);
+
+ /*
+ * On POWER9, SRR1 bits do not match exactly as expected.
+ * SRR1_WS_GPRLOSS (10b) can also result in SPR loss, so
+ * just always test PSSCR for SPR/TB state loss.
+ */
+ pls = (psscr & PSSCR_PLS) >> PSSCR_PLS_SHIFT;
+ if (likely(pls < deep_spr_loss_state)) {
+ /* Undo atomic_start_thread_idle() if it ran before stop. */
+ if (sprs_saved)
+ atomic_stop_thread_idle();
+ goto out;
+ }
+
+ /* HV state loss */
+ BUG_ON(!sprs_saved);
+
+ atomic_lock_thread_idle();
+
+ /* Some thread bit of the core is already set: skip the per-core work. */
+ if ((*state & core_thread_mask) != 0)
+ goto core_woken;
+
+ /* Per-core SPRs */
+ mtspr(SPRN_PTCR, sprs.ptcr);
+ mtspr(SPRN_RPR, sprs.rpr);
+ mtspr(SPRN_TSCR, sprs.tscr);
+
+ if (pls >= pnv_first_tb_loss_level) {
+ /* TB loss */
+ if (opal_resync_timebase() != OPAL_SUCCESS)
+ BUG();
+ }
+
+ /*
+ * isync after restoring shared SPRs and before unlocking. Unlock
+ * only contains hwsync which does not necessarily do the right
+ * thing for SPRs.
+ */
+ isync();
+
+core_woken:
+ atomic_unlock_and_stop_thread_idle();
+
+ /* Per-thread SPRs */
+ mtspr(SPRN_LPCR, sprs.lpcr);
+ mtspr(SPRN_HFSCR, sprs.hfscr);
+ mtspr(SPRN_FSCR, sprs.fscr);
+ mtspr(SPRN_PID, sprs.pid);
+ mtspr(SPRN_PURR, sprs.purr);
+ mtspr(SPRN_SPURR, sprs.spurr);
+ mtspr(SPRN_DSCR, sprs.dscr);
+ mtspr(SPRN_CIABR, sprs.ciabr);
+
+ mtspr(SPRN_MMCRA, sprs.mmcra);
+ mtspr(SPRN_MMCR0, sprs.mmcr0);
+ mtspr(SPRN_MMCR1, sprs.mmcr1);
+ mtspr(SPRN_MMCR2, sprs.mmcr2);
+ if (!firmware_has_feature(FW_FEATURE_ULTRAVISOR))
+ mtspr(SPRN_LDBAR, sprs.ldbar);
+
+ mtspr(SPRN_SPRG3, local_paca->sprg_vdso);
+
+ if (!radix_enabled())
+ __slb_restore_bolted_realmode();
+
+out:
+ mtmsr(MSR_KERNEL);
+
+ return srr1;
+}
+
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+/*
+ * This is used in working around bugs in thread reconfiguration
+ * on POWER9 (at least up to Nimbus DD2.2) relating to transactional
+ * memory and the way that XER[SO] is checkpointed.
+ * This function forces the core into SMT4 by asking
+ * all other threads not to stop, and sending a message to any
+ * that are in a stop state.
+ * Must be called with preemption disabled.
+ *
+ * Steps, as implemented below:
+ *  1. Increment dont_stop in the paca of every other thread of the core.
+ *  2. Count the threads whose requested_psscr is zero as awake and note
+ *     the remaining ones in the poke_threads bitmask.
+ *  3. If fewer than need_awake threads were counted, send a doorbell
+ *     message to each noted thread and spin until enough of them have
+ *     cleared requested_psscr.
+ *
+ * NOTE(review): awake_threads starts at 1 for the caller, and the counting
+ * loop also visits the caller's own paca. Presumably the caller's
+ * requested_psscr is zero while it runs, so the caller is counted twice;
+ * that would reconcile need_awake == threads_per_core with the "at least
+ * 3 threads" wording in the comments below. Confirm.
+ *
+ * The dont_stop increments are undone by pnv_power9_force_smt4_release().
+ */
+void pnv_power9_force_smt4_catch(void)
+{
+ int cpu, cpu0, thr;
+ int awake_threads = 1; /* this thread is awake */
+ int poke_threads = 0;
+ int need_awake = threads_per_core;
+
+ cpu = smp_processor_id();
+ cpu0 = cpu & ~(threads_per_core - 1);
+ for (thr = 0; thr < threads_per_core; ++thr) {
+ if (cpu != cpu0 + thr)
+ atomic_inc(&paca_ptrs[cpu0+thr]->dont_stop);
+ }
+ /* order setting dont_stop vs testing requested_psscr */
+ smp_mb();
+ for (thr = 0; thr < threads_per_core; ++thr) {
+ if (!paca_ptrs[cpu0+thr]->requested_psscr)
+ ++awake_threads;
+ else
+ poke_threads |= (1 << thr);
+ }
+
+ /* If at least 3 threads are awake, the core is in SMT4 already */
+ if (awake_threads < need_awake) {
+ /* We have to wake some threads; we'll use msgsnd */
+ for (thr = 0; thr < threads_per_core; ++thr) {
+ if (poke_threads & (1 << thr)) {
+ ppc_msgsnd_sync();
+ ppc_msgsnd(PPC_DBELL_MSGTYPE, 0,
+ paca_ptrs[cpu0+thr]->hw_cpu_id);
+ }
+ }
+ /* now spin until at least 3 threads are awake */
+ do {
+ for (thr = 0; thr < threads_per_core; ++thr) {
+ if ((poke_threads & (1 << thr)) &&
+ !paca_ptrs[cpu0+thr]->requested_psscr) {
+ ++awake_threads;
+ poke_threads &= ~(1 << thr);
+ }
+ }
+ } while (awake_threads < need_awake);
+ }
+}
+EXPORT_SYMBOL_GPL(pnv_power9_force_smt4_catch);
+
+/*
+ * Counterpart of pnv_power9_force_smt4_catch(): decrement dont_stop in the
+ * paca of every thread of the calling thread's core except the caller.
+ */
+void pnv_power9_force_smt4_release(void)
+{
+	int self = smp_processor_id();
+	int core_base = self & ~(threads_per_core - 1);
+	int sibling;
+
+	/* clear all the dont_stop flags */
+	for (sibling = core_base; sibling < core_base + threads_per_core;
+	     sibling++) {
+		if (sibling == self)
+			continue;
+		atomic_dec(&paca_ptrs[sibling]->dont_stop);
+	}
+}
+EXPORT_SYMBOL_GPL(pnv_power9_force_smt4_release);
+#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
+
+struct p10_sprs {
+ /*
+ * Intentionally empty for now: power10_idle_stop() keeps its only use
+ * of this struct commented out and saves no SPRs.
+ *
+ * SPRs that get lost in shallow states:
+ *
+ * P10 loses CR, LR, CTR, FPSCR, VSCR, XER, TAR, SPRG2, and HSPRG1
+ * isa300 idle routines restore CR, LR.
+ * CTR is volatile
+ * idle thread doesn't use FP or VEC
+ * kernel doesn't use TAR
+ * HSPRG1 is only live in HV interrupt entry
+ * SPRG2 is only live in KVM guests, KVM handles it.
+ */
+};
+
+/*
+ * power10_idle_stop - execute stop with the given PSSCR and recover on wakeup
+ * @psscr: PSSCR value to stop with; its EC/ESL bits select the lossless
+ *         path, its RL field is compared against deep_spr_loss_state.
+ *
+ * Same overall flow as power9_idle_stop(), but no SPRs are saved or
+ * restored yet: those spots are marked with XXX comments. Only the
+ * idle_state bookkeeping, the timebase resync and the SLB restore are
+ * performed.
+ *
+ * Returns 0 when the EC=ESL=0 stop came back without an SRR1 value,
+ * otherwise the SRR1 wakeup value.
+ */
+static unsigned long power10_idle_stop(unsigned long psscr)
+{
+ int cpu = raw_smp_processor_id();
+ int first = cpu_first_thread_sibling(cpu);
+ unsigned long *state = &paca_ptrs[first]->idle_state;
+ unsigned long core_thread_mask = (1UL << threads_per_core) - 1;
+ unsigned long srr1;
+ unsigned long pls;
+// struct p10_sprs sprs = {}; /* avoid false used-uninitialised */
+ bool sprs_saved = false;
+
+ if (!(psscr & (PSSCR_EC|PSSCR_ESL))) {
+ /* EC=ESL=0 case */
+
+ /*
+ * Wake synchronously. SRESET via xscom may still cause
+ * a 0x100 powersave wakeup with SRR1 reason!
+ */
+ srr1 = isa300_idle_stop_noloss(psscr); /* go idle */
+ if (likely(!srr1))
+ return 0;
+
+ /*
+ * Registers not saved, can't recover!
+ * This would be a hardware bug
+ */
+ BUG_ON((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS);
+
+ goto out;
+ }
+
+ /* EC=ESL=1 case */
+ if ((psscr & PSSCR_RL_MASK) >= deep_spr_loss_state) {
+ /* XXX: save SPRs for deep state loss here. */
+
+ /* Set even though nothing is saved, so the wakeup checks below pass. */
+ sprs_saved = true;
+
+ atomic_start_thread_idle();
+ }
+
+ srr1 = isa300_idle_stop_mayloss(psscr); /* go idle */
+
+ /* From here on psscr holds the value read back after wakeup. */
+ psscr = mfspr(SPRN_PSSCR);
+
+ WARN_ON_ONCE(!srr1);
+ WARN_ON_ONCE(mfmsr() & (MSR_IR|MSR_DR));
+
+ if (unlikely((srr1 & SRR1_WAKEMASK_P8) == SRR1_WAKEHMI))
+ hmi_exception_realmode(NULL);
+
+ /*
+ * On POWER10, SRR1 bits do not match exactly as expected.
+ * SRR1_WS_GPRLOSS (10b) can also result in SPR loss, so
+ * just always test PSSCR for SPR/TB state loss.
+ */
+ pls = (psscr & PSSCR_PLS) >> PSSCR_PLS_SHIFT;
+ if (likely(pls < deep_spr_loss_state)) {
+ /* Undo atomic_start_thread_idle() if it ran before stop. */
+ if (sprs_saved)
+ atomic_stop_thread_idle();
+ goto out;
+ }
+
+ /* HV state loss */
+ BUG_ON(!sprs_saved);
+
+ atomic_lock_thread_idle();
+
+ /* Some thread bit of the core is already set: skip the per-core work. */
+ if ((*state & core_thread_mask) != 0)
+ goto core_woken;
+
+ /* XXX: restore per-core SPRs here */
+
+ if (pls >= pnv_first_tb_loss_level) {
+ /* TB loss */
+ if (opal_resync_timebase() != OPAL_SUCCESS)
+ BUG();
+ }
+
+ /*
+ * isync after restoring shared SPRs and before unlocking. Unlock
+ * only contains hwsync which does not necessarily do the right
+ * thing for SPRs.
+ */
+ isync();
+
+core_woken:
+ atomic_unlock_and_stop_thread_idle();
+
+ /* XXX: restore per-thread SPRs here */
+
+ if (!radix_enabled())
+ __slb_restore_bolted_realmode();
+
+out:
+ mtmsr(MSR_KERNEL);
+
+ return srr1;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+/*
+ * Dispatch a CPU-offline stop request to the POWER10 or POWER9 idle
+ * routine, depending on CPU_FTR_ARCH_31. Returns that routine's result.
+ */
+static unsigned long arch300_offline_stop(unsigned long psscr)
+{
+	if (cpu_has_feature(CPU_FTR_ARCH_31))
+		return power10_idle_stop(psscr);
+
+	return power9_idle_stop(psscr);
+}
+#endif
+
+/*
+ * arch300_idle_type - enter a stop state described by a PSSCR value/mask
+ * @stop_psscr_val:  bits to place into the PSSCR
+ * @stop_psscr_mask: bits of the current PSSCR that are replaced
+ *
+ * Returns without idling when prep_irq_for_idle_irqsoff() fails. Otherwise
+ * the stop runs with the run latch off and the resulting SRR1 value is
+ * passed to irq_set_pending_from_srr1().
+ */
+void arch300_idle_type(unsigned long stop_psscr_val,
+		       unsigned long stop_psscr_mask)
+{
+	unsigned long requested;
+	unsigned long wake_srr1;
+
+	if (!prep_irq_for_idle_irqsoff())
+		return;
+
+	/* Keep the PSSCR bits outside the mask, replace the rest */
+	requested = mfspr(SPRN_PSSCR) & ~stop_psscr_mask;
+	requested |= stop_psscr_val;
+
+	__ppc64_runlatch_off();
+	wake_srr1 = cpu_has_feature(CPU_FTR_ARCH_31) ?
+			power10_idle_stop(requested) :
+			power9_idle_stop(requested);
+	__ppc64_runlatch_on();
+
+	fini_irq_for_idle_irqsoff();
+
+	irq_set_pending_from_srr1(wake_srr1);
+}
+
+/*
+ * Parameterless wrapper for ppc_md.power_save: idles with the default
+ * stop state recorded in pnv_default_stop_{val,mask}.
+ */
+static void arch300_idle(void)
+{
+	arch300_idle_type(pnv_default_stop_val, pnv_default_stop_mask);
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+
+/*
+ * pnv_program_cpu_hotplug_lpcr - set the LPCR for a CPU about to go offline
+ * @cpu:      logical CPU whose hardware id is handed to the stop-api
+ * @lpcr_val: LPCR value to program
+ *
+ * Writes @lpcr_val to the LPCR of the executing CPU and, when full-context
+ * loss states are supported, also registers it via opal_slw_set_reg(). The
+ * result of that call is not checked.
+ */
+void pnv_program_cpu_hotplug_lpcr(unsigned int cpu, u64 lpcr_val)
+{
+	u64 hw_id = get_hard_smp_processor_id(cpu);
+
+	mtspr(SPRN_LPCR, lpcr_val);
+
+	/*
+	 * Program the LPCR via stop-api only if the deepest stop state
+	 * can lose hypervisor context.
+	 */
+	if (!(supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT))
+		return;
+
+	opal_slw_set_reg(hw_id, SPRN_LPCR, lpcr_val);
+}
+
+/*
+ * pnv_cpu_offline: A function that puts the CPU into the deepest
+ * available platform idle state on a CPU-Offline.
+ * interrupts hard disabled and no lazy irq pending.
+ *
+ * Three strategies, in order of preference:
+ *  - ISA 3.0 with a deepest stop state known: stop with that PSSCR;
+ *  - ISA 2.06 with a non-zero power7_offline_type: power7_offline();
+ *  - otherwise spin at low thread priority until
+ *    generic_check_cpu_restart() reports true.
+ *
+ * Returns the SRR1 wakeup value, or 0 for the spinning fallback.
+ */
+unsigned long pnv_cpu_offline(unsigned int cpu)
+{
+	unsigned long wake_srr1 = 0;
+
+	__ppc64_runlatch_off();
+
+	if (cpu_has_feature(CPU_FTR_ARCH_300) && deepest_stop_found) {
+		unsigned long stop_psscr = mfspr(SPRN_PSSCR);
+
+		stop_psscr &= ~pnv_deepest_stop_psscr_mask;
+		stop_psscr |= pnv_deepest_stop_psscr_val;
+		wake_srr1 = arch300_offline_stop(stop_psscr);
+	} else if (cpu_has_feature(CPU_FTR_ARCH_206) && power7_offline_type) {
+		wake_srr1 = power7_offline();
+	} else {
+		/* This is the fallback method. We emulate snooze */
+		while (!generic_check_cpu_restart(cpu)) {
+			HMT_low();
+			HMT_very_low();
+		}
+		HMT_medium();
+	}
+
+	__ppc64_runlatch_on();
+
+	return wake_srr1;
+}
+#endif
+
+/*
+ * Power ISA 3.0 idle initialization.
+ *
+ * POWER ISA 3.0 defines a new SPR Processor stop Status and Control
+ * Register (PSSCR) to control idle behavior.
+ *
+ * PSSCR layout:
+ * ----------------------------------------------------------
+ * | PLS | /// | SD | ESL | EC | PSLL | /// | TR | MTL | RL |
+ * ----------------------------------------------------------
+ * 0 4 41 42 43 44 48 54 56 60
+ *
+ * PSSCR key fields:
+ * Bits 0:3 - Power-Saving Level Status (PLS). This field indicates the
+ * lowest power-saving state the thread entered since stop instruction was
+ * last executed.
+ *
+ * Bit 41 - Status Disable(SD)
+ * 0 - Shows PLS entries
+ * 1 - PLS entries are all 0
+ *
+ * Bit 42 - Enable State Loss
+ * 0 - No state is lost irrespective of other fields
+ * 1 - Allows state loss
+ *
+ * Bit 43 - Exit Criterion
+ * 0 - Exit from power-save mode on any interrupt
+ * 1 - Exit from power-save mode controlled by LPCR's PECE bits
+ *
+ * Bits 44:47 - Power-Saving Level Limit
+ * This limits the power-saving level that can be entered into.
+ *
+ * Bits 60:63 - Requested Level
+ * Used to specify which power-saving level must be entered on executing
+ * stop instruction
+ */
+
+/*
+ * validate_psscr_val_mask - sanity-check a stop state's PSSCR value/mask
+ * @psscr_val:  PSSCR value of the state; may be updated in place
+ * @psscr_mask: PSSCR mask of the state; may be updated in place
+ * @flags:      OPAL_PM_* flags of the state
+ *
+ * Returns 0 when the pair is usable, ERR_EC_ESL_MISMATCH when the ESL and
+ * EC bits differ, or ERR_DEEP_STATE_ESL_MISMATCH when a state that loses
+ * full context does not have ESL set.
+ */
+int __init validate_psscr_val_mask(u64 *psscr_val, u64 *psscr_mask, u32 flags)
+{
+	/*
+	 * psscr_mask == 0xf indicates an older firmware.
+	 * Set remaining fields of psscr to the default values.
+	 * See NOTE above definition of PSSCR_HV_DEFAULT_VAL
+	 */
+	if (*psscr_mask == 0xf) {
+		*psscr_val |= PSSCR_HV_DEFAULT_VAL;
+		*psscr_mask = PSSCR_HV_DEFAULT_MASK;
+		return 0;
+	}
+
+	/*
+	 * New firmware is expected to set the psscr_val bits correctly.
+	 * Validate that the following invariants are correctly maintained by
+	 * the new firmware.
+	 * - ESL bit value matches the EC bit value.
+	 * - ESL bit is set for all the deep stop states.
+	 */
+	if (GET_PSSCR_ESL(*psscr_val) != GET_PSSCR_EC(*psscr_val))
+		return ERR_EC_ESL_MISMATCH;
+
+	if ((flags & OPAL_PM_LOSE_FULL_CONTEXT) &&
+	    GET_PSSCR_ESL(*psscr_val) == 0)
+		return ERR_DEEP_STATE_ESL_MISMATCH;
+
+	return 0;
+}
+
+/*
+ * pnv_arch300_idle_init: Initializes the default idle state, first
+ *			  deep idle state and deepest idle state on
+ *			  ISA 3.0 CPUs.
+ *
+ * Takes no arguments and returns nothing: the states are read from the
+ * file-scope pnv_idle_states[] array (nr_pnv_idle_states entries) and the
+ * results are stored in file-scope variables. States that pass validation
+ * are marked valid in place. Does nothing on CPUs other than POWER9 and
+ * POWER10.
+ */
+static void __init pnv_arch300_idle_init(void)
+{
+	u64 max_residency_ns = 0;
+	int i;
+
+	/* stop is not really architected, we only have p9,p10 drivers */
+	if (!pvr_version_is(PVR_POWER10) && !pvr_version_is(PVR_POWER9))
+		return;
+
+	/*
+	 * pnv_deepest_stop_psscr_{val,mask} should be set to values
+	 * corresponding to the deepest stop state.
+	 *
+	 * pnv_default_stop_{val,mask} should be set to values corresponding to
+	 * the deepest loss-less (OPAL_PM_STOP_INST_FAST) stop state.
+	 */
+	pnv_first_tb_loss_level = MAX_STOP_STATE + 1;
+	deep_spr_loss_state = MAX_STOP_STATE + 1;
+	for (i = 0; i < nr_pnv_idle_states; i++) {
+		int err;
+		struct pnv_idle_states_t *state = &pnv_idle_states[i];
+		u64 psscr_rl = state->psscr_val & PSSCR_RL_MASK;
+
+		/* No deep loss driver implemented for POWER10 yet */
+		if (pvr_version_is(PVR_POWER10) &&
+		    (state->flags & (OPAL_PM_TIMEBASE_STOP|OPAL_PM_LOSE_FULL_CONTEXT)))
+			continue;
+
+		if ((state->flags & OPAL_PM_TIMEBASE_STOP) &&
+		    (pnv_first_tb_loss_level > psscr_rl))
+			pnv_first_tb_loss_level = psscr_rl;
+
+		if ((state->flags & OPAL_PM_LOSE_FULL_CONTEXT) &&
+		    (deep_spr_loss_state > psscr_rl))
+			deep_spr_loss_state = psscr_rl;
+
+		/*
+		 * The idle code does not deal with TB loss occurring
+		 * in a shallower state than SPR loss, so force it to
+		 * behave like SPRs are lost if TB is lost. POWER9 would
+		 * never encounter this, but a POWER8 core would if it
+		 * implemented the stop instruction. So this is for forward
+		 * compatibility.
+		 */
+		if ((state->flags & OPAL_PM_TIMEBASE_STOP) &&
+		    (deep_spr_loss_state > psscr_rl))
+			deep_spr_loss_state = psscr_rl;
+
+		err = validate_psscr_val_mask(&state->psscr_val,
+					      &state->psscr_mask,
+					      state->flags);
+		if (err) {
+			report_invalid_psscr_val(state->psscr_val, err);
+			continue;
+		}
+
+		state->valid = true;
+
+		/* Deepest state: the valid state with the largest residency */
+		if (max_residency_ns < state->residency_ns) {
+			max_residency_ns = state->residency_ns;
+			pnv_deepest_stop_psscr_val = state->psscr_val;
+			pnv_deepest_stop_psscr_mask = state->psscr_mask;
+			pnv_deepest_stop_flag = state->flags;
+			deepest_stop_found = true;
+		}
+
+		/* Default state: the first valid OPAL_PM_STOP_INST_FAST state */
+		if (!default_stop_found &&
+		    (state->flags & OPAL_PM_STOP_INST_FAST)) {
+			pnv_default_stop_val = state->psscr_val;
+			pnv_default_stop_mask = state->psscr_mask;
+			default_stop_found = true;
+			WARN_ON(state->flags & OPAL_PM_LOSE_FULL_CONTEXT);
+		}
+	}
+
+	if (unlikely(!default_stop_found)) {
+		pr_warn("cpuidle-powernv: No suitable default stop state found. Disabling platform idle.\n");
+	} else {
+		ppc_md.power_save = arch300_idle;
+		pr_info("cpuidle-powernv: Default stop: psscr = 0x%016llx,mask=0x%016llx\n",
+			pnv_default_stop_val, pnv_default_stop_mask);
+	}
+
+	if (unlikely(!deepest_stop_found)) {
+		pr_warn("cpuidle-powernv: No suitable stop state for CPU-Hotplug. Offlined CPUs will busy wait\n");
+	} else {
+		pr_info("cpuidle-powernv: Deepest stop: psscr = 0x%016llx,mask=0x%016llx\n",
+			pnv_deepest_stop_psscr_val,
+			pnv_deepest_stop_psscr_mask);
+	}
+
+	pr_info("cpuidle-powernv: First stop level that may lose SPRs = 0x%llx\n",
+		deep_spr_loss_state);
+
+	pr_info("cpuidle-powernv: First stop level that may lose timebase = 0x%llx\n",
+		pnv_first_tb_loss_level);
+}
+
+static void __init pnv_disable_deep_states(void)
+{
+	/*
+	 * The stop-api cannot restore hypervisor resources when waking
+	 * from a platform idle state that loses full context, so mask
+	 * those states out of the supported set.
+	 */
+	supported_cpuidle_states &= ~OPAL_PM_LOSE_FULL_CONTEXT;
+	pr_warn("cpuidle-powernv: Disabling idle states that lose full context\n");
+	pr_warn("cpuidle-powernv: Idle power-savings, CPU-Hotplug affected\n");
+
+	/*
+	 * The CPU-Hotplug state only needs replacing on ISA v3.0 CPUs
+	 * whose recorded deepest stop state loses full context.
+	 */
+	if (!cpu_has_feature(CPU_FTR_ARCH_300))
+		return;
+	if (!(pnv_deepest_stop_flag & OPAL_PM_LOSE_FULL_CONTEXT))
+		return;
+
+	if (!default_stop_found) {
+		/* Fallback to snooze loop for CPU-Hotplug */
+		deepest_stop_found = false;
+		pr_warn("cpuidle-powernv: Offlined CPUs will busy wait\n");
+		return;
+	}
+
+	/* Use the default stop state for CPU-Hotplug. */
+	pnv_deepest_stop_psscr_val = pnv_default_stop_val;
+	pnv_deepest_stop_psscr_mask = pnv_default_stop_mask;
+	pr_warn("cpuidle-powernv: Offlined CPUs will stop with psscr = 0x%016llx\n",
+		pnv_deepest_stop_psscr_val);
+}
+
+/*
+ * Run the ISA v3.0 stop-state setup when applicable, then fold the
+ * flags of every parsed idle state into supported_cpuidle_states.
+ */
+static void __init pnv_probe_idle_states(void)
+{
+	const struct pnv_idle_states_t *state;
+	int idx;
+
+	if (nr_pnv_idle_states < 0) {
+		pr_warn("cpuidle-powernv: no idle states found in the DT\n");
+		return;
+	}
+
+	if (cpu_has_feature(CPU_FTR_ARCH_300))
+		pnv_arch300_idle_init();
+
+	for (idx = 0, state = pnv_idle_states; idx < nr_pnv_idle_states;
+	     idx++, state++)
+		supported_cpuidle_states |= state->flags;
+}
+
+/*
+ * Parse the /ibm,opal/power-mgt device-tree node and populate the
+ * pnv_idle_states array from it. On success nr_pnv_idle_states is set
+ * to the number of cpuidle states discovered through the device-tree.
+ *
+ * Returns 0 on success, -ENODEV if the node is absent, -EINVAL if a
+ * required property is missing or unreadable, and -ENOMEM if a buffer
+ * cannot be allocated. On failure nr_pnv_idle_states is left untouched
+ * and pnv_idle_states is freed and reset to NULL, so no half-filled
+ * array is left behind.
+ */
+static int __init pnv_parse_cpuidle_dt(void)
+{
+	struct device_node *np;
+	int nr_idle_states, i;
+	int rc = 0;
+	u32 *temp_u32 = NULL;
+	u64 *temp_u64 = NULL;
+	const char **temp_string = NULL;
+
+	np = of_find_node_by_path("/ibm,opal/power-mgt");
+	if (!np) {
+		pr_warn("opal: PowerMgmt Node not found\n");
+		return -ENODEV;
+	}
+
+	/*
+	 * The count is a negative errno when the property is absent or
+	 * malformed. It must not be used as an element count for
+	 * kcalloc(), and the failure is not an out-of-memory condition.
+	 */
+	nr_idle_states = of_property_count_u32_elems(np,
+						"ibm,cpu-idle-state-flags");
+	if (nr_idle_states < 0) {
+		pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-flags in DT\n");
+		rc = -EINVAL;
+		goto out;
+	}
+
+	pnv_idle_states = kcalloc(nr_idle_states, sizeof(*pnv_idle_states),
+				  GFP_KERNEL);
+	temp_u32 = kcalloc(nr_idle_states, sizeof(u32), GFP_KERNEL);
+	temp_u64 = kcalloc(nr_idle_states, sizeof(u64), GFP_KERNEL);
+	temp_string = kcalloc(nr_idle_states, sizeof(char *), GFP_KERNEL);
+
+	if (!(pnv_idle_states && temp_u32 && temp_u64 && temp_string)) {
+		pr_err("Could not allocate memory for dt parsing\n");
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	/* Read flags */
+	if (of_property_read_u32_array(np, "ibm,cpu-idle-state-flags",
+				       temp_u32, nr_idle_states)) {
+		pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-flags in DT\n");
+		rc = -EINVAL;
+		goto out;
+	}
+	for (i = 0; i < nr_idle_states; i++)
+		pnv_idle_states[i].flags = temp_u32[i];
+
+	/* Read latencies */
+	if (of_property_read_u32_array(np, "ibm,cpu-idle-state-latencies-ns",
+				       temp_u32, nr_idle_states)) {
+		pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-latencies-ns in DT\n");
+		rc = -EINVAL;
+		goto out;
+	}
+	for (i = 0; i < nr_idle_states; i++)
+		pnv_idle_states[i].latency_ns = temp_u32[i];
+
+	/* Read residencies */
+	if (of_property_read_u32_array(np, "ibm,cpu-idle-state-residency-ns",
+				       temp_u32, nr_idle_states)) {
+		pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-residency-ns in DT\n");
+		rc = -EINVAL;
+		goto out;
+	}
+	for (i = 0; i < nr_idle_states; i++)
+		pnv_idle_states[i].residency_ns = temp_u32[i];
+
+	/* For power9 and later */
+	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+		/* Read the PSSCR values */
+		if (of_property_read_u64_array(np, "ibm,cpu-idle-state-psscr",
+					       temp_u64, nr_idle_states)) {
+			pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-psscr in DT\n");
+			rc = -EINVAL;
+			goto out;
+		}
+		for (i = 0; i < nr_idle_states; i++)
+			pnv_idle_states[i].psscr_val = temp_u64[i];
+
+		/* Read the PSSCR masks */
+		if (of_property_read_u64_array(np, "ibm,cpu-idle-state-psscr-mask",
+					       temp_u64, nr_idle_states)) {
+			pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-psscr-mask in DT\n");
+			rc = -EINVAL;
+			goto out;
+		}
+		for (i = 0; i < nr_idle_states; i++)
+			pnv_idle_states[i].psscr_mask = temp_u64[i];
+	}
+
+	/*
+	 * power8 specific properties ibm,cpu-idle-state-pmicr-mask and
+	 * ibm,cpu-idle-state-pmicr-val were never used and there is no
+	 * plan to use them in the near future. Hence they are not parsed.
+	 */
+
+	if (of_property_read_string_array(np, "ibm,cpu-idle-state-names",
+					  temp_string, nr_idle_states) < 0) {
+		pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-names in DT\n");
+		rc = -EINVAL;
+		goto out;
+	}
+	for (i = 0; i < nr_idle_states; i++)
+		strscpy(pnv_idle_states[i].name, temp_string[i],
+			PNV_IDLE_NAME_LEN);
+	nr_pnv_idle_states = nr_idle_states;
+	rc = 0;
+out:
+	if (rc) {
+		/* Do not leave a partially populated state array behind. */
+		kfree(pnv_idle_states);
+		pnv_idle_states = NULL;
+	}
+	kfree(temp_u32);
+	kfree(temp_u64);
+	kfree(temp_string);
+	of_node_put(np);
+	return rc;
+}
+
+static int __init pnv_init_idle_states(void)
+{
+ int cpu;
+ int rc = 0;
+
+ /*
+ * Initcall body: initialise the per-CPU idle bookkeeping, parse the
+ * idle states from the device tree and choose the idle/offline
+ * handlers. Returns the pnv_parse_cpuidle_dt() error if parsing
+ * fails, 0 otherwise (including when cpuidle_disable requests an
+ * override and discovery is skipped).
+ *
+ * Set up PACA fields
+ */
+ for_each_present_cpu(cpu) {
+ struct paca_struct *p = paca_ptrs[cpu];
+
+ p->idle_state = 0;
+ if (cpu == cpu_first_thread_sibling(cpu))
+ p->idle_state = (1 << threads_per_core) - 1; /* low threads_per_core bits set, first sibling only */
+
+ if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
+ /* P7/P8 nap */
+ p->thread_idle_state = PNV_THREAD_RUNNING;
+ } else if (pvr_version_is(PVR_POWER9)) {
+ /* P9 stop workarounds */
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ p->requested_psscr = 0;
+ atomic_set(&p->dont_stop, 0);
+#endif
+ }
+ }
+
+ /* In case we error out nr_pnv_idle_states will be zero */
+ nr_pnv_idle_states = 0;
+ supported_cpuidle_states = 0;
+
+ if (cpuidle_disable != IDLE_NO_OVERRIDE)
+ goto out; /* discovery skipped; both globals above stay zero */
+ rc = pnv_parse_cpuidle_dt();
+ if (rc)
+ return rc;
+ pnv_probe_idle_states();
+
+ if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
+ if (!(supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1)) {
+ power7_fastsleep_workaround_entry = false;
+ power7_fastsleep_workaround_exit = false;
+ } else {
+ struct device *dev_root;
+ /*
+ * OPAL_PM_SLEEP_ENABLED_ER1 is set. It indicates that
+ * workaround is needed to use fastsleep. Provide sysfs
+ * control to choose how this workaround has to be
+ * applied.
+ */
+ dev_root = bus_get_dev_root(&cpu_subsys);
+ if (dev_root) {
+ device_create_file(dev_root,
+ &dev_attr_fastsleep_workaround_applyonce); /* return value is not checked */
+ put_device(dev_root);
+ }
+ }
+
+ update_subcore_sibling_mask();
+
+ if (supported_cpuidle_states & OPAL_PM_NAP_ENABLED) {
+ ppc_md.power_save = power7_idle;
+ power7_offline_type = PNV_THREAD_NAP;
+ }
+
+ /* Later assignments override NAP: winkle if available, otherwise sleep. */
+ if ((supported_cpuidle_states & OPAL_PM_WINKLE_ENABLED) &&
+ (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT))
+ power7_offline_type = PNV_THREAD_WINKLE;
+ else if ((supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED) ||
+ (supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1))
+ power7_offline_type = PNV_THREAD_SLEEP;
+ }
+
+ if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT) {
+ if (pnv_save_sprs_for_deep_states()) /* non-zero return: fall back to disabling deep states */
+ pnv_disable_deep_states();
+ }
+
+out:
+ return 0;
+}
+machine_subsys_initcall(powernv, pnv_init_idle_states);
diff --git a/arch/powerpc/platforms/powernv/memtrace.c b/arch/powerpc/platforms/powernv/memtrace.c
new file mode 100644
index 000000000..877720c64
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/memtrace.c
@@ -0,0 +1,339 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) IBM Corporation, 2014, 2017
+ * Anton Blanchard, Rashmica Gupta.
+ */
+
+#define pr_fmt(fmt) "memtrace: " fmt
+
+#include <linux/bitops.h>
+#include <linux/string.h>
+#include <linux/memblock.h>
+#include <linux/init.h>
+#include <linux/moduleparam.h>
+#include <linux/fs.h>
+#include <linux/debugfs.h>
+#include <linux/slab.h>
+#include <linux/memory.h>
+#include <linux/memory_hotplug.h>
+#include <linux/numa.h>
+#include <asm/machdep.h>
+#include <asm/cacheflush.h>
+
+/* This enables us to keep track of the memory removed from each node. */
+struct memtrace_entry {
+ void *mem; /* ioremap() mapping of the region, set by memtrace_init_debugfs() */
+ u64 start; /* physical start address returned by memtrace_alloc_node() */
+ u64 size; /* size of the region in bytes */
+ u32 nid; /* node the region was taken from; NUMA_NO_NODE once freed */
+ struct dentry *dir; /* per-entry debugfs directory */
+ char name[16]; /* debugfs directory name, nid formatted as "%08x" */
+};
+
+static DEFINE_MUTEX(memtrace_mutex);
+static u64 memtrace_size;
+
+static struct memtrace_entry *memtrace_array;
+static unsigned int memtrace_array_nr;
+
+
+/*
+ * read() handler for the per-node "trace" file: copies from the mapped
+ * trace region of the entry stored in filp->private_data.
+ */
+static ssize_t memtrace_read(struct file *filp, char __user *ubuf,
+			     size_t count, loff_t *ppos)
+{
+	const struct memtrace_entry *entry = filp->private_data;
+	const void *trace = entry->mem;
+
+	return simple_read_from_buffer(ubuf, count, ppos, trace, entry->size);
+}
+
+/*
+ * mmap() handler for the per-node "trace" file: maps the trace region
+ * into userspace with a non-cached page protection.
+ *
+ * Returns -EINVAL unless the whole requested window (page offset plus
+ * length) lies inside the region; otherwise returns the result of
+ * remap_pfn_range().
+ */
+static int memtrace_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+	struct memtrace_entry *ent = filp->private_data;
+	unsigned long len = vma->vm_end - vma->vm_start;
+
+	if (ent->size < len)
+		return -EINVAL;
+
+	/*
+	 * The offset and the length must be checked together, otherwise a
+	 * mapping that starts near the end of the region could extend into
+	 * unrelated physical memory. Compare in pages and by subtraction so
+	 * that neither side can overflow.
+	 */
+	if (vma->vm_pgoff > ((ent->size - len) >> PAGE_SHIFT))
+		return -EINVAL;
+
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+	return remap_pfn_range(vma, vma->vm_start, PHYS_PFN(ent->start) + vma->vm_pgoff,
+			       len, vma->vm_page_prot);
+}
+
+static const struct file_operations memtrace_fops = { /* installed on each per-node "trace" debugfs file */
+ .llseek = default_llseek,
+ .read = memtrace_read,
+ .open = simple_open,
+ .mmap = memtrace_mmap,
+};
+
+#define FLUSH_CHUNK_SIZE SZ_1G
+/**
+ * flush_dcache_range_chunked(): Flush a data cache range piecewise,
+ * calling cond_resched() after each piece so a large range does not
+ * monopolise the CPU. Each piece is handed to flush_dcache_range().
+ *
+ * @start: the start address
+ * @stop: the stop address (exclusive)
+ * @chunk: the max size of the chunks
+ */
+static void flush_dcache_range_chunked(unsigned long start, unsigned long stop,
+				       unsigned long chunk)
+{
+	unsigned long pos = start;
+
+	while (pos < stop) {
+		flush_dcache_range(pos, min(stop, pos + chunk));
+		cond_resched();
+		pos += chunk;
+	}
+}
+
+/*
+ * Zero nr_pages pages starting at start_pfn through the linear mapping,
+ * then flush the data cache over the same range.
+ */
+static void memtrace_clear_range(unsigned long start_pfn,
+				 unsigned long nr_pages)
+{
+	const unsigned long end_pfn = start_pfn + nr_pages;
+	unsigned long cur = start_pfn;
+
+	/* As HIGHMEM does not apply, use clear_page() directly. */
+	while (cur < end_pfn) {
+		/* Offer to reschedule once per memory section. */
+		if (IS_ALIGNED(cur, PAGES_PER_SECTION))
+			cond_resched();
+		clear_page(__va(PFN_PHYS(cur)));
+		cur++;
+	}
+
+	/*
+	 * The range is about to be used cache inhibited, so flush the
+	 * cache first.
+	 */
+	flush_dcache_range_chunked((unsigned long)pfn_to_kaddr(start_pfn),
+				   (unsigned long)pfn_to_kaddr(end_pfn),
+				   FLUSH_CHUNK_SIZE);
+}
+
+/*
+ * Allocate @size bytes of contiguous memory on node @nid, zero it, mark
+ * its pages offline and remove it from the linear mapping.
+ *
+ * Returns the physical start address, or 0 if the allocation failed.
+ */
+static u64 memtrace_alloc_node(u32 nid, u64 size)
+{
+	const unsigned long nr_pages = PHYS_PFN(size);
+	unsigned long first_pfn, end_pfn, cur;
+	struct page *page;
+
+	/*
+	 * Trace memory needs to be aligned to the size, which is guaranteed
+	 * by alloc_contig_pages().
+	 */
+	page = alloc_contig_pages(nr_pages, GFP_KERNEL | __GFP_THISNODE |
+				  __GFP_NOWARN, nid, NULL);
+	if (!page)
+		return 0;
+
+	first_pfn = page_to_pfn(page);
+	end_pfn = first_pfn + nr_pages;
+
+	/*
+	 * Clear the range while we still have a linear mapping.
+	 *
+	 * TODO: use __GFP_ZERO with alloc_contig_pages() once supported.
+	 */
+	memtrace_clear_range(first_pfn, nr_pages);
+
+	/*
+	 * Set pages PageOffline(), to indicate that nobody (e.g., hibernation,
+	 * dumping, ...) should be touching these pages.
+	 */
+	for (cur = first_pfn; cur < end_pfn; cur++)
+		__SetPageOffline(pfn_to_page(cur));
+
+	arch_remove_linear_mapping(PFN_PHYS(first_pfn), size);
+
+	return PFN_PHYS(first_pfn);
+}
+
+/*
+ * Allocate the tracking array and try to reserve @size bytes of trace
+ * memory on every online node, recording each success in
+ * memtrace_array and bumping memtrace_array_nr.
+ *
+ * Returns 0 on success (even if some or all nodes could not provide
+ * memory) and -ENOMEM if the tracking array cannot be allocated.
+ */
+static int memtrace_init_regions_runtime(u64 size)
+{
+	u32 nid;
+	u64 m;
+
+	memtrace_array = kcalloc(num_online_nodes(),
+				 sizeof(*memtrace_array), GFP_KERNEL);
+	if (!memtrace_array) {
+		pr_err("Failed to allocate memtrace_array\n");
+		/* An allocation failure is -ENOMEM, not an invalid argument. */
+		return -ENOMEM;
+	}
+
+	for_each_online_node(nid) {
+		m = memtrace_alloc_node(nid, size);
+
+		/*
+		 * A node might not have any local memory, so warn but
+		 * continue on.
+		 */
+		if (!m) {
+			pr_err("Failed to allocate trace memory on node %d\n", nid);
+			continue;
+		}
+
+		pr_info("Allocated trace memory on node %d at 0x%016llx\n", nid, m);
+
+		memtrace_array[memtrace_array_nr].start = m;
+		memtrace_array[memtrace_array_nr].size = size;
+		memtrace_array[memtrace_array_nr].nid = nid;
+		memtrace_array_nr++;
+	}
+
+	return 0;
+}
+
+static struct dentry *memtrace_debugfs_dir;
+
+/*
+ * ioremap() every recorded trace region and publish it in debugfs as a
+ * directory named after the node id, holding "trace", "start" and "size".
+ *
+ * Returns 0 if every region was mapped, -1 if at least one ioremap()
+ * failed (the remaining regions are still processed).
+ */
+static int memtrace_init_debugfs(void)
+{
+	struct memtrace_entry *ent = memtrace_array;
+	unsigned int idx;
+	int status = 0;
+
+	for (idx = 0; idx < memtrace_array_nr; idx++, ent++) {
+		struct dentry *node_dir;
+
+		ent->mem = ioremap(ent->start, ent->size);
+		if (!ent->mem) {
+			/* Warn but continue on */
+			pr_err("Failed to map trace memory at 0x%llx\n",
+			       ent->start);
+			status = -1;
+			continue;
+		}
+
+		snprintf(ent->name, sizeof(ent->name), "%08x", ent->nid);
+		node_dir = debugfs_create_dir(ent->name, memtrace_debugfs_dir);
+		ent->dir = node_dir;
+
+		debugfs_create_file_unsafe("trace", 0600, node_dir, ent,
+					   &memtrace_fops);
+		debugfs_create_x64("start", 0400, node_dir, &ent->start);
+		debugfs_create_x64("size", 0400, node_dir, &ent->size);
+	}
+
+	return status;
+}
+
+/*
+ * Give one trace region back: recreate its linear mapping, clear the
+ * offline marking on each page and free the contiguous range.
+ *
+ * Returns 0 on success or the arch_create_linear_mapping() error.
+ */
+static int memtrace_free(int nid, u64 start, u64 size)
+{
+	struct mhp_params params = { .pgprot = PAGE_KERNEL };
+	const unsigned long first_pfn = PHYS_PFN(start);
+	const unsigned long nr_pages = PHYS_PFN(size);
+	const unsigned long end_pfn = first_pfn + nr_pages;
+	unsigned long cur;
+	int err;
+
+	err = arch_create_linear_mapping(nid, start, size, &params);
+	if (err)
+		return err;
+
+	for (cur = first_pfn; cur < end_pfn; cur++)
+		__ClearPageOffline(pfn_to_page(cur));
+
+	free_contig_range(first_pfn, nr_pages);
+	return 0;
+}
+
+/*
+ * Walk the recorded trace regions from last to first and try to hand
+ * each one back to the kernel.
+ *
+ * Returns 0 when every region was freed (the globals are then reset),
+ * otherwise the number of regions that could not be freed.
+ */
+static int memtrace_free_regions(void)
+{
+	unsigned int idx = memtrace_array_nr;
+	int failures = 0;
+
+	while (idx-- > 0) {
+		struct memtrace_entry *ent = &memtrace_array[idx];
+
+		/* We have freed this chunk previously */
+		if (ent->nid == NUMA_NO_NODE)
+			continue;
+
+		/* Remove from io mappings */
+		if (ent->mem) {
+			iounmap(ent->mem);
+			ent->mem = NULL;
+		}
+
+		if (memtrace_free(ent->nid, ent->start, ent->size)) {
+			pr_err("Failed to free trace memory on node %d\n",
+			       ent->nid);
+			failures += 1;
+			continue;
+		}
+
+		/*
+		 * Memory was freed successfully so clean up references to it
+		 * so on reentry we can tell that this chunk was freed.
+		 */
+		debugfs_remove_recursive(ent->dir);
+		pr_info("Freed trace memory back on node %d\n", ent->nid);
+
+		/* start and size take the value stored in the u32 nid. */
+		ent->nid = NUMA_NO_NODE;
+		ent->start = ent->nid;
+		ent->size = ent->start;
+	}
+
+	if (failures)
+		return failures;
+
+	/* If all chunks of memory were freed successfully, reset globals */
+	kfree(memtrace_array);
+	memtrace_array = NULL;
+	memtrace_size = 0;
+	memtrace_array_nr = 0;
+	return 0;
+}
+
+/*
+ * Write handler of the debugfs "enable" attribute. @val is the amount
+ * of trace memory to reserve per node; 0 releases everything.
+ *
+ * Returns 0 on success, -EINVAL if @val is not aligned to the memory
+ * block size, and -EAGAIN if freeing, allocating or mapping failed.
+ */
+static int memtrace_enable_set(void *data, u64 val)
+{
+	int rc = -EAGAIN;
+	u64 bytes;
+
+	/*
+	 * Don't attempt to do anything if size isn't aligned to a memory
+	 * block or equal to zero.
+	 */
+	bytes = memory_block_size_bytes();
+	if (val & (bytes - 1)) {
+		pr_err("Value must be aligned with 0x%llx\n", bytes);
+		return -EINVAL;
+	}
+
+	mutex_lock(&memtrace_mutex);
+
+	/*
+	 * Free all previously allocated memory. Test memtrace_array rather
+	 * than memtrace_size: if an earlier attempt allocated regions but
+	 * memtrace_init_debugfs() failed, memtrace_size is still 0 while
+	 * memtrace_array and memtrace_array_nr describe live regions.
+	 * Skipping the free would leak them, and the next allocation would
+	 * index a fresh array with the stale memtrace_array_nr.
+	 */
+	if (memtrace_array && memtrace_free_regions())
+		goto out_unlock;
+
+	if (!val) {
+		rc = 0;
+		goto out_unlock;
+	}
+
+	/* Allocate memory. */
+	if (memtrace_init_regions_runtime(val))
+		goto out_unlock;
+
+	if (memtrace_init_debugfs())
+		goto out_unlock;
+
+	memtrace_size = val;
+	rc = 0;
+out_unlock:
+	mutex_unlock(&memtrace_mutex);
+	return rc;
+}
+
+static int memtrace_enable_get(void *data, u64 *val)
+{
+ *val = memtrace_size; /* read without taking memtrace_mutex */
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(memtrace_init_fops, memtrace_enable_get,
+ memtrace_enable_set, "0x%016llx\n");
+
+static int memtrace_init(void)
+{
+ memtrace_debugfs_dir = debugfs_create_dir("memtrace",
+ arch_debugfs_dir); /* parent of the per-node directories made by memtrace_init_debugfs() */
+
+ debugfs_create_file("enable", 0600, memtrace_debugfs_dir,
+ NULL, &memtrace_init_fops); /* reads/writes go through memtrace_enable_get()/memtrace_enable_set() */
+
+ return 0;
+}
+machine_device_initcall(powernv, memtrace_init);
diff --git a/arch/powerpc/platforms/powernv/ocxl.c b/arch/powerpc/platforms/powernv/ocxl.c
new file mode 100644
index 000000000..64a9c7125
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/ocxl.c
@@ -0,0 +1,598 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright 2017 IBM Corp.
+#include <asm/pnv-ocxl.h>
+#include <asm/opal.h>
+#include <misc/ocxl-config.h>
+#include "pci.h"
+
+#define PNV_OCXL_TL_P9_RECV_CAP 0x000000000000000Full
+#define PNV_OCXL_ACTAG_MAX 64
+/* PASIDs are 20-bit, but on P9, NPU can only handle 15 bits */
+#define PNV_OCXL_PASID_BITS 15
+#define PNV_OCXL_PASID_MAX ((1 << PNV_OCXL_PASID_BITS) - 1)
+
+#define AFU_PRESENT (1 << 31)
+#define AFU_INDEX_MASK 0x3F000000
+#define AFU_INDEX_SHIFT 24
+#define ACTAG_MASK 0xFFF
+
+
+struct actag_range {
+ u16 start; /* first actag of the range */
+ u16 count; /* number of actags in the range */
+};
+
+struct npu_link {
+ struct list_head list; /* entry in links_list */
+ int domain; /* PCI domain, bus and slot identify the device; */
+ int bus; /* all functions of a device share one npu_link */
+ int dev;
+ u16 fn_desired_actags[8]; /* per PCI function: actags desired, summed over its AFUs */
+ struct actag_range fn_actags[8]; /* per PCI function: range assigned by assign_actags() */
+ bool assignment_done; /* set once assign_actags() has run for this link */
+};
+static struct list_head links_list = LIST_HEAD_INIT(links_list);
+static DEFINE_MUTEX(links_list_lock); /* held around find_link() and npu_link accesses */
+
+
+/*
+ * opencapi actags handling:
+ *
+ * When sending commands, the opencapi device references the memory
+ * context it's targeting with an 'actag', which is really an alias
+ * for a (BDF, pasid) combination. When it receives a command, the NPU
+ * must do a lookup of the actag to identify the memory context. The
+ * hardware supports a finite number of actags per link (64 for
+ * POWER9).
+ *
+ * The device can carry multiple functions, and each function can have
+ * multiple AFUs. Each AFU advertises in its config space the number
+ * of desired actags. The host must configure in the config space of
+ * the AFU how many actags the AFU is really allowed to use (which can
+ * be less than what the AFU desires).
+ *
+ * When a PCI function is probed by the driver, it has no visibility
+ * about the other PCI functions and how many actags they'd like,
+ * which makes it impossible to distribute actags fairly among AFUs.
+ *
+ * Unfortunately, the only way to know how many actags a function
+ * desires is by looking at the data for each AFU in the config space
+ * and add them up. Similarly, the only way to know how many actags
+ * all the functions of the physical device desire is by adding the
+ * previously computed function counts. Then we can match that against
+ * what the hardware supports.
+ *
+ * To get a comprehensive view, we use a 'pci fixup': at the end of
+ * PCI enumeration, each function counts how many actags its AFUs
+ * desire and we save it in a 'npu_link' structure, shared between all
+ * the PCI functions of a same device. Therefore, when the first
+ * function is probed by the driver, we can get an idea of the total
+ * count of desired actags for the device, and assign the actags to
+ * the AFUs, by pro-rating if needed.
+ */
+
+/*
+ * Search the DVSEC extended capabilities that follow @pos for one whose
+ * vendor is IBM and whose DVSEC id is @dvsec_id.
+ *
+ * Returns the config-space offset of the match, or 0 if there is none.
+ */
+static int find_dvsec_from_pos(struct pci_dev *dev, int dvsec_id, int pos)
+{
+	u16 vendor, id;
+	int cap = pos;
+
+	for (;;) {
+		cap = pci_find_next_ext_capability(dev, cap,
+						   OCXL_EXT_CAP_ID_DVSEC);
+		if (!cap)
+			return 0;
+
+		pci_read_config_word(dev, cap + OCXL_DVSEC_VENDOR_OFFSET,
+				     &vendor);
+		pci_read_config_word(dev, cap + OCXL_DVSEC_ID_OFFSET, &id);
+		if (vendor == PCI_VENDOR_ID_IBM && id == dvsec_id)
+			return cap;
+	}
+}
+
+/*
+ * Find the AFU control DVSEC whose AFU index field equals @afu_idx.
+ *
+ * Returns its config-space offset, or 0 if no such DVSEC exists.
+ */
+static int find_dvsec_afu_ctrl(struct pci_dev *dev, u8 afu_idx)
+{
+	int cap = 0;
+	u8 found_idx;
+
+	for (;;) {
+		cap = find_dvsec_from_pos(dev, OCXL_DVSEC_AFU_CTRL_ID, cap);
+		if (!cap)
+			return 0;
+
+		pci_read_config_byte(dev, cap + OCXL_DVSEC_AFU_CTRL_AFU_IDX,
+				     &found_idx);
+		if (found_idx == afu_idx)
+			return cap;
+	}
+}
+
+/*
+ * Read the AFU index field of the function DVSEC into *@afu_idx, or
+ * store -1 there when the AFU-present bit is clear.
+ *
+ * Returns 0 on success, -ESRCH if the function DVSEC is not found (in
+ * which case *@afu_idx is not written).
+ */
+static int get_max_afu_index(struct pci_dev *dev, int *afu_idx)
+{
+	u32 reg;
+	int dvsec;
+
+	dvsec = pci_find_dvsec_capability(dev, PCI_VENDOR_ID_IBM,
+					  OCXL_DVSEC_FUNC_ID);
+	if (!dvsec)
+		return -ESRCH;
+
+	pci_read_config_dword(dev, dvsec + OCXL_DVSEC_FUNC_OFF_INDEX, &reg);
+	if (!(reg & AFU_PRESENT)) {
+		*afu_idx = -1;
+		return 0;
+	}
+
+	*afu_idx = (reg & AFU_INDEX_MASK) >> AFU_INDEX_SHIFT;
+	return 0;
+}
+
+/*
+ * Read the "actag supported" field of AFU @afu_idx into *@actag.
+ *
+ * Returns 0 on success, -ESRCH if the AFU has no control DVSEC.
+ */
+static int get_actag_count(struct pci_dev *dev, int afu_idx, int *actag)
+{
+	u16 supported;
+	int dvsec = find_dvsec_afu_ctrl(dev, afu_idx);
+
+	if (!dvsec)
+		return -ESRCH;
+
+	pci_read_config_word(dev, dvsec + OCXL_DVSEC_AFU_CTRL_ACTAG_SUP,
+			     &supported);
+	*actag = supported & ACTAG_MASK;
+	return 0;
+}
+
+/*
+ * Return the npu_link for the device @dev belongs to, creating and
+ * registering a zeroed one on first use. Returns NULL if the
+ * allocation fails.
+ */
+static struct npu_link *find_link(struct pci_dev *dev)
+{
+	const int domain = pci_domain_nr(dev->bus);
+	const int bus = dev->bus->number;
+	const int slot = PCI_SLOT(dev->devfn);
+	struct npu_link *link;
+
+	/* The functions of a device all share the same link */
+	list_for_each_entry(link, &links_list, list) {
+		if (link->domain != domain)
+			continue;
+		if (link->bus != bus)
+			continue;
+		if (link->dev == slot)
+			return link;
+	}
+
+	/* link doesn't exist yet. Allocate one */
+	link = kzalloc(sizeof(*link), GFP_KERNEL);
+	if (!link)
+		return NULL;
+
+	link->domain = domain;
+	link->bus = bus;
+	link->dev = slot;
+	list_add(&link->list, &links_list);
+	return link;
+}
+
+/*
+ * PCI header fixup: for a function behind an opencapi NPU PHB, add up
+ * the actags desired by its AFUs and record the total in the npu_link
+ * shared by all functions of the device.
+ */
+static void pnv_ocxl_fixup_actag(struct pci_dev *dev)
+{
+	struct pci_controller *hose = pci_bus_to_host(dev->bus);
+	struct pnv_phb *phb = hose->private_data;
+	const unsigned int fn = PCI_FUNC(dev->devfn);
+	struct npu_link *link;
+	int rc, max_afu = -1, afu, actag;
+	u16 desired = 0;
+
+	if (!machine_is(powernv))
+		return;
+
+	if (phb->type != PNV_PHB_NPU_OCAPI)
+		return;
+
+	mutex_lock(&links_list_lock);
+
+	link = find_link(dev);
+	if (!link) {
+		dev_warn(&dev->dev, "couldn't update actag information\n");
+		goto unlock;
+	}
+
+	/*
+	 * Check how many actags are desired for the AFUs under that
+	 * function and add it to the count for the link
+	 */
+	rc = get_max_afu_index(dev, &max_afu);
+	if (rc) {
+		/* Most likely an invalid config space */
+		dev_dbg(&dev->dev, "couldn't find AFU information\n");
+		max_afu = -1;
+	}
+
+	for (afu = 0; afu <= max_afu; afu++) {
+		/*
+		 * AFU index 'holes' are allowed. So don't fail if we
+		 * can't read the actag info for an index
+		 */
+		if (get_actag_count(dev, afu, &actag))
+			continue;
+		desired += actag;
+	}
+	link->fn_desired_actags[fn] = desired;
+	dev_dbg(&dev->dev, "total actags for function: %d\n",
+		link->fn_desired_actags[fn]);
+
+unlock:
+	mutex_unlock(&links_list_lock);
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pnv_ocxl_fixup_actag);
+
+/*
+ * Number of actags a function gets: its full request when the link-wide
+ * total fits in PNV_OCXL_ACTAG_MAX, otherwise its pro-rated share
+ * (rounded down).
+ */
+static u16 assign_fn_actags(u16 desired, u16 total)
+{
+	if (total > PNV_OCXL_ACTAG_MAX)
+		return PNV_OCXL_ACTAG_MAX * desired / total;
+
+	return desired;
+}
+
+/*
+ * Hand out consecutive actag ranges to the functions of @link, in
+ * function order, based on the desired counts recorded during PCI
+ * enumeration, then mark the link's assignment as done.
+ */
+static void assign_actags(struct npu_link *link)
+{
+	u16 actag_count, range_start = 0, total_desired = 0;
+	int i;
+
+	for (i = 0; i < 8; i++)
+		total_desired += link->fn_desired_actags[i];
+
+	for (i = 0; i < 8; i++) {
+		if (link->fn_desired_actags[i]) {
+			actag_count = assign_fn_actags(
+				link->fn_desired_actags[i],
+				total_desired);
+			link->fn_actags[i].start = range_start;
+			link->fn_actags[i].count = actag_count;
+			range_start += actag_count;
+			/*
+			 * range_start is now the exclusive end of the ranges
+			 * handed out so far. Using all PNV_OCXL_ACTAG_MAX
+			 * actags is legal, so only warn when that limit is
+			 * actually exceeded.
+			 */
+			WARN_ON(range_start > PNV_OCXL_ACTAG_MAX);
+		}
+		pr_debug("link %x:%x:%x fct %d actags: start=%d count=%d (desired=%d)\n",
+			 link->domain, link->bus, link->dev, i,
+			 link->fn_actags[i].start, link->fn_actags[i].count,
+			 link->fn_desired_actags[i]);
+	}
+	link->assignment_done = true;
+}
+
+/*
+ * Report the actag range of the function @dev: first actag in *@base,
+ * number granted in *@enabled and number desired in *@supported.
+ *
+ * Returns 0 on success, -ENODEV if no link could be found or created.
+ */
+int pnv_ocxl_get_actag(struct pci_dev *dev, u16 *base, u16 *enabled,
+		       u16 *supported)
+{
+	const unsigned int fn = PCI_FUNC(dev->devfn);
+	struct npu_link *link;
+	int rc = 0;
+
+	mutex_lock(&links_list_lock);
+
+	link = find_link(dev);
+	if (!link) {
+		dev_err(&dev->dev, "actag information not found\n");
+		rc = -ENODEV;
+		goto unlock;
+	}
+
+	/*
+	 * On p9, we only have 64 actags per link, so they must be
+	 * shared by all the functions of the same adapter. We counted
+	 * the desired actag counts during PCI enumeration, so that we
+	 * can allocate a pro-rated number of actags to each function.
+	 */
+	if (!link->assignment_done)
+		assign_actags(link);
+
+	*base = link->fn_actags[fn].start;
+	*enabled = link->fn_actags[fn].count;
+	*supported = link->fn_desired_actags[fn];
+
+unlock:
+	mutex_unlock(&links_list_lock);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(pnv_ocxl_get_actag);
+
+/*
+ * Report in *@count how many PASIDs the function @dev may use.
+ *
+ * Returns 0 and PNV_OCXL_PASID_MAX when the function has a non-zero
+ * desired actag count, -EINVAL when it has none (*@count untouched) and
+ * -ENODEV if no link could be found or created.
+ */
+int pnv_ocxl_get_pasid_count(struct pci_dev *dev, int *count)
+{
+	const unsigned int fn = PCI_FUNC(dev->devfn);
+	struct npu_link *link;
+	int rc = -EINVAL;
+
+	/*
+	 * The number of PASIDs (process address space ID) which can
+	 * be used by a function depends on how many functions exist
+	 * on the device. The NPU needs to be configured to know how
+	 * many bits are available to PASIDs and how many are to be
+	 * used by the function BDF identifier.
+	 *
+	 * We only support one AFU-carrying function for now.
+	 */
+	mutex_lock(&links_list_lock);
+
+	link = find_link(dev);
+	if (!link) {
+		dev_err(&dev->dev, "actag information not found\n");
+		mutex_unlock(&links_list_lock);
+		return -ENODEV;
+	}
+
+	/* PCI_FUNC() is always within the 8-entry per-function table. */
+	if (link->fn_desired_actags[fn]) {
+		*count = PNV_OCXL_PASID_MAX;
+		rc = 0;
+	}
+
+	mutex_unlock(&links_list_lock);
+	dev_dbg(&dev->dev, "%d PASIDs available for function\n",
+		rc ? 0 : *count);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(pnv_ocxl_get_pasid_count);
+
+/*
+ * OR the 4-bit @rate of template @templ into @buf. Templates are packed
+ * two per byte, counting down from PNV_OCXL_TL_MAX_TEMPLATE; an even
+ * distance from the maximum lands in the high nibble.
+ */
+static void set_templ_rate(unsigned int templ, unsigned int rate, char *buf)
+{
+	unsigned int pos;
+	int shift, idx;
+
+	WARN_ON(templ > PNV_OCXL_TL_MAX_TEMPLATE);
+	pos = PNV_OCXL_TL_MAX_TEMPLATE - templ;
+	idx = pos / 2;
+	shift = (pos % 2) ? 0 : 4;
+	buf[idx] |= rate << shift;
+}
+
+int pnv_ocxl_get_tl_cap(struct pci_dev *dev, long *cap,
+ char *rate_buf, int rate_buf_size)
+{
+ if (rate_buf_size != PNV_OCXL_TL_RATE_BUF_SIZE)
+ return -EINVAL; /* only the exact buffer size is accepted */
+ /*
+ * Fill *cap and rate_buf with the host's TL capabilities and
+ * return 0. dev is not consulted.
+ *
+ * The TL capabilities are a characteristic of the NPU, so
+ * we go with hard-coded values.
+ *
+ * The receiving rate of each template is encoded on 4 bits.
+ *
+ * On P9:
+ * - templates 0 -> 3 are supported
+ * - templates 0, 1 and 3 have a 0 receiving rate
+ * - template 2 has receiving rate of 1 (extra cycle)
+ */
+ memset(rate_buf, 0, rate_buf_size);
+ set_templ_rate(2, 1, rate_buf);
+ *cap = PNV_OCXL_TL_P9_RECV_CAP;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(pnv_ocxl_get_tl_cap);
+
+/*
+ * Pass the TL capabilities and the physical address of the rate buffer
+ * to OPAL for the function @dev.
+ *
+ * Returns 0 on success, -EINVAL if @rate_buf_size is not
+ * PNV_OCXL_TL_RATE_BUF_SIZE or if the OPAL call fails.
+ */
+int pnv_ocxl_set_tl_conf(struct pci_dev *dev, long cap,
+			 uint64_t rate_buf_phys, int rate_buf_size)
+{
+	struct pci_controller *hose = pci_bus_to_host(dev->bus);
+	struct pnv_phb *phb = hose->private_data;
+	int opal_rc;
+
+	if (rate_buf_size != PNV_OCXL_TL_RATE_BUF_SIZE)
+		return -EINVAL;
+
+	opal_rc = opal_npu_tl_set(phb->opal_id, dev->devfn, cap,
+				  rate_buf_phys, rate_buf_size);
+	if (!opal_rc)
+		return 0;
+
+	dev_err(&dev->dev, "Can't configure host TL: %d\n", opal_rc);
+	return -EINVAL;
+}
+EXPORT_SYMBOL_GPL(pnv_ocxl_set_tl_conf);
+
+/*
+ * Read the "ibm,opal-xsl-irq" property of the device's node into
+ * *@hwirq.
+ *
+ * Returns 0 on success or the of_property_read_u32() error.
+ */
+int pnv_ocxl_get_xsl_irq(struct pci_dev *dev, int *hwirq)
+{
+	int err = of_property_read_u32(dev->dev.of_node, "ibm,opal-xsl-irq",
+				       hwirq);
+
+	if (!err)
+		return 0;
+
+	dev_err(&dev->dev,
+		"Can't get translation interrupt for device\n");
+	return err;
+}
+EXPORT_SYMBOL_GPL(pnv_ocxl_get_xsl_irq);
+
+void pnv_ocxl_unmap_xsl_regs(void __iomem *dsisr, void __iomem *dar,
+ void __iomem *tfc, void __iomem *pe_handle)
+{
+ iounmap(dsisr); /* undoes the four ioremap() calls made by pnv_ocxl_map_xsl_regs() */
+ iounmap(dar);
+ iounmap(tfc);
+ iounmap(pe_handle);
+}
+EXPORT_SYMBOL_GPL(pnv_ocxl_unmap_xsl_regs);
+
+/*
+ * ioremap() the four translation registers listed in the device node's
+ * "ibm,opal-xsl-mmio" property and return them through the out
+ * pointers.
+ *
+ * Returns 0 on success. On failure nothing stays mapped, the out
+ * pointers are untouched and the property-read error, or -EINVAL if an
+ * ioremap() failed, is returned.
+ */
+int pnv_ocxl_map_xsl_regs(struct pci_dev *dev, void __iomem **dsisr,
+			  void __iomem **dar, void __iomem **tfc,
+			  void __iomem **pe_handle)
+{
+	void __iomem *mapped[4];
+	u64 addr;
+	int n, rc;
+
+	/*
+	 * opal stores the mmio addresses of the DSISR, DAR, TFC and
+	 * PE_HANDLE registers in a device tree property, in that
+	 * order
+	 */
+	for (n = 0; n < 4; n++) {
+		rc = of_property_read_u64_index(dev->dev.of_node,
+						"ibm,opal-xsl-mmio", n, &addr);
+		if (rc)
+			goto err_unmap;
+
+		mapped[n] = ioremap(addr, 8);
+		if (!mapped[n]) {
+			rc = -EINVAL;
+			goto err_unmap;
+		}
+	}
+
+	*dsisr = mapped[0];
+	*dar = mapped[1];
+	*tfc = mapped[2];
+	*pe_handle = mapped[3];
+	return 0;
+
+err_unmap:
+	dev_err(&dev->dev, "Can't map translation mmio registers\n");
+	/* Release, newest first, whatever was mapped before the failure. */
+	while (n-- > 0)
+		iounmap(mapped[n]);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(pnv_ocxl_map_xsl_regs);
+
+struct spa_data { /* handed out by pnv_ocxl_spa_setup() as opaque platform_data */
+ u64 phb_opal_id; /* copy of phb->opal_id, first argument of the OPAL SPA calls */
+ u32 bdfn; /* pci_dev_id() of the function */
+};
+
+/*
+ * Register the Shared Process Area at @spa_mem with OPAL for the
+ * function @dev and return, through *@platform_data, the handle the
+ * other pnv_ocxl_spa_*() calls expect.
+ *
+ * Returns 0 on success, -ENOMEM if the handle cannot be allocated, or
+ * the OPAL return code if the setup call fails.
+ */
+int pnv_ocxl_spa_setup(struct pci_dev *dev, void *spa_mem, int PE_mask,
+		       void **platform_data)
+{
+	struct pci_controller *hose = pci_bus_to_host(dev->bus);
+	struct pnv_phb *phb = hose->private_data;
+	struct spa_data *spa;
+	u32 bdfn;
+	int rc;
+
+	/* Allocate the handle first so OPAL is only called if it can be kept. */
+	spa = kzalloc(sizeof(*spa), GFP_KERNEL);
+	if (!spa)
+		return -ENOMEM;
+
+	bdfn = pci_dev_id(dev);
+	rc = opal_npu_spa_setup(phb->opal_id, bdfn, virt_to_phys(spa_mem),
+				PE_mask);
+	if (rc)
+		goto err_free;
+
+	spa->phb_opal_id = phb->opal_id;
+	spa->bdfn = bdfn;
+	*platform_data = (void *) spa;
+	return 0;
+
+err_free:
+	dev_err(&dev->dev, "Can't setup Shared Process Area: %d\n", rc);
+	kfree(spa);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(pnv_ocxl_spa_setup);
+
+/*
+ * Undo pnv_ocxl_spa_setup(): call the OPAL setup again with a zero
+ * address and mask, warn if that fails, and free the handle.
+ */
+void pnv_ocxl_spa_release(void *platform_data)
+{
+	struct spa_data *spa = platform_data;
+	int opal_rc = opal_npu_spa_setup(spa->phb_opal_id, spa->bdfn, 0, 0);
+
+	WARN_ON(opal_rc);
+	kfree(spa);
+}
+EXPORT_SYMBOL_GPL(pnv_ocxl_spa_release);
+
+int pnv_ocxl_spa_remove_pe_from_cache(void *platform_data, int pe_handle)
+{
+ struct spa_data *data = (struct spa_data *) platform_data; /* handle from pnv_ocxl_spa_setup() */
+
+ return opal_npu_spa_clear_cache(data->phb_opal_id, data->bdfn, pe_handle); /* OPAL return code is passed through unchanged */
+}
+EXPORT_SYMBOL_GPL(pnv_ocxl_spa_remove_pe_from_cache);
+
+/*
+ * Map the function @dev to logical partition @lparid through OPAL and
+ * ioremap() the first ATSD register set listed in the PHB node's
+ * "ibm,mmio-atsd" property, returning the mapping in *@arva.
+ *
+ * Returns 0 on success, the property-read or OPAL error otherwise, or
+ * -ENOMEM if the ioremap() fails (*@arva is then NULL).
+ */
+int pnv_ocxl_map_lpar(struct pci_dev *dev, uint64_t lparid,
+		      uint64_t lpcr, void __iomem **arva)
+{
+	struct pci_controller *hose = pci_bus_to_host(dev->bus);
+	struct pnv_phb *phb = hose->private_data;
+	u64 atsd_phys;
+	int rc;
+
+	/* ATSD physical address.
+	 * ATSD LAUNCH register: write access initiates a shoot down to
+	 * initiate the TLB Invalidate command.
+	 */
+	rc = of_property_read_u64_index(hose->dn, "ibm,mmio-atsd",
+					0, &atsd_phys);
+	if (rc) {
+		dev_info(&dev->dev, "No available ATSD found\n");
+		return rc;
+	}
+
+	/* Assign a register set to a Logical Partition and MMIO ATSD
+	 * LPARID register to the required value.
+	 */
+	rc = opal_npu_map_lpar(phb->opal_id, pci_dev_id(dev),
+			       lparid, lpcr);
+	if (rc) {
+		dev_err(&dev->dev, "Error mapping device to LPAR: %d\n", rc);
+		return rc;
+	}
+
+	*arva = ioremap(atsd_phys, 24);
+	if (*arva)
+		return 0;
+
+	dev_warn(&dev->dev, "ioremap failed - mmio_atsd: %#llx\n", atsd_phys);
+	return -ENOMEM;
+}
+EXPORT_SYMBOL_GPL(pnv_ocxl_map_lpar);
+
+void pnv_ocxl_unmap_lpar(void __iomem *arva)
+{
+ iounmap(arva); /* releases the ATSD mapping created by pnv_ocxl_map_lpar() */
+}
+EXPORT_SYMBOL_GPL(pnv_ocxl_unmap_lpar);
+
+void pnv_ocxl_tlb_invalidate(void __iomem *arva, /* mapped ATSD registers; NULL makes the call a no-op */
+ unsigned long pid, /* written to the PID field of the launch register */
+ unsigned long addr, /* 0 selects the whole-PID form, non-zero targets that address */
+ unsigned long page_size) /* 4K, 2M or 1G in bytes; anything else is treated as 64K */
+{
+ unsigned long timeout = jiffies + (HZ * PNV_OCXL_ATSD_TIMEOUT); /* deadline for the status poll below */
+ u64 val = 0ull;
+ int pend;
+ u8 size;
+
+ if (!(arva))
+ return;
+
+ if (addr) {
+ /* load Abbreviated Virtual Address register with
+ * the necessary value
+ */
+ val |= FIELD_PREP(PNV_OCXL_ATSD_AVA_AVA, addr >> (63-51));
+ out_be64(arva + PNV_OCXL_ATSD_AVA, val);
+ }
+
+ /* Write access initiates a shoot down to initiate the
+ * TLB Invalidate command. val is rebuilt from scratch here;
+ * the AVA value above has already been written out.
+ */
+ val = PNV_OCXL_ATSD_LNCH_R;
+ val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_RIC, 0b10);
+ if (addr)
+ val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_IS, 0b00);
+ else {
+ val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_IS, 0b01);
+ val |= PNV_OCXL_ATSD_LNCH_OCAPI_SINGLETON;
+ }
+ val |= PNV_OCXL_ATSD_LNCH_PRS;
+ /* Actual Page Size to be invalidated
+ * 000 4KB
+ * 101 64KB
+ * 001 2MB
+ * 010 1GB
+ * 64KB is the default when page_size matches none of the others.
+ */
+ size = 0b101;
+ if (page_size == 0x1000)
+ size = 0b000;
+ if (page_size == 0x200000)
+ size = 0b001;
+ if (page_size == 0x40000000)
+ size = 0b010;
+ val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_AP, size);
+ val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_PID, pid);
+ out_be64(arva + PNV_OCXL_ATSD_LNCH, val);
+
+ /* Poll the ATSD status register to determine when the
+ * TLB Invalidate has been completed. The top bit of the
+ * register is the pending flag; give up with an error
+ * message once the deadline computed on entry has passed.
+ */
+ val = in_be64(arva + PNV_OCXL_ATSD_STAT);
+ pend = val >> 63;
+
+ while (pend) {
+ if (time_after_eq(jiffies, timeout)) {
+ pr_err("%s - Timeout while reading XTS MMIO ATSD status register (val=%#llx, pidr=0x%lx)\n",
+ __func__, val, pid);
+ return;
+ }
+ cpu_relax();
+ val = in_be64(arva + PNV_OCXL_ATSD_STAT);
+ pend = val >> 63;
+ }
+}
+EXPORT_SYMBOL_GPL(pnv_ocxl_tlb_invalidate);
diff --git a/arch/powerpc/platforms/powernv/opal-async.c b/arch/powerpc/platforms/powernv/opal-async.c
new file mode 100644
index 000000000..c094fdf58
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-async.c
@@ -0,0 +1,290 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * PowerNV OPAL asynchronous completion interfaces
+ *
+ * Copyright 2013-2017 IBM Corp.
+ */
+
+#undef DEBUG
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/semaphore.h>
+#include <linux/spinlock.h>
+#include <linux/wait.h>
+#include <linux/gfp.h>
+#include <linux/of.h>
+#include <asm/machdep.h>
+#include <asm/opal.h>
+
+/*
+ * Lifecycle of an async token slot, as driven by the functions in this file.
+ */
+enum opal_async_token_state {
+ ASYNC_TOKEN_UNALLOCATED = 0, /* free; may be handed out by __opal_async_get_token() */
+ ASYNC_TOKEN_ALLOCATED, /* handed out to a caller */
+ ASYNC_TOKEN_DISPATCHED, /* a caller has started an interruptible wait on it */
+ ASYNC_TOKEN_ABANDONED, /* released while DISPATCHED; freed when the completion arrives */
+ ASYNC_TOKEN_COMPLETED /* completion message for the token has been received */
+};
+
+/* Per-token bookkeeping: current state plus the completion message. */
+struct opal_async_token {
+ enum opal_async_token_state state;
+ struct opal_msg response; /* copied in by opal_async_comp_event() */
+};
+
+static DECLARE_WAIT_QUEUE_HEAD(opal_async_wait); /* woken when a completion arrives */
+static DEFINE_SPINLOCK(opal_async_comp_lock); /* held around updates of token state */
+static struct semaphore opal_async_sem; /* initialised to the number of tokens */
+static unsigned int opal_max_async_tokens; /* number of entries in opal_async_tokens */
+static struct opal_async_token *opal_async_tokens; /* allocated in opal_async_comp_init() */
+
+/*
+ * Claim the lowest-numbered UNALLOCATED token slot and mark it ALLOCATED.
+ * Returns the slot index, or -EBUSY when every slot is in use.
+ */
+static int __opal_async_get_token(void)
+{
+	unsigned long irq_flags;
+	int found = -EBUSY;
+	int idx;
+
+	spin_lock_irqsave(&opal_async_comp_lock, irq_flags);
+
+	for (idx = 0; idx < opal_max_async_tokens; idx++) {
+		if (opal_async_tokens[idx].state != ASYNC_TOKEN_UNALLOCATED)
+			continue;
+
+		opal_async_tokens[idx].state = ASYNC_TOKEN_ALLOCATED;
+		found = idx;
+		break;
+	}
+
+	spin_unlock_irqrestore(&opal_async_comp_lock, irq_flags);
+
+	return found;
+}
+
+/*
+ * Note: If the returned token is used in an opal call and opal returns
+ * OPAL_ASYNC_COMPLETION you MUST call one of opal_async_wait_response() or
+ * opal_async_wait_response_interruptible() at least once before calling another
+ * opal_async_* function
+ */
+int opal_async_get_token_interruptible(void)
+{
+	int token;
+
+	/* Sleep (interruptibly) until the semaphore says a token is free */
+	if (down_interruptible(&opal_async_sem) != 0)
+		return -ERESTARTSYS;
+
+	token = __opal_async_get_token();
+	if (token >= 0)
+		return token;
+
+	/* No slot was actually free: give the semaphore count back */
+	up(&opal_async_sem);
+	return token;
+}
+EXPORT_SYMBOL_GPL(opal_async_get_token_interruptible);
+
+/*
+ * Try to return @token to the UNALLOCATED pool.
+ *
+ * Returns 0 when the slot was freed, 1 when it could not be freed yet and
+ * -EINVAL when @token is outside the token array.
+ */
+static int __opal_async_release_token(int token)
+{
+	enum opal_async_token_state cur;
+	unsigned long irq_flags;
+	int rc;
+
+	if (token < 0 || token >= opal_max_async_tokens) {
+		pr_err("%s: Passed token is out of range, token %d\n",
+		       __func__, token);
+		return -EINVAL;
+	}
+
+	spin_lock_irqsave(&opal_async_comp_lock, irq_flags);
+	cur = opal_async_tokens[token].state;
+	if (cur == ASYNC_TOKEN_COMPLETED || cur == ASYNC_TOKEN_ALLOCATED) {
+		opal_async_tokens[token].state = ASYNC_TOKEN_UNALLOCATED;
+		rc = 0;
+	} else {
+		/*
+		 * DISPATCHED and ABANDONED tokens must wait for OPAL to
+		 * respond. A DISPATCHED token becomes ABANDONED so that the
+		 * response handling code knows nobody is waiting and frees
+		 * it at that point.
+		 */
+		if (cur == ASYNC_TOKEN_DISPATCHED)
+			opal_async_tokens[token].state = ASYNC_TOKEN_ABANDONED;
+		rc = 1;
+	}
+	spin_unlock_irqrestore(&opal_async_comp_lock, irq_flags);
+
+	return rc;
+}
+
+/*
+ * Release @token; the semaphore count is only given back when the slot was
+ * really freed (return value 0). Other return values are passed through
+ * from __opal_async_release_token().
+ */
+int opal_async_release_token(int token)
+{
+	int rc = __opal_async_release_token(token);
+
+	if (rc == 0)
+		up(&opal_async_sem);
+
+	return rc;
+}
+EXPORT_SYMBOL_GPL(opal_async_release_token);
+
+/*
+ * Block (uninterruptibly) until @token is COMPLETED, then copy its response
+ * into @msg. Returns 0 on success, -EINVAL for an out-of-range token or a
+ * NULL @msg.
+ */
+int opal_async_wait_response(uint64_t token, struct opal_msg *msg)
+{
+	struct opal_async_token *slot;
+
+	if (token >= opal_max_async_tokens) {
+		pr_err("%s: Invalid token passed\n", __func__);
+		return -EINVAL;
+	}
+
+	if (!msg) {
+		pr_err("%s: Invalid message pointer passed\n", __func__);
+		return -EINVAL;
+	}
+
+	slot = &opal_async_tokens[token];
+
+	/*
+	 * The token does not need to be marked as dispatched here:
+	 * wait_event() blocks until it completes.
+	 *
+	 * Kick the poller first so that completion is noticed quickly on
+	 * platforms or simulators where the interrupts aren't functional.
+	 */
+	opal_wake_poller();
+	wait_event(opal_async_wait, slot->state == ASYNC_TOKEN_COMPLETED);
+	memcpy(msg, &slot->response, sizeof(*msg));
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(opal_async_wait_response);
+
+/*
+ * Interruptible variant of opal_async_wait_response().
+ *
+ * Returns 0 with the response copied into @msg, -EINVAL for a bad argument,
+ * or the non-zero value of wait_event_interruptible() when the wait was
+ * interrupted (in which case @msg is left untouched).
+ */
+int opal_async_wait_response_interruptible(uint64_t token, struct opal_msg *msg)
+{
+	struct opal_async_token *slot;
+	unsigned long irq_flags;
+	int rc;
+
+	if (token >= opal_max_async_tokens) {
+		pr_err("%s: Invalid token passed\n", __func__);
+		return -EINVAL;
+	}
+
+	if (!msg) {
+		pr_err("%s: Invalid message pointer passed\n", __func__);
+		return -EINVAL;
+	}
+
+	slot = &opal_async_tokens[token];
+
+	/*
+	 * On the first call the token is moved to DISPATCHED, so that if the
+	 * wait below is interrupted and the caller then releases the token,
+	 * the release code knows the slot must stay reserved until the
+	 * response arrives.
+	 *
+	 * Only an ALLOCATED token is changed - it may already have been
+	 * completed before the caller got around to calling this for the
+	 * first time. The state is re-checked under the lock.
+	 *
+	 * As the comment at the token allocation function says, a caller
+	 * whose opal call returned OPAL_ASYNC_COMPLETION *must* call this
+	 * (or the non-interruptible version) before doing anything else
+	 * with the token.
+	 */
+	if (slot->state == ASYNC_TOKEN_ALLOCATED) {
+		spin_lock_irqsave(&opal_async_comp_lock, irq_flags);
+		if (slot->state == ASYNC_TOKEN_ALLOCATED)
+			slot->state = ASYNC_TOKEN_DISPATCHED;
+		spin_unlock_irqrestore(&opal_async_comp_lock, irq_flags);
+	}
+
+	/*
+	 * Kick the poller before waiting to speed things up on platforms or
+	 * simulators where the interrupts aren't functional.
+	 */
+	opal_wake_poller();
+	rc = wait_event_interruptible(opal_async_wait,
+				      slot->state == ASYNC_TOKEN_COMPLETED);
+	if (rc)
+		return rc;
+
+	memcpy(msg, &slot->response, sizeof(*msg));
+	return 0;
+}
+EXPORT_SYMBOL_GPL(opal_async_wait_response_interruptible);
+
+/*
+ * Notifier callback for OPAL messages. Called from interrupt context.
+ *
+ * For an OPAL_MSG_ASYNC_COMP message the token number is taken from
+ * params[0], the token is marked COMPLETED and either
+ *  - the token is freed here, if it had been ABANDONED by its owner, or
+ *  - the message is stored as the token's response and waiters are woken.
+ *
+ * Always returns 0.
+ */
+static int opal_async_comp_event(struct notifier_block *nb,
+		unsigned long msg_type, void *msg)
+{
+	struct opal_msg *comp_msg = msg;
+	enum opal_async_token_state state;
+	unsigned long flags;
+	uint64_t token;
+
+	if (msg_type != OPAL_MSG_ASYNC_COMP)
+		return 0;
+
+	token = be64_to_cpu(comp_msg->params[0]);
+
+	/* The token comes from firmware: never index the array with it blindly */
+	if (token >= opal_max_async_tokens) {
+		pr_err("%s: Completion for out of range token %llu\n",
+		       __func__, token);
+		return 0;
+	}
+
+	spin_lock_irqsave(&opal_async_comp_lock, flags);
+	state = opal_async_tokens[token].state;
+	/*
+	 * Store the response before marking the token COMPLETED: waiters
+	 * test the state without taking the lock, so publishing the state
+	 * first would let them copy a response that has not been written
+	 * yet. An ABANDONED token has no waiter, so nothing is stored.
+	 */
+	if (state != ASYNC_TOKEN_ABANDONED)
+		memcpy(&opal_async_tokens[token].response, comp_msg,
+		       sizeof(*comp_msg));
+	opal_async_tokens[token].state = ASYNC_TOKEN_COMPLETED;
+	spin_unlock_irqrestore(&opal_async_comp_lock, flags);
+
+	if (state == ASYNC_TOKEN_ABANDONED) {
+		/* Free the token, no one else will */
+		opal_async_release_token(token);
+		return 0;
+	}
+
+	wake_up(&opal_async_wait);
+
+	return 0;
+}
+
+/* Notifier registered for OPAL_MSG_ASYNC_COMP by opal_async_comp_init(). */
+static struct notifier_block opal_async_comp_nb = {
+ .notifier_call = opal_async_comp_event,
+ .next = NULL,
+ .priority = 0,
+};
+
+/*
+ * Set up the async token machinery.
+ *
+ * Reads the number of tokens from the "opal-msg-async-num" property of
+ * /ibm,opal, allocates the zeroed token array, registers the completion
+ * notifier and initialises the token semaphore to the token count.
+ *
+ * Returns 0 on success, -ENOENT when the node or property is missing,
+ * -ENOMEM when the array cannot be allocated, or the error returned by
+ * opal_message_notifier_register(). On failure the token count is reset to
+ * zero and the array pointer to NULL, so that the range checks in the other
+ * opal_async_* functions reject every token instead of indexing a missing
+ * or freed array.
+ */
+int __init opal_async_comp_init(void)
+{
+	struct device_node *opal_node;
+	const __be32 *async;
+	int err;
+
+	opal_node = of_find_node_by_path("/ibm,opal");
+	if (!opal_node) {
+		pr_err("%s: Opal node not found\n", __func__);
+		err = -ENOENT;
+		goto out;
+	}
+
+	async = of_get_property(opal_node, "opal-msg-async-num", NULL);
+	if (!async) {
+		pr_err("%s: %pOF has no opal-msg-async-num\n",
+				__func__, opal_node);
+		err = -ENOENT;
+		goto out_opal_node;
+	}
+
+	opal_max_async_tokens = be32_to_cpup(async);
+	opal_async_tokens = kcalloc(opal_max_async_tokens,
+			sizeof(*opal_async_tokens), GFP_KERNEL);
+	if (!opal_async_tokens) {
+		/* No array: make sure no token index is considered valid */
+		opal_max_async_tokens = 0;
+		err = -ENOMEM;
+		goto out_opal_node;
+	}
+
+	err = opal_message_notifier_register(OPAL_MSG_ASYNC_COMP,
+			&opal_async_comp_nb);
+	if (err) {
+		pr_err("%s: Can't register OPAL event notifier (%d)\n",
+				__func__, err);
+		kfree(opal_async_tokens);
+		/* Do not leave a dangling pointer with a non-zero count */
+		opal_async_tokens = NULL;
+		opal_max_async_tokens = 0;
+		goto out_opal_node;
+	}
+
+	sema_init(&opal_async_sem, opal_max_async_tokens);
+
+out_opal_node:
+	of_node_put(opal_node);
+out:
+	return err;
+}
diff --git a/arch/powerpc/platforms/powernv/opal-call.c b/arch/powerpc/platforms/powernv/opal-call.c
new file mode 100644
index 000000000..021b0ec29
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-call.c
@@ -0,0 +1,295 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/percpu.h>
+#include <linux/jump_label.h>
+#include <asm/interrupt.h>
+#include <asm/opal-api.h>
+#include <asm/trace.h>
+#include <asm/asm-prototypes.h>
+
+#ifdef CONFIG_TRACEPOINTS
+/*
+ * Since the tracing code might execute OPAL calls we need to guard against
+ * recursion.
+ */
+static DEFINE_PER_CPU(unsigned int, opal_trace_depth);
+
+/*
+ * Emit the opal_entry tracepoint for @opcode with its eight arguments,
+ * unless this CPU is already inside one of the OPAL trace helpers
+ * (per-CPU depth counter non-zero).
+ */
+static void __trace_opal_entry(s64 a0, s64 a1, s64 a2, s64 a3,
+			       s64 a4, s64 a5, s64 a6, s64 a7,
+			       unsigned long opcode)
+{
+	unsigned long args[8] = { a0, a1, a2, a3, a4, a5, a6, a7 };
+	unsigned int *nesting = this_cpu_ptr(&opal_trace_depth);
+
+	if (*nesting != 0)
+		return;
+
+	*nesting += 1;
+	trace_opal_entry(opcode, &args[0]);
+	*nesting -= 1;
+}
+
+/*
+ * Emit the opal_exit tracepoint for @opcode / @retval, guarded by the same
+ * per-CPU depth counter as __trace_opal_entry().
+ */
+static void __trace_opal_exit(unsigned long opcode, unsigned long retval)
+{
+	unsigned int *nesting = this_cpu_ptr(&opal_trace_depth);
+
+	if (*nesting != 0)
+		return;
+
+	*nesting += 1;
+	trace_opal_exit(opcode, retval);
+	*nesting -= 1;
+}
+
+/* Static key tested by DO_TRACE; defaults to false. */
+static DEFINE_STATIC_KEY_FALSE(opal_tracepoint_key);
+
+/* Increment the tracing static key. Always returns 0. */
+int opal_tracepoint_regfunc(void)
+{
+ static_branch_inc(&opal_tracepoint_key);
+ return 0;
+}
+
+/* Counterpart of opal_tracepoint_regfunc(): decrement the tracing static key. */
+void opal_tracepoint_unregfunc(void)
+{
+ static_branch_dec(&opal_tracepoint_key);
+}
+
+/*
+ * Perform __opal_call() bracketed by the entry and exit trace helpers and
+ * return the value __opal_call() returned.
+ */
+static s64 __opal_call_trace(s64 a0, s64 a1, s64 a2, s64 a3,
+ s64 a4, s64 a5, s64 a6, s64 a7,
+ unsigned long opcode, unsigned long msr)
+{
+ s64 ret;
+
+ __trace_opal_entry(a0, a1, a2, a3, a4, a5, a6, a7, opcode);
+ ret = __opal_call(a0, a1, a2, a3, a4, a5, a6, a7, opcode, msr);
+ __trace_opal_exit(opcode, ret);
+
+ return ret;
+}
+
+#define DO_TRACE (static_branch_unlikely(&opal_tracepoint_key))
+
+#else /* CONFIG_TRACEPOINTS */
+
+/*
+ * Stub for builds without CONFIG_TRACEPOINTS. DO_TRACE is defined as false
+ * in this configuration, so the only call site visible here (in opal_call())
+ * never takes this path; it exists so that call site still compiles.
+ */
+static s64 __opal_call_trace(s64 a0, s64 a1, s64 a2, s64 a3,
+ s64 a4, s64 a5, s64 a6, s64 a7,
+ unsigned long opcode, unsigned long msr)
+{
+ return 0;
+}
+
+#define DO_TRACE false
+#endif /* CONFIG_TRACEPOINTS */
+
+/*
+ * Common body of every OPAL_CALL() wrapper.
+ *
+ * Passes the eight arguments, @opcode and the current MSR (with MSR_EE
+ * cleared) to __opal_call(), or to __opal_call_trace() when DO_TRACE is
+ * true, and returns that function's result.
+ *
+ * When neither MSR_IR nor MSR_DR is set in the MSR, __opal_call() is invoked
+ * directly: no tracing and no irq-flag handling is done on that path.
+ * Otherwise the irq flags are saved, hard_irq_disable() is called before the
+ * firmware call, and the saved flags are restored afterwards.
+ */
+static int64_t opal_call(int64_t a0, int64_t a1, int64_t a2, int64_t a3,
+ int64_t a4, int64_t a5, int64_t a6, int64_t a7, int64_t opcode)
+{
+ unsigned long flags;
+ unsigned long msr = mfmsr();
+ bool mmu = (msr & (MSR_IR|MSR_DR));
+ int64_t ret;
+
+ /* OPAL call / firmware may use SRR and/or HSRR */
+ srr_regs_clobbered();
+
+ /* The MSR value handed to __opal_call() has external interrupts off */
+ msr &= ~MSR_EE;
+
+ if (unlikely(!mmu))
+ return __opal_call(a0, a1, a2, a3, a4, a5, a6, a7, opcode, msr);
+
+ local_save_flags(flags);
+ hard_irq_disable();
+
+ if (DO_TRACE) {
+ ret = __opal_call_trace(a0, a1, a2, a3, a4, a5, a6, a7, opcode, msr);
+ } else {
+ ret = __opal_call(a0, a1, a2, a3, a4, a5, a6, a7, opcode, msr);
+ }
+
+ local_irq_restore(flags);
+
+ return ret;
+}
+
+/*
+ * OPAL_CALL(name, opcode) - define the global function @name.
+ *
+ * The generated function takes eight int64_t arguments and forwards them,
+ * together with @opcode, to opal_call(). A matching prototype is emitted
+ * right before the definition (presumably to keep missing-prototype
+ * warnings quiet - confirm against the build flags).
+ */
+#define OPAL_CALL(name, opcode) \
+int64_t name(int64_t a0, int64_t a1, int64_t a2, int64_t a3, \
+ int64_t a4, int64_t a5, int64_t a6, int64_t a7); \
+int64_t name(int64_t a0, int64_t a1, int64_t a2, int64_t a3, \
+ int64_t a4, int64_t a5, int64_t a6, int64_t a7) \
+{ \
+ return opal_call(a0, a1, a2, a3, a4, a5, a6, a7, opcode); \
+}
+
+/*
+ * One wrapper per firmware call: every OPAL_CALL() line from here to the end
+ * of the file expands to a global function that forwards its eight arguments
+ * and the given opcode to opal_call().
+ */
+OPAL_CALL(opal_invalid_call, OPAL_INVALID_CALL);
+OPAL_CALL(opal_console_write, OPAL_CONSOLE_WRITE);
+OPAL_CALL(opal_console_read, OPAL_CONSOLE_READ);
+OPAL_CALL(opal_console_write_buffer_space, OPAL_CONSOLE_WRITE_BUFFER_SPACE);
+OPAL_CALL(opal_rtc_read, OPAL_RTC_READ);
+OPAL_CALL(opal_rtc_write, OPAL_RTC_WRITE);
+OPAL_CALL(opal_cec_power_down, OPAL_CEC_POWER_DOWN);
+OPAL_CALL(opal_cec_reboot, OPAL_CEC_REBOOT);
+OPAL_CALL(opal_cec_reboot2, OPAL_CEC_REBOOT2);
+OPAL_CALL(opal_read_nvram, OPAL_READ_NVRAM);
+OPAL_CALL(opal_write_nvram, OPAL_WRITE_NVRAM);
+OPAL_CALL(opal_handle_interrupt, OPAL_HANDLE_INTERRUPT);
+OPAL_CALL(opal_poll_events, OPAL_POLL_EVENTS);
+OPAL_CALL(opal_pci_set_hub_tce_memory, OPAL_PCI_SET_HUB_TCE_MEMORY);
+OPAL_CALL(opal_pci_set_phb_tce_memory, OPAL_PCI_SET_PHB_TCE_MEMORY);
+OPAL_CALL(opal_pci_config_read_byte, OPAL_PCI_CONFIG_READ_BYTE);
+OPAL_CALL(opal_pci_config_read_half_word, OPAL_PCI_CONFIG_READ_HALF_WORD);
+OPAL_CALL(opal_pci_config_read_word, OPAL_PCI_CONFIG_READ_WORD);
+OPAL_CALL(opal_pci_config_write_byte, OPAL_PCI_CONFIG_WRITE_BYTE);
+OPAL_CALL(opal_pci_config_write_half_word, OPAL_PCI_CONFIG_WRITE_HALF_WORD);
+OPAL_CALL(opal_pci_config_write_word, OPAL_PCI_CONFIG_WRITE_WORD);
+OPAL_CALL(opal_set_xive, OPAL_SET_XIVE);
+OPAL_CALL(opal_get_xive, OPAL_GET_XIVE);
+OPAL_CALL(opal_register_exception_handler, OPAL_REGISTER_OPAL_EXCEPTION_HANDLER);
+OPAL_CALL(opal_pci_eeh_freeze_status, OPAL_PCI_EEH_FREEZE_STATUS);
+OPAL_CALL(opal_pci_eeh_freeze_clear, OPAL_PCI_EEH_FREEZE_CLEAR);
+OPAL_CALL(opal_pci_eeh_freeze_set, OPAL_PCI_EEH_FREEZE_SET);
+OPAL_CALL(opal_pci_err_inject, OPAL_PCI_ERR_INJECT);
+OPAL_CALL(opal_pci_shpc, OPAL_PCI_SHPC);
+OPAL_CALL(opal_pci_phb_mmio_enable, OPAL_PCI_PHB_MMIO_ENABLE);
+OPAL_CALL(opal_pci_set_phb_mem_window, OPAL_PCI_SET_PHB_MEM_WINDOW);
+OPAL_CALL(opal_pci_map_pe_mmio_window, OPAL_PCI_MAP_PE_MMIO_WINDOW);
+OPAL_CALL(opal_pci_set_phb_table_memory, OPAL_PCI_SET_PHB_TABLE_MEMORY);
+OPAL_CALL(opal_pci_set_pe, OPAL_PCI_SET_PE);
+OPAL_CALL(opal_pci_set_peltv, OPAL_PCI_SET_PELTV);
+OPAL_CALL(opal_pci_get_xive_reissue, OPAL_PCI_GET_XIVE_REISSUE);
+OPAL_CALL(opal_pci_set_xive_reissue, OPAL_PCI_SET_XIVE_REISSUE);
+OPAL_CALL(opal_pci_set_xive_pe, OPAL_PCI_SET_XIVE_PE);
+OPAL_CALL(opal_get_xive_source, OPAL_GET_XIVE_SOURCE);
+OPAL_CALL(opal_get_msi_32, OPAL_GET_MSI_32);
+OPAL_CALL(opal_get_msi_64, OPAL_GET_MSI_64);
+OPAL_CALL(opal_start_cpu, OPAL_START_CPU);
+OPAL_CALL(opal_query_cpu_status, OPAL_QUERY_CPU_STATUS);
+OPAL_CALL(opal_write_oppanel, OPAL_WRITE_OPPANEL);
+OPAL_CALL(opal_pci_map_pe_dma_window, OPAL_PCI_MAP_PE_DMA_WINDOW);
+OPAL_CALL(opal_pci_map_pe_dma_window_real, OPAL_PCI_MAP_PE_DMA_WINDOW_REAL);
+OPAL_CALL(opal_pci_reset, OPAL_PCI_RESET);
+OPAL_CALL(opal_pci_get_hub_diag_data, OPAL_PCI_GET_HUB_DIAG_DATA);
+OPAL_CALL(opal_pci_get_phb_diag_data, OPAL_PCI_GET_PHB_DIAG_DATA);
+OPAL_CALL(opal_pci_fence_phb, OPAL_PCI_FENCE_PHB);
+OPAL_CALL(opal_pci_reinit, OPAL_PCI_REINIT);
+OPAL_CALL(opal_pci_mask_pe_error, OPAL_PCI_MASK_PE_ERROR);
+OPAL_CALL(opal_set_slot_led_status, OPAL_SET_SLOT_LED_STATUS);
+OPAL_CALL(opal_get_epow_status, OPAL_GET_EPOW_STATUS);
+OPAL_CALL(opal_get_dpo_status, OPAL_GET_DPO_STATUS);
+OPAL_CALL(opal_set_system_attention_led, OPAL_SET_SYSTEM_ATTENTION_LED);
+OPAL_CALL(opal_pci_next_error, OPAL_PCI_NEXT_ERROR);
+OPAL_CALL(opal_pci_poll, OPAL_PCI_POLL);
+OPAL_CALL(opal_pci_msi_eoi, OPAL_PCI_MSI_EOI);
+OPAL_CALL(opal_pci_get_phb_diag_data2, OPAL_PCI_GET_PHB_DIAG_DATA2);
+OPAL_CALL(opal_xscom_read, OPAL_XSCOM_READ);
+OPAL_CALL(opal_xscom_write, OPAL_XSCOM_WRITE);
+OPAL_CALL(opal_lpc_read, OPAL_LPC_READ);
+OPAL_CALL(opal_lpc_write, OPAL_LPC_WRITE);
+OPAL_CALL(opal_return_cpu, OPAL_RETURN_CPU);
+OPAL_CALL(opal_reinit_cpus, OPAL_REINIT_CPUS);
+OPAL_CALL(opal_read_elog, OPAL_ELOG_READ);
+OPAL_CALL(opal_send_ack_elog, OPAL_ELOG_ACK);
+OPAL_CALL(opal_get_elog_size, OPAL_ELOG_SIZE);
+OPAL_CALL(opal_resend_pending_logs, OPAL_ELOG_RESEND);
+OPAL_CALL(opal_write_elog, OPAL_ELOG_WRITE);
+OPAL_CALL(opal_validate_flash, OPAL_FLASH_VALIDATE);
+OPAL_CALL(opal_manage_flash, OPAL_FLASH_MANAGE);
+OPAL_CALL(opal_update_flash, OPAL_FLASH_UPDATE);
+OPAL_CALL(opal_resync_timebase, OPAL_RESYNC_TIMEBASE);
+OPAL_CALL(opal_check_token, OPAL_CHECK_TOKEN);
+OPAL_CALL(opal_dump_init, OPAL_DUMP_INIT);
+OPAL_CALL(opal_dump_info, OPAL_DUMP_INFO);
+OPAL_CALL(opal_dump_info2, OPAL_DUMP_INFO2);
+OPAL_CALL(opal_dump_read, OPAL_DUMP_READ);
+OPAL_CALL(opal_dump_ack, OPAL_DUMP_ACK);
+OPAL_CALL(opal_get_msg, OPAL_GET_MSG);
+OPAL_CALL(opal_write_oppanel_async, OPAL_WRITE_OPPANEL_ASYNC);
+OPAL_CALL(opal_check_completion, OPAL_CHECK_ASYNC_COMPLETION);
+OPAL_CALL(opal_dump_resend_notification, OPAL_DUMP_RESEND);
+OPAL_CALL(opal_sync_host_reboot, OPAL_SYNC_HOST_REBOOT);
+OPAL_CALL(opal_sensor_read, OPAL_SENSOR_READ);
+OPAL_CALL(opal_get_param, OPAL_GET_PARAM);
+OPAL_CALL(opal_set_param, OPAL_SET_PARAM);
+OPAL_CALL(opal_handle_hmi, OPAL_HANDLE_HMI);
+OPAL_CALL(opal_handle_hmi2, OPAL_HANDLE_HMI2);
+OPAL_CALL(opal_config_cpu_idle_state, OPAL_CONFIG_CPU_IDLE_STATE);
+OPAL_CALL(opal_slw_set_reg, OPAL_SLW_SET_REG);
+OPAL_CALL(opal_register_dump_region, OPAL_REGISTER_DUMP_REGION);
+OPAL_CALL(opal_unregister_dump_region, OPAL_UNREGISTER_DUMP_REGION);
+OPAL_CALL(opal_pci_set_phb_cxl_mode, OPAL_PCI_SET_PHB_CAPI_MODE);
+OPAL_CALL(opal_tpo_write, OPAL_WRITE_TPO);
+OPAL_CALL(opal_tpo_read, OPAL_READ_TPO);
+OPAL_CALL(opal_ipmi_send, OPAL_IPMI_SEND);
+OPAL_CALL(opal_ipmi_recv, OPAL_IPMI_RECV);
+OPAL_CALL(opal_i2c_request, OPAL_I2C_REQUEST);
+OPAL_CALL(opal_flash_read, OPAL_FLASH_READ);
+OPAL_CALL(opal_flash_write, OPAL_FLASH_WRITE);
+OPAL_CALL(opal_flash_erase, OPAL_FLASH_ERASE);
+OPAL_CALL(opal_prd_msg, OPAL_PRD_MSG);
+OPAL_CALL(opal_leds_get_ind, OPAL_LEDS_GET_INDICATOR);
+OPAL_CALL(opal_leds_set_ind, OPAL_LEDS_SET_INDICATOR);
+OPAL_CALL(opal_console_flush, OPAL_CONSOLE_FLUSH);
+OPAL_CALL(opal_get_device_tree, OPAL_GET_DEVICE_TREE);
+OPAL_CALL(opal_pci_get_presence_state, OPAL_PCI_GET_PRESENCE_STATE);
+OPAL_CALL(opal_pci_get_power_state, OPAL_PCI_GET_POWER_STATE);
+OPAL_CALL(opal_pci_set_power_state, OPAL_PCI_SET_POWER_STATE);
+OPAL_CALL(opal_int_get_xirr, OPAL_INT_GET_XIRR);
+OPAL_CALL(opal_int_set_cppr, OPAL_INT_SET_CPPR);
+OPAL_CALL(opal_int_eoi, OPAL_INT_EOI);
+OPAL_CALL(opal_int_set_mfrr, OPAL_INT_SET_MFRR);
+OPAL_CALL(opal_pci_tce_kill, OPAL_PCI_TCE_KILL);
+OPAL_CALL(opal_nmmu_set_ptcr, OPAL_NMMU_SET_PTCR);
+OPAL_CALL(opal_xive_reset, OPAL_XIVE_RESET);
+OPAL_CALL(opal_xive_get_irq_info, OPAL_XIVE_GET_IRQ_INFO);
+OPAL_CALL(opal_xive_get_irq_config, OPAL_XIVE_GET_IRQ_CONFIG);
+OPAL_CALL(opal_xive_set_irq_config, OPAL_XIVE_SET_IRQ_CONFIG);
+OPAL_CALL(opal_xive_get_queue_info, OPAL_XIVE_GET_QUEUE_INFO);
+OPAL_CALL(opal_xive_set_queue_info, OPAL_XIVE_SET_QUEUE_INFO);
+OPAL_CALL(opal_xive_donate_page, OPAL_XIVE_DONATE_PAGE);
+OPAL_CALL(opal_xive_alloc_vp_block, OPAL_XIVE_ALLOCATE_VP_BLOCK);
+OPAL_CALL(opal_xive_free_vp_block, OPAL_XIVE_FREE_VP_BLOCK);
+OPAL_CALL(opal_xive_allocate_irq_raw, OPAL_XIVE_ALLOCATE_IRQ);
+OPAL_CALL(opal_xive_free_irq, OPAL_XIVE_FREE_IRQ);
+OPAL_CALL(opal_xive_get_vp_info, OPAL_XIVE_GET_VP_INFO);
+OPAL_CALL(opal_xive_set_vp_info, OPAL_XIVE_SET_VP_INFO);
+OPAL_CALL(opal_xive_sync, OPAL_XIVE_SYNC);
+OPAL_CALL(opal_xive_dump, OPAL_XIVE_DUMP);
+OPAL_CALL(opal_xive_get_queue_state, OPAL_XIVE_GET_QUEUE_STATE);
+OPAL_CALL(opal_xive_set_queue_state, OPAL_XIVE_SET_QUEUE_STATE);
+OPAL_CALL(opal_xive_get_vp_state, OPAL_XIVE_GET_VP_STATE);
+OPAL_CALL(opal_signal_system_reset, OPAL_SIGNAL_SYSTEM_RESET);
+OPAL_CALL(opal_npu_map_lpar, OPAL_NPU_MAP_LPAR);
+OPAL_CALL(opal_imc_counters_init, OPAL_IMC_COUNTERS_INIT);
+OPAL_CALL(opal_imc_counters_start, OPAL_IMC_COUNTERS_START);
+OPAL_CALL(opal_imc_counters_stop, OPAL_IMC_COUNTERS_STOP);
+OPAL_CALL(opal_get_powercap, OPAL_GET_POWERCAP);
+OPAL_CALL(opal_set_powercap, OPAL_SET_POWERCAP);
+OPAL_CALL(opal_get_power_shift_ratio, OPAL_GET_POWER_SHIFT_RATIO);
+OPAL_CALL(opal_set_power_shift_ratio, OPAL_SET_POWER_SHIFT_RATIO);
+OPAL_CALL(opal_sensor_group_clear, OPAL_SENSOR_GROUP_CLEAR);
+OPAL_CALL(opal_quiesce, OPAL_QUIESCE);
+OPAL_CALL(opal_npu_spa_setup, OPAL_NPU_SPA_SETUP);
+OPAL_CALL(opal_npu_spa_clear_cache, OPAL_NPU_SPA_CLEAR_CACHE);
+OPAL_CALL(opal_npu_tl_set, OPAL_NPU_TL_SET);
+OPAL_CALL(opal_pci_get_pbcq_tunnel_bar, OPAL_PCI_GET_PBCQ_TUNNEL_BAR);
+OPAL_CALL(opal_pci_set_pbcq_tunnel_bar, OPAL_PCI_SET_PBCQ_TUNNEL_BAR);
+OPAL_CALL(opal_sensor_read_u64, OPAL_SENSOR_READ_U64);
+OPAL_CALL(opal_sensor_group_enable, OPAL_SENSOR_GROUP_ENABLE);
+OPAL_CALL(opal_nx_coproc_init, OPAL_NX_COPROC_INIT);
+OPAL_CALL(opal_mpipl_update, OPAL_MPIPL_UPDATE);
+OPAL_CALL(opal_mpipl_register_tag, OPAL_MPIPL_REGISTER_TAG);
+OPAL_CALL(opal_mpipl_query_tag, OPAL_MPIPL_QUERY_TAG);
+OPAL_CALL(opal_secvar_get, OPAL_SECVAR_GET);
+OPAL_CALL(opal_secvar_get_next, OPAL_SECVAR_GET_NEXT);
+OPAL_CALL(opal_secvar_enqueue_update, OPAL_SECVAR_ENQUEUE_UPDATE);
diff --git a/arch/powerpc/platforms/powernv/opal-core.c b/arch/powerpc/platforms/powernv/opal-core.c
new file mode 100644
index 000000000..bb7657115
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-core.c
@@ -0,0 +1,663 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Interface for exporting the OPAL ELF core.
+ * Heavily inspired from fs/proc/vmcore.c
+ *
+ * Copyright 2019, Hari Bathini, IBM Corporation.
+ */
+
+#define pr_fmt(fmt) "opal core: " fmt
+
+#include <linux/memblock.h>
+#include <linux/uaccess.h>
+#include <linux/proc_fs.h>
+#include <linux/elf.h>
+#include <linux/elfcore.h>
+#include <linux/kobject.h>
+#include <linux/sysfs.h>
+#include <linux/slab.h>
+#include <linux/crash_core.h>
+#include <linux/of.h>
+
+#include <asm/page.h>
+#include <asm/opal.h>
+#include <asm/fadump-internal.h>
+
+#include "opal-fadump.h"
+
+#define MAX_PT_LOAD_CNT 8
+
+/* NT_AUXV note related info */
+#define AUXV_CNT 1
+#define AUXV_DESC_SZ (((2 * AUXV_CNT) + 1) * sizeof(Elf64_Off))
+
+/*
+ * Everything needed to build and serve the OPAL core file: CPU state
+ * location, the memory ranges exported as PT_LOAD segments, and the buffer
+ * that holds the ELF headers and notes.
+ */
+struct opalcore_config {
+ u32 num_cpus;
+ /* PIR value of crashing CPU */
+ u32 crashing_cpu;
+
+ /* CPU state data info from F/W */
+ u64 cpu_state_destination_vaddr;
+ u64 cpu_state_data_size;
+ u64 cpu_state_entry_size;
+
+ /* OPAL memory to be exported as PT_LOAD segments */
+ u64 ptload_addr[MAX_PT_LOAD_CNT];
+ u64 ptload_size[MAX_PT_LOAD_CNT];
+ u64 ptload_cnt;
+
+ /* Pointer to the first PT_LOAD in the ELF core file */
+ Elf64_Phdr *ptload_phdr;
+
+ /* Total size of opalcore file. */
+ size_t opalcore_size;
+
+ /* Buffer for all the ELF core headers and the PT_NOTE */
+ size_t opalcorebuf_sz;
+ char *opalcorebuf;
+
+ /* NT_AUXV buffer */
+ char auxv_buf[AUXV_DESC_SZ];
+};
+
+/* One memory range served by read_opalcore(), linked on opalcore_list. */
+struct opalcore {
+ struct list_head list;
+ u64 paddr; /* start of the range; translated with __va() when read */
+ size_t size; /* length of the range in bytes */
+ loff_t offset; /* position of the range within the core file */
+};
+
+static LIST_HEAD(opalcore_list); /* struct opalcore entries, in file-offset order */
+static struct opalcore_config *oc_conf; /* NULL until opalcore_config_init() allocates it */
+static const struct opal_mpipl_fadump *opalc_metadata; /* from the OPAL_MPIPL_TAG_OPAL query */
+static const struct opal_mpipl_fadump *opalc_cpu_metadata; /* from the OPAL_MPIPL_TAG_CPU query */
+static struct kobject *mpipl_kobj; /* kobject the "core" sysfs file is removed from */
+
+/*
+ * Set the crashing CPU's signal to SIGUSR1 if the crash was initiated
+ * by the kernel, SIGTERM otherwise.
+ */
+bool kernel_initiated;
+
+/* Allocate a zeroed struct opalcore; returns NULL when the allocation fails. */
+static struct opalcore * __init get_new_element(void)
+{
+ return kzalloc(sizeof(struct opalcore), GFP_KERNEL);
+}
+
+/* Returns 1 when the config exists and its header buffer is allocated, else 0. */
+static inline int is_opalcore_usable(void)
+{
+	if (!oc_conf)
+		return 0;
+
+	return oc_conf->opalcorebuf != NULL;
+}
+
+/*
+ * Write one ELF note at @buf: a big-endian note header, then @name
+ * (including its NUL terminator), then @data_len bytes of @data. Name and
+ * descriptor each advance the cursor by a whole number of Elf64_Word units.
+ *
+ * Returns the position just past the note, i.e. where the next note goes.
+ */
+static Elf64_Word *__init append_elf64_note(Elf64_Word *buf, char *name,
+					    u32 type, void *data,
+					    size_t data_len)
+{
+	Elf64_Word name_len = strlen(name) + 1;
+	Elf64_Nhdr *hdr = (Elf64_Nhdr *)buf;
+	Elf64_Word *cursor = buf;
+
+	hdr->n_namesz = cpu_to_be32(name_len);
+	hdr->n_descsz = cpu_to_be32(data_len);
+	hdr->n_type = cpu_to_be32(type);
+	cursor += DIV_ROUND_UP(sizeof(*hdr), sizeof(Elf64_Word));
+
+	memcpy(cursor, name, name_len);
+	cursor += DIV_ROUND_UP(name_len, sizeof(Elf64_Word));
+
+	memcpy(cursor, data, data_len);
+	cursor += DIV_ROUND_UP(data_len, sizeof(Elf64_Word));
+
+	return cursor;
+}
+
+/*
+ * Build the NT_PRSTATUS payload for the thread with PIR @pir from @regs.
+ * @prstatus is cleared first; multi-byte fields are stored big-endian.
+ */
+static void __init fill_prstatus(struct elf_prstatus *prstatus, int pir,
+				 struct pt_regs *regs)
+{
+	short sig;
+
+	memset(prstatus, 0, sizeof(*prstatus));
+	elf_core_copy_regs(&(prstatus->pr_reg), regs);
+
+	/*
+	 * The PID field carries the PIR value. Because a PIR may be '0',
+	 * every PIR is offset by '100' to avoid misinterpretations in GDB.
+	 */
+	prstatus->common.pr_pid = cpu_to_be32(100 + pir);
+	prstatus->common.pr_ppid = cpu_to_be32(1);
+
+	/* Only the crashing CPU gets a signal recorded */
+	if (pir != oc_conf->crashing_cpu)
+		return;
+
+	/* SIGUSR1 for a crash initiated from the kernel, SIGTERM otherwise */
+	if (kernel_initiated)
+		sig = SIGUSR1;
+	else
+		sig = SIGTERM;
+	prstatus->common.pr_cursig = cpu_to_be16(sig);
+}
+
+/*
+ * Fill oc_conf->auxv_buf with a single AT_ENTRY pair (value
+ * @opal_boot_entry) followed by AT_NULL, all big-endian, and append it at
+ * @buf as an NT_AUXV note. Returns the position just past that note.
+ */
+static Elf64_Word *__init auxv_to_elf64_notes(Elf64_Word *buf,
+					      u64 opal_boot_entry)
+{
+	Elf64_Off *auxv = (Elf64_Off *)oc_conf->auxv_buf;
+
+	memset(auxv, 0, AUXV_DESC_SZ);
+
+	/* Entry point of OPAL */
+	auxv[0] = cpu_to_be64(AT_ENTRY);
+	auxv[1] = cpu_to_be64(opal_boot_entry);
+
+	/* end of vector */
+	auxv[2] = cpu_to_be64(AT_NULL);
+
+	return append_elf64_note(buf, CRASH_CORE_NOTE_NAME, NT_AUXV,
+				 oc_conf->auxv_buf, AUXV_DESC_SZ);
+}
+
+/*
+ * sysfs read handler for the OPAL core file.
+ *
+ * The file is the header buffer (ELF headers and notes) followed by the
+ * memory ranges on opalcore_list. Copies up to @count bytes starting at
+ * file offset @pos into @to and returns the number of bytes copied; returns
+ * 0 at or beyond the end of the file.
+ */
+static ssize_t read_opalcore(struct file *file, struct kobject *kobj,
+			     struct bin_attribute *bin_attr, char *to,
+			     loff_t pos, size_t count)
+{
+	struct opalcore *seg;
+	ssize_t chunk, avail;
+	loff_t cur = pos;
+
+	if (pos >= oc_conf->opalcore_size)
+		return 0;
+
+	/* Never hand out more than what is left in the file */
+	avail = oc_conf->opalcore_size - pos;
+	if (count > avail)
+		count = avail;
+
+	if (!count)
+		return 0;
+
+	/* Part (or all) of the request falls into the headers/notes buffer */
+	if (cur < oc_conf->opalcorebuf_sz) {
+		chunk = min_t(size_t, oc_conf->opalcorebuf_sz - cur, count);
+		memcpy(to, oc_conf->opalcorebuf + cur, chunk);
+		to += chunk;
+		cur += chunk;
+		count -= chunk;
+	}
+
+	/* The remainder comes from the memory ranges, in list order */
+	list_for_each_entry(seg, &opalcore_list, list) {
+		void *addr;
+
+		if (!count)
+			break;
+
+		/* The current position lies past this range */
+		if (cur >= seg->offset + seg->size)
+			continue;
+
+		chunk = min_t(size_t, seg->offset + seg->size - cur, count);
+		addr = (void *)(seg->paddr + cur - seg->offset);
+		memcpy(to, __va(addr), chunk);
+		to += chunk;
+		cur += chunk;
+		count -= chunk;
+	}
+
+	return (cur - pos);
+}
+
+/* Binary sysfs attribute "core" (mode 0400); reads are served by read_opalcore(). */
+static struct bin_attribute opal_core_attr = {
+ .attr = {.name = "core", .mode = 0400},
+ .read = read_opalcore
+};
+
+/*
+ * Read CPU state dump data and convert it into ELF notes.
+ *
+ * Each register entry is 16 bytes: a numerical identifier along with
+ * a GPR/SPR flag in the first 8 bytes and the register value in the next
+ * 8 bytes. For more details refer to F/W documentation.
+ *
+ * One NT_PRSTATUS note is appended at @buf per thread of an active core.
+ * The first note slot is reserved for the crashing CPU and filled in when
+ * that CPU is encountered. Returns the position just past the last note.
+ */
+static Elf64_Word * __init opalcore_append_cpu_notes(Elf64_Word *buf)
+{
+	u32 thread_pir, size_per_thread, regs_offset, regs_cnt, reg_esize;
+	struct hdat_fadump_thread_hdr *thdr;
+	struct elf_prstatus prstatus;
+	Elf64_Word *first_cpu_note;
+	struct pt_regs regs;
+	char *bufp;
+	int i;
+
+	size_per_thread = oc_conf->cpu_state_entry_size;
+	bufp = __va(oc_conf->cpu_state_destination_vaddr);
+
+	/*
+	 * Offset for register entries, entry size and registers count is
+	 * duplicated in every thread header in keeping with HDAT format.
+	 * Use these values from the first thread header.
+	 */
+	thdr = (struct hdat_fadump_thread_hdr *)bufp;
+	regs_offset = (offsetof(struct hdat_fadump_thread_hdr, offset) +
+		       be32_to_cpu(thdr->offset));
+	reg_esize = be32_to_cpu(thdr->esize);
+	regs_cnt  = be32_to_cpu(thdr->ecnt);
+
+	pr_debug("--------CPU State Data------------\n");
+	pr_debug("NumCpus     : %u\n", oc_conf->num_cpus);
+	pr_debug("\tOffset: %u, Entry size: %u, Cnt: %u\n",
+		 regs_offset, reg_esize, regs_cnt);
+
+	/*
+	 * Skip past the first CPU note. Fill this note with the
+	 * crashing CPU's prstatus.
+	 *
+	 * The placeholder is written from a zeroed prstatus: if the crashing
+	 * CPU is never matched in the loop below, the slot must not carry
+	 * whatever happened to be on the stack into the exported core file.
+	 */
+	memset(&prstatus, 0, sizeof(prstatus));
+	first_cpu_note = buf;
+	buf = append_elf64_note(buf, CRASH_CORE_NOTE_NAME, NT_PRSTATUS,
+				&prstatus, sizeof(prstatus));
+
+	for (i = 0; i < oc_conf->num_cpus; i++, bufp += size_per_thread) {
+		thdr = (struct hdat_fadump_thread_hdr *)bufp;
+		thread_pir = be32_to_cpu(thdr->pir);
+
+		pr_debug("[%04d] PIR: 0x%x, core state: 0x%02x\n",
+			 i, thread_pir, thdr->core_state);
+
+		/*
+		 * Register state data of MAX cores is provided by firmware,
+		 * but some of these cores may not be active. So, while
+		 * processing register state data, check core state and
+		 * skip threads that belong to inactive cores.
+		 */
+		if (thdr->core_state == HDAT_FADUMP_CORE_INACTIVE)
+			continue;
+
+		opal_fadump_read_regs((bufp + regs_offset), regs_cnt,
+				      reg_esize, false, &regs);
+
+		pr_debug("PIR 0x%x - R1 : 0x%llx, NIP : 0x%llx\n", thread_pir,
+			 be64_to_cpu(regs.gpr[1]), be64_to_cpu(regs.nip));
+		fill_prstatus(&prstatus, thread_pir, &regs);
+
+		if (thread_pir != oc_conf->crashing_cpu) {
+			buf = append_elf64_note(buf, CRASH_CORE_NOTE_NAME,
+						NT_PRSTATUS, &prstatus,
+						sizeof(prstatus));
+		} else {
+			/*
+			 * Add crashing CPU as the first NT_PRSTATUS note for
+			 * GDB to process the core file appropriately.
+			 */
+			append_elf64_note(first_cpu_note, CRASH_CORE_NOTE_NAME,
+					  NT_PRSTATUS, &prstatus,
+					  sizeof(prstatus));
+		}
+	}
+
+	return buf;
+}
+
+/*
+ * Build the in-memory part of the OPAL core file.
+ *
+ * Allocates and reserves the header buffer, writes the ELF header, one
+ * PT_NOTE program header and one PT_LOAD program header per exported memory
+ * range (each range also gets a struct opalcore on opalcore_list), then
+ * appends the CPU and auxv notes and records the total file size.
+ *
+ * Returns 0 on success or -ENOMEM when an allocation fails.
+ */
+static int __init create_opalcore(void)
+{
+	/*
+	 * Start from 0 so that a failed device-tree read (which is only
+	 * warned about) yields defined values in the headers and the auxv
+	 * note rather than uninitialised stack contents.
+	 */
+	u64 opal_boot_entry = 0, opal_base_addr = 0, paddr;
+	u32 hdr_size, cpu_notes_size, count;
+	struct device_node *dn;
+	struct opalcore *new;
+	loff_t opalcore_off;
+	struct page *page;
+	Elf64_Phdr *phdr;
+	Elf64_Ehdr *elf;
+	int i, ret;
+	char *bufp;
+
+	/* Get size of header & CPU notes for OPAL core */
+	hdr_size = (sizeof(Elf64_Ehdr) +
+		    ((oc_conf->ptload_cnt + 1) * sizeof(Elf64_Phdr)));
+	cpu_notes_size = ((oc_conf->num_cpus * (CRASH_CORE_NOTE_HEAD_BYTES +
+			  CRASH_CORE_NOTE_NAME_BYTES +
+			  CRASH_CORE_NOTE_DESC_BYTES)) +
+			  (CRASH_CORE_NOTE_HEAD_BYTES +
+			  CRASH_CORE_NOTE_NAME_BYTES + AUXV_DESC_SZ));
+
+	/* Allocate buffer to setup OPAL core */
+	oc_conf->opalcorebuf_sz = PAGE_ALIGN(hdr_size + cpu_notes_size);
+	oc_conf->opalcorebuf = alloc_pages_exact(oc_conf->opalcorebuf_sz,
+						 GFP_KERNEL | __GFP_ZERO);
+	if (!oc_conf->opalcorebuf) {
+		pr_err("Not enough memory to setup OPAL core (size: %zu)\n",
+		       oc_conf->opalcorebuf_sz);
+		oc_conf->opalcorebuf_sz = 0;
+		return -ENOMEM;
+	}
+	count = oc_conf->opalcorebuf_sz / PAGE_SIZE;
+	page = virt_to_page(oc_conf->opalcorebuf);
+	for (i = 0; i < count; i++)
+		mark_page_reserved(page + i);
+
+	pr_debug("opalcorebuf = 0x%llx\n", (u64)oc_conf->opalcorebuf);
+
+	/* Read OPAL related device-tree entries */
+	dn = of_find_node_by_name(NULL, "ibm,opal");
+	if (dn) {
+		ret = of_property_read_u64(dn, "opal-base-address",
+					   &opal_base_addr);
+		pr_debug("opal-base-address: %llx\n", opal_base_addr);
+		ret |= of_property_read_u64(dn, "opal-boot-address",
+					    &opal_boot_entry);
+		pr_debug("opal-boot-address: %llx\n", opal_boot_entry);
+	}
+	/* ret is only evaluated when dn is non-NULL, i.e. when it was set */
+	if (!dn || ret)
+		pr_warn("WARNING: Failed to read OPAL base & entry values\n");
+
+	of_node_put(dn);
+
+	/* Use count to keep track of the program headers */
+	count = 0;
+
+	bufp = oc_conf->opalcorebuf;
+	elf = (Elf64_Ehdr *)bufp;
+	bufp += sizeof(Elf64_Ehdr);
+	memcpy(elf->e_ident, ELFMAG, SELFMAG);
+	elf->e_ident[EI_CLASS] = ELF_CLASS;
+	elf->e_ident[EI_DATA] = ELFDATA2MSB;
+	elf->e_ident[EI_VERSION] = EV_CURRENT;
+	elf->e_ident[EI_OSABI] = ELF_OSABI;
+	memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);
+	elf->e_type = cpu_to_be16(ET_CORE);
+	elf->e_machine = cpu_to_be16(ELF_ARCH);
+	elf->e_version = cpu_to_be32(EV_CURRENT);
+	elf->e_entry = 0;
+	elf->e_phoff = cpu_to_be64(sizeof(Elf64_Ehdr));
+	elf->e_shoff = 0;
+	elf->e_flags = 0;
+
+	elf->e_ehsize = cpu_to_be16(sizeof(Elf64_Ehdr));
+	elf->e_phentsize = cpu_to_be16(sizeof(Elf64_Phdr));
+	elf->e_phnum = 0;
+	elf->e_shentsize = 0;
+	elf->e_shnum = 0;
+	elf->e_shstrndx = 0;
+
+	/* The PT_NOTE segment starts right after the program headers */
+	phdr = (Elf64_Phdr *)bufp;
+	bufp += sizeof(Elf64_Phdr);
+	phdr->p_type	= cpu_to_be32(PT_NOTE);
+	phdr->p_flags	= 0;
+	phdr->p_align	= 0;
+	phdr->p_paddr	= phdr->p_vaddr = 0;
+	phdr->p_offset	= cpu_to_be64(hdr_size);
+	phdr->p_filesz	= phdr->p_memsz = cpu_to_be64(cpu_notes_size);
+	count++;
+
+	/* PT_LOAD data follows the header buffer in the file */
+	opalcore_off = oc_conf->opalcorebuf_sz;
+	oc_conf->ptload_phdr  = (Elf64_Phdr *)bufp;
+	paddr = 0;
+	for (i = 0; i < oc_conf->ptload_cnt; i++) {
+		phdr = (Elf64_Phdr *)bufp;
+		bufp += sizeof(Elf64_Phdr);
+		phdr->p_type	= cpu_to_be32(PT_LOAD);
+		phdr->p_flags	= cpu_to_be32(PF_R|PF_W|PF_X);
+		phdr->p_align	= 0;
+
+		new = get_new_element();
+		if (!new)
+			return -ENOMEM;
+		new->paddr  = oc_conf->ptload_addr[i];
+		new->size   = oc_conf->ptload_size[i];
+		new->offset = opalcore_off;
+		list_add_tail(&new->list, &opalcore_list);
+
+		phdr->p_paddr	= cpu_to_be64(paddr);
+		phdr->p_vaddr	= cpu_to_be64(opal_base_addr + paddr);
+		phdr->p_filesz	= phdr->p_memsz  =
+			cpu_to_be64(oc_conf->ptload_size[i]);
+		phdr->p_offset	= cpu_to_be64(opalcore_off);
+
+		count++;
+		opalcore_off += oc_conf->ptload_size[i];
+		paddr += oc_conf->ptload_size[i];
+	}
+
+	elf->e_phnum = cpu_to_be16(count);
+
+	bufp = (char *)opalcore_append_cpu_notes((Elf64_Word *)bufp);
+	bufp = (char *)auxv_to_elf64_notes((Elf64_Word *)bufp, opal_boot_entry);
+
+	oc_conf->opalcore_size = opalcore_off;
+	return 0;
+}
+
+/*
+ * Tear down everything set up for the OPAL core file: remove the sysfs
+ * file, free the list of exported memory ranges, release the header buffer
+ * and free the config. Does nothing when the config was never allocated.
+ */
+static void opalcore_cleanup(void)
+{
+	struct opalcore *m, *tmp;
+
+	if (oc_conf == NULL)
+		return;
+
+	/* Remove OPAL core sysfs file */
+	sysfs_remove_bin_file(mpipl_kobj, &opal_core_attr);
+	oc_conf->ptload_phdr = NULL;
+	oc_conf->ptload_cnt = 0;
+
+	/*
+	 * Free the range descriptors that create_opalcore() linked on
+	 * opalcore_list; with the sysfs file gone nothing reads them anymore.
+	 */
+	list_for_each_entry_safe(m, tmp, &opalcore_list, list) {
+		list_del(&m->list);
+		kfree(m);
+	}
+
+	/* free the buffer used for setting up OPAL core */
+	if (oc_conf->opalcorebuf) {
+		void *end = (void *)((u64)oc_conf->opalcorebuf +
+				     oc_conf->opalcorebuf_sz);
+
+		free_reserved_area(oc_conf->opalcorebuf, end, -1, NULL);
+		oc_conf->opalcorebuf = NULL;
+		oc_conf->opalcorebuf_sz = 0;
+	}
+
+	kfree(oc_conf);
+	oc_conf = NULL;
+}
+__exitcall(opalcore_cleanup);
+
+ /*
+ * Look up the "/ibm,opal/dump" node and, when the "mpipl-boot" property is
+ * present, fill oc_conf from the OPAL and CPU metadata that firmware
+ * reports through opal_mpipl_query_tag().
+ *
+ * Returns nothing. The caller checks oc_conf afterwards: failures after the
+ * allocation go through opalcore_cleanup(), which frees oc_conf and resets
+ * it to NULL. Every exit after a successful node lookup drops the node
+ * reference.
+ */
+ static void __init opalcore_config_init(void)
+ {
+ u32 idx, cpu_data_version;
+ struct device_node *np;
+ const __be32 *prop;
+ u64 addr = 0;
+ int i, ret;
+
+ np = of_find_node_by_path("/ibm,opal/dump");
+ if (np == NULL)
+ return;
+
+ if (!of_device_is_compatible(np, "ibm,opal-dump")) {
+ pr_warn("Support missing for this f/w version!\n");
+ /* Drop the reference taken by of_find_node_by_path() */
+ of_node_put(np);
+ return;
+ }
+
+ /* Check if dump has been initiated on last reboot */
+ prop = of_get_property(np, "mpipl-boot", NULL);
+ if (!prop) {
+ of_node_put(np);
+ return;
+ }
+
+ /* Get OPAL metadata */
+ ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_OPAL, &addr);
+ if ((ret != OPAL_SUCCESS) || !addr) {
+ pr_err("Failed to get OPAL metadata (%d)\n", ret);
+ goto error_out;
+ }
+
+ /* The tag value is converted from big-endian before use */
+ addr = be64_to_cpu(addr);
+ pr_debug("OPAL metadata addr: %llx\n", addr);
+ opalc_metadata = __va(addr);
+
+ /* Get OPAL CPU metadata */
+ ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_CPU, &addr);
+ if ((ret != OPAL_SUCCESS) || !addr) {
+ pr_err("Failed to get OPAL CPU metadata (%d)\n", ret);
+ goto error_out;
+ }
+
+ addr = be64_to_cpu(addr);
+ pr_debug("CPU metadata addr: %llx\n", addr);
+ opalc_cpu_metadata = __va(addr);
+
+ /* Allocate memory for config buffer */
+ oc_conf = kzalloc(sizeof(struct opalcore_config), GFP_KERNEL);
+ if (oc_conf == NULL)
+ goto error_out;
+
+ /* Parse OPAL metadata; a version mismatch is only warned about */
+ if (opalc_metadata->version != OPAL_MPIPL_VERSION) {
+ pr_warn("Supported OPAL metadata version: %u, found: %u!\n",
+ OPAL_MPIPL_VERSION, opalc_metadata->version);
+ pr_warn("WARNING: F/W using newer OPAL metadata format!!\n");
+ }
+
+ /* Copy at most MAX_PT_LOAD_CNT regions into the PT_LOAD tables */
+ oc_conf->ptload_cnt = 0;
+ idx = be32_to_cpu(opalc_metadata->region_cnt);
+ if (idx > MAX_PT_LOAD_CNT) {
+ pr_warn("WARNING: OPAL regions count (%d) adjusted to limit (%d)\n",
+ idx, MAX_PT_LOAD_CNT);
+ idx = MAX_PT_LOAD_CNT;
+ }
+ for (i = 0; i < idx; i++) {
+ oc_conf->ptload_addr[oc_conf->ptload_cnt] =
+ be64_to_cpu(opalc_metadata->region[i].dest);
+ oc_conf->ptload_size[oc_conf->ptload_cnt++] =
+ be64_to_cpu(opalc_metadata->region[i].size);
+ }
+ oc_conf->ptload_cnt = i;
+ oc_conf->crashing_cpu = be32_to_cpu(opalc_metadata->crashing_pir);
+
+ if (!oc_conf->ptload_cnt) {
+ pr_err("OPAL memory regions not found\n");
+ goto error_out;
+ }
+
+ /* Parse OPAL CPU metadata */
+ cpu_data_version = be32_to_cpu(opalc_cpu_metadata->cpu_data_version);
+ if (cpu_data_version != HDAT_FADUMP_CPU_DATA_VER) {
+ pr_warn("Supported CPU data version: %u, found: %u!\n",
+ HDAT_FADUMP_CPU_DATA_VER, cpu_data_version);
+ pr_warn("WARNING: F/W using newer CPU state data format!!\n");
+ }
+
+ addr = be64_to_cpu(opalc_cpu_metadata->region[0].dest);
+ if (!addr) {
+ pr_err("CPU state data not found!\n");
+ goto error_out;
+ }
+ oc_conf->cpu_state_destination_vaddr = (u64)__va(addr);
+
+ oc_conf->cpu_state_data_size =
+ be64_to_cpu(opalc_cpu_metadata->region[0].size);
+ oc_conf->cpu_state_entry_size =
+ be32_to_cpu(opalc_cpu_metadata->cpu_data_size);
+
+ /* Reject sizes that would divide by zero or yield zero CPUs */
+ if ((oc_conf->cpu_state_entry_size == 0) ||
+ (oc_conf->cpu_state_entry_size > oc_conf->cpu_state_data_size)) {
+ pr_err("CPU state data is invalid.\n");
+ goto error_out;
+ }
+ oc_conf->num_cpus = (oc_conf->cpu_state_data_size /
+ oc_conf->cpu_state_entry_size);
+
+ of_node_put(np);
+ return;
+
+ error_out:
+ pr_err("Could not export /sys/firmware/opal/core\n");
+ opalcore_cleanup();
+ of_node_put(np);
+ }
+
+ /*
+ * sysfs store handler for the write-only "release_core" attribute.
+ * Writing the integer 1 removes the exported core file and frees the
+ * memory behind it through opalcore_cleanup(); any other input is rejected.
+ *
+ * Returns count on success, -EINVAL when the input does not parse as an
+ * integer or is not 1, and -EPERM when oc_conf is already NULL.
+ */
+ static ssize_t release_core_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t count)
+ {
+ int value = -1;
+
+ if (kstrtoint(buf, 0, &value) || value != 1)
+ return -EINVAL;
+
+ if (!oc_conf) {
+ pr_err("'/sys/firmware/opal/core' file not accessible!\n");
+ return -EPERM;
+ }
+
+ /* Drop '/sys/firmware/opal/core' together with its backing memory. */
+ opalcore_cleanup();
+
+ return count;
+ }
+
+ /* Write-only "release_core" attribute, handled by release_core_store() */
+ static struct kobj_attribute opalcore_rel_attr = __ATTR_WO(release_core);
+
+ /* NULL-terminated list of plain attributes in the mpipl group */
+ static struct attribute *mpipl_attr[] = {
+ &opalcore_rel_attr.attr,
+ NULL,
+ };
+
+ /* NULL-terminated list of binary attributes in the mpipl group */
+ static struct bin_attribute *mpipl_bin_attr[] = {
+ &opal_core_attr,
+ NULL,
+
+ };
+
+ /* Attribute group registered on mpipl_kobj by opalcore_init() */
+ static const struct attribute_group mpipl_group = {
+ .attrs = mpipl_attr,
+ .bin_attrs = mpipl_bin_attr,
+ };
+
+ /*
+ * Initcall that exports the OPAL core: parse the firmware metadata, build
+ * the core image, then publish it through the "mpipl" sysfs directory and
+ * a compatibility "core" link under opal_kobj.
+ *
+ * Returns 0 on success, -1 when there is nothing to export or the core is
+ * not usable, -ENOMEM when the mpipl kobject cannot be created, or the
+ * error code reported by the sysfs helpers.
+ */
+ static int __init opalcore_init(void)
+ {
+ int rc = -1;
+
+ opalcore_config_init();
+
+ if (oc_conf == NULL)
+ return rc;
+
+ create_opalcore();
+
+ /*
+ * The return value of create_opalcore() is not checked here; the
+ * core is exported only if is_opalcore_usable() reports it usable.
+ */
+ if (!(is_opalcore_usable())) {
+ pr_err("Failed to export /sys/firmware/opal/mpipl/core\n");
+ opalcore_cleanup();
+ return rc;
+ }
+
+ /* Set OPAL core file size */
+ opal_core_attr.size = oc_conf->opalcore_size;
+
+ mpipl_kobj = kobject_create_and_add("mpipl", opal_kobj);
+ if (!mpipl_kobj) {
+ /* NOTE(review): oc_conf and its buffer are not released here */
+ pr_err("unable to create mpipl kobject\n");
+ return -ENOMEM;
+ }
+
+ /* Export OPAL core sysfs file */
+ rc = sysfs_create_group(mpipl_kobj, &mpipl_group);
+ if (rc) {
+ pr_err("mpipl sysfs group creation failed (%d)\n", rc);
+ opalcore_cleanup();
+ return rc;
+ }
+ /* The /sys/firmware/opal/core is moved to /sys/firmware/opal/mpipl/
+ * directory, need to create symlink at old location to maintain
+ * backward compatibility.
+ */
+ rc = compat_only_sysfs_link_entry_to_kobj(opal_kobj, mpipl_kobj,
+ "core", NULL);
+ if (rc) {
+ /* NOTE(review): the mpipl group stays registered on this path */
+ pr_err("unable to create core symlink (%d)\n", rc);
+ return rc;
+ }
+
+ return 0;
+ }
+ fs_initcall(opalcore_init);
diff --git a/arch/powerpc/platforms/powernv/opal-dump.c b/arch/powerpc/platforms/powernv/opal-dump.c
new file mode 100644
index 000000000..16c5860f1
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-dump.c
@@ -0,0 +1,459 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * PowerNV OPAL Dump Interface
+ *
+ * Copyright 2013,2014 IBM Corp.
+ */
+
+#include <linux/kobject.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/pagemap.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+
+#include <asm/opal.h>
+
+#define DUMP_TYPE_FSP 0x01
+
+ /*
+ * One platform dump exposed in sysfs. The embedded kobject carries the
+ * lifetime; dump_release() frees buffer and the object itself.
+ */
+ struct dump_obj {
+ struct kobject kobj;
+ struct bin_attribute dump_attr; /* the "dump" binary file */
+ uint32_t id; /* becomes object name */
+ uint32_t type;
+ uint32_t size; /* dump size, also used as the bin file size */
+ char *buffer; /* allocated on first read of the dump file */
+ };
+ /* Recover the dump_obj that embeds the given kobject */
+ #define to_dump_obj(x) container_of(x, struct dump_obj, kobj)
+
+ /*
+ * Attribute wrapper whose show/store callbacks receive the dump_obj
+ * directly; dump_attr_show()/dump_attr_store() dispatch to them.
+ */
+ struct dump_attribute {
+ struct attribute attr;
+ ssize_t (*show)(struct dump_obj *dump, struct dump_attribute *attr,
+ char *buf);
+ ssize_t (*store)(struct dump_obj *dump, struct dump_attribute *attr,
+ const char *buf, size_t count);
+ };
+ /* Recover the dump_attribute that embeds the given attribute */
+ #define to_dump_attr(x) container_of(x, struct dump_attribute, attr)
+
+ /* Show the dump ID as a hexadecimal number followed by a newline. */
+ static ssize_t dump_id_show(struct dump_obj *dump_obj,
+ struct dump_attribute *attr,
+ char *buf)
+ {
+ return sprintf(buf, "0x%x\n", dump_obj->id);
+ }
+
+ /*
+ * Map a dump type code to its human readable name. Codes 0x01..0x03 are
+ * known; everything else yields "unknown".
+ */
+ static const char* dump_type_to_string(uint32_t type)
+ {
+ static const char * const type_names[] = {
+ [0x01] = "SP Dump",
+ [0x02] = "System/Platform Dump",
+ [0x03] = "SMA Dump",
+ };
+
+ if (type < 0x01 || type > 0x03)
+ return "unknown";
+
+ return type_names[type];
+ }
+
+ /* Show the dump type as a hex code followed by its descriptive name. */
+ static ssize_t dump_type_show(struct dump_obj *dump_obj,
+ struct dump_attribute *attr,
+ char *buf)
+ {
+
+ return sprintf(buf, "0x%x %s\n", dump_obj->type,
+ dump_type_to_string(dump_obj->type));
+ }
+
+ /* Reading the "acknowledge" file only returns a fixed usage hint. */
+ static ssize_t dump_ack_show(struct dump_obj *dump_obj,
+ struct dump_attribute *attr,
+ char *buf)
+ {
+ return sprintf(buf, "ack - acknowledge dump\n");
+ }
+
+/*
+ * Send acknowledgement to OPAL
+ */
+static int64_t dump_send_ack(uint32_t dump_id)
+{
+ int rc;
+
+ rc = opal_dump_ack(dump_id);
+ if (rc)
+ pr_warn("%s: Failed to send ack to Dump ID 0x%x (%d)\n",
+ __func__, dump_id, rc);
+ return rc;
+}
+
+ /*
+ * Store handler for the "acknowledge" file. Any write acknowledges the
+ * dump; the written data is not inspected. Always returns count.
+ */
+ static ssize_t dump_ack_store(struct dump_obj *dump_obj,
+ struct dump_attribute *attr,
+ const char *buf,
+ size_t count)
+ {
+ /*
+ * Try to self remove this attribute. If we are successful,
+ * delete the kobject itself.
+ */
+ if (sysfs_remove_file_self(&dump_obj->kobj, &attr->attr)) {
+ /* The result of the ack is only logged, not propagated */
+ dump_send_ack(dump_obj->id);
+ kobject_put(&dump_obj->kobj);
+ }
+ return count;
+ }
+
+ /* Attributes of a dump
+ * The binary attribute of the dump itself is dynamic
+ * due to the dynamic size of the dump
+ */
+ /* Read-only "id" file */
+ static struct dump_attribute id_attribute =
+ __ATTR(id, 0444, dump_id_show, NULL);
+ /* Read-only "type" file */
+ static struct dump_attribute type_attribute =
+ __ATTR(type, 0444, dump_type_show, NULL);
+ /* "acknowledge" file: readable and writable by owner and group */
+ static struct dump_attribute ack_attribute =
+ __ATTR(acknowledge, 0660, dump_ack_show, dump_ack_store);
+
+ /* Reading "initiate_dump" only returns a fixed usage hint. */
+ static ssize_t init_dump_show(struct dump_obj *dump_obj,
+ struct dump_attribute *attr,
+ char *buf)
+ {
+ return sprintf(buf, "1 - initiate Service Processor(FSP) dump\n");
+ }
+
+ /*
+ * Ask OPAL to initiate a dump of the given @type via opal_dump_init().
+ * Logs a warning on a non-zero result and returns that result.
+ */
+ static int64_t dump_fips_init(uint8_t type)
+ {
+ int rc;
+
+ rc = opal_dump_init(type);
+ if (rc)
+ pr_warn("%s: Failed to initiate FSP dump (%d)\n",
+ __func__, rc);
+ return rc;
+ }
+
+ /*
+ * Store handler for "initiate_dump". Any write requests an FSP dump; the
+ * written data is not inspected. Returns count whether or not the
+ * request succeeded (a failure is only logged by dump_fips_init()).
+ */
+ static ssize_t init_dump_store(struct dump_obj *dump_obj,
+ struct dump_attribute *attr,
+ const char *buf,
+ size_t count)
+ {
+ int rc;
+
+ rc = dump_fips_init(DUMP_TYPE_FSP);
+ if (rc == OPAL_SUCCESS)
+ pr_info("%s: Initiated FSP dump\n", __func__);
+
+ return count;
+ }
+
+ /* Owner-only "initiate_dump" file */
+ static struct dump_attribute initiate_attribute =
+ __ATTR(initiate_dump, 0600, init_dump_show, init_dump_store);
+
+ /* NULL-terminated attribute list for the group below */
+ static struct attribute *initiate_attrs[] = {
+ &initiate_attribute.attr,
+ NULL,
+ };
+
+ /* Group registered on the dump kset's kobject in opal_platform_dump_init() */
+ static const struct attribute_group initiate_attr_group = {
+ .attrs = initiate_attrs,
+ };
+
+ /* kset that holds all dump objects; created in opal_platform_dump_init() */
+ static struct kset *dump_kset;
+
+ /*
+ * Generic sysfs show entry point: forward to the dump_attribute's own
+ * show callback, or fail with -EIO when the attribute has none.
+ */
+ static ssize_t dump_attr_show(struct kobject *kobj,
+ struct attribute *attr,
+ char *buf)
+ {
+ struct dump_attribute *dattr = to_dump_attr(attr);
+
+ if (dattr->show == NULL)
+ return -EIO;
+
+ return dattr->show(to_dump_obj(kobj), dattr, buf);
+ }
+
+ /*
+ * Generic sysfs store entry point: forward to the dump_attribute's own
+ * store callback, or fail with -EIO when the attribute has none.
+ */
+ static ssize_t dump_attr_store(struct kobject *kobj,
+ struct attribute *attr,
+ const char *buf, size_t len)
+ {
+ struct dump_attribute *dattr = to_dump_attr(attr);
+
+ if (dattr->store == NULL)
+ return -EIO;
+
+ return dattr->store(to_dump_obj(kobj), dattr, buf, len);
+ }
+
+ /* sysfs operations used by dump_ktype; both dispatch via dump_attribute */
+ static const struct sysfs_ops dump_sysfs_ops = {
+ .show = dump_attr_show,
+ .store = dump_attr_store,
+ };
+
+ /*
+ * kobject release callback: free the dump data buffer and then the
+ * dump_obj that embeds @kobj.
+ */
+ static void dump_release(struct kobject *kobj)
+ {
+ struct dump_obj *obj = to_dump_obj(kobj);
+
+ vfree(obj->buffer);
+ kfree(obj);
+ }
+
+ /* Default attributes of each dump object; NULL-terminated */
+ static struct attribute *dump_default_attrs[] = {
+ &id_attribute.attr,
+ &type_attribute.attr,
+ &ack_attribute.attr,
+ NULL,
+ };
+ /* Provides dump_default_groups, referenced by dump_ktype below */
+ ATTRIBUTE_GROUPS(dump_default);
+
+ /* kobject type used for every dump_obj (see create_dump_obj()) */
+ static struct kobj_type dump_ktype = {
+ .sysfs_ops = &dump_sysfs_ops,
+ .release = &dump_release,
+ .default_groups = dump_default_groups,
+ };
+
+ /*
+ * Query OPAL for the ID, size and type of the available dump.
+ *
+ * On success the three output parameters are filled with CPU-endian
+ * values and 0 is returned. On failure the OPAL return code is returned
+ * and the outputs are left untouched.
+ */
+ static int64_t dump_read_info(uint32_t *dump_id, uint32_t *dump_size, uint32_t *dump_type)
+ {
+ __be32 id, size, type;
+ int rc;
+
+ /* Preset the type; it keeps this value if the fallback call is used */
+ type = cpu_to_be32(0xffffffff);
+
+ rc = opal_dump_info2(&id, &size, &type);
+ /* Fall back to the call without a type argument on OPAL_PARAMETER */
+ if (rc == OPAL_PARAMETER)
+ rc = opal_dump_info(&id, &size);
+
+ if (rc) {
+ pr_warn("%s: Failed to get dump info (%d)\n",
+ __func__, rc);
+ return rc;
+ }
+
+ *dump_id = be32_to_cpu(id);
+ *dump_size = be32_to_cpu(size);
+ *dump_type = be32_to_cpu(type);
+
+ return rc;
+ }
+
+ /*
+ * Allocate dump->buffer and fetch the dump contents from OPAL into it.
+ *
+ * Returns the final opal_dump_read() result, or -ENOMEM when the buffer
+ * or the scatter-gather list cannot be allocated. dump->buffer is not
+ * freed here on failure; the caller (dump_attr_read()) does that.
+ */
+ static int64_t dump_read_data(struct dump_obj *dump)
+ {
+ struct opal_sg_list *list;
+ uint64_t addr;
+ int64_t rc;
+
+ /* Allocate memory */
+ dump->buffer = vzalloc(PAGE_ALIGN(dump->size));
+ if (!dump->buffer) {
+ pr_err("%s : Failed to allocate memory\n", __func__);
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ /* Generate SG list */
+ list = opal_vmalloc_to_sg_list(dump->buffer, dump->size);
+ if (!list) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ /* First entry address */
+ addr = __pa(list);
+
+ /* Fetch data */
+ /* Seed rc so the loop runs at least once; retry while OPAL is busy */
+ rc = OPAL_BUSY_EVENT;
+ while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
+ rc = opal_dump_read(dump->id, addr);
+ /* Only OPAL_BUSY_EVENT polls events and sleeps before retrying */
+ if (rc == OPAL_BUSY_EVENT) {
+ opal_poll_events(NULL);
+ msleep(20);
+ }
+ }
+
+ if (rc != OPAL_SUCCESS && rc != OPAL_PARTIAL)
+ pr_warn("%s: Extract dump failed for ID 0x%x\n",
+ __func__, dump->id);
+
+ /* Free SG list */
+ opal_free_sg_list(list);
+
+ out:
+ return rc;
+ }
+
+ /*
+ * Read handler of the per-dump "dump" binary file. The dump is fetched
+ * from OPAL on the first read and kept in dump->buffer afterwards.
+ *
+ * Returns count after copying, or -EIO when fetching fails or completes
+ * only partially.
+ */
+ static ssize_t dump_attr_read(struct file *filep, struct kobject *kobj,
+ struct bin_attribute *bin_attr,
+ char *buffer, loff_t pos, size_t count)
+ {
+ ssize_t rc;
+
+ struct dump_obj *dump = to_dump_obj(kobj);
+
+ if (!dump->buffer) {
+ rc = dump_read_data(dump);
+
+ if (rc != OPAL_SUCCESS && rc != OPAL_PARTIAL) {
+ /* Drop the buffer so that the next read retries the fetch */
+ vfree(dump->buffer);
+ dump->buffer = NULL;
+
+ return -EIO;
+ }
+ if (rc == OPAL_PARTIAL) {
+ /* On a partial read, we just return EIO
+ * and rely on userspace to ask us to try
+ * again.
+ */
+ /* NOTE(review): the buffer is kept on this path */
+ pr_info("%s: Platform dump partially read. ID = 0x%x\n",
+ __func__, dump->id);
+ return -EIO;
+ }
+ }
+
+ /* No bounds check on pos/count is done in this function */
+ memcpy(buffer, dump->buffer + pos, count);
+
+ /* You may think we could free the dump buffer now and retrieve
+ * it again later if needed, but due to current firmware limitation,
+ * that's not the case. So, once read into userspace once,
+ * we keep the dump around until it's acknowledged by userspace.
+ */
+
+ return count;
+ }
+
+ /*
+ * Allocate a dump_obj for the given dump, add it to dump_kset under the
+ * name "0x<type>-0x<id>" and create its "dump" binary file. Failures are
+ * handled by dropping the references; nothing is reported to the caller.
+ */
+ static void create_dump_obj(uint32_t id, size_t size, uint32_t type)
+ {
+ struct dump_obj *dump;
+ int rc;
+
+ dump = kzalloc(sizeof(*dump), GFP_KERNEL);
+ if (!dump)
+ return;
+
+ dump->kobj.kset = dump_kset;
+
+ kobject_init(&dump->kobj, &dump_ktype);
+
+ sysfs_bin_attr_init(&dump->dump_attr);
+
+ /* Owner-readable bin file whose size is the dump size */
+ dump->dump_attr.attr.name = "dump";
+ dump->dump_attr.attr.mode = 0400;
+ dump->dump_attr.size = size;
+ dump->dump_attr.read = dump_attr_read;
+
+ dump->id = id;
+ dump->size = size;
+ dump->type = type;
+
+ rc = kobject_add(&dump->kobj, NULL, "0x%x-0x%x", type, id);
+ if (rc) {
+ /* Dropping the initial reference frees dump via dump_release() */
+ kobject_put(&dump->kobj);
+ return;
+ }
+
+ /*
+ * As soon as the sysfs file for this dump is created/activated there is
+ * a chance the opal_errd daemon (or any userspace) might read and
+ * acknowledge the dump before kobject_uevent() is called. If that
+ * happens then there is a potential race between
+ * dump_ack_store->kobject_put() and kobject_uevent() which leads to a
+ * use-after-free of a kernfs object resulting in a kernel crash.
+ *
+ * To avoid that, we need to take a reference on behalf of the bin file,
+ * so that our reference remains valid while we call kobject_uevent().
+ * We then drop our reference before exiting the function, leaving the
+ * bin file to drop the last reference (if it hasn't already).
+ */
+
+ /* Take a reference for the bin file */
+ kobject_get(&dump->kobj);
+ rc = sysfs_create_bin_file(&dump->kobj, &dump->dump_attr);
+ if (rc == 0) {
+ kobject_uevent(&dump->kobj, KOBJ_ADD);
+
+ pr_info("%s: New platform dump. ID = 0x%x Size %u\n",
+ __func__, dump->id, dump->size);
+ } else {
+ /* Drop reference count taken for bin file */
+ kobject_put(&dump->kobj);
+ }
+
+ /* Drop our reference */
+ kobject_put(&dump->kobj);
+ return;
+ }
+
+ /*
+ * Threaded handler for the dump-available OPAL event: read the dump
+ * info and create a sysfs object for it unless one with the same name
+ * already exists. Always returns IRQ_HANDLED.
+ */
+ static irqreturn_t process_dump(int irq, void *data)
+ {
+ int rc;
+ uint32_t dump_id, dump_size, dump_type;
+ /* Fits "0x" + 8 hex digits, "-", "0x" + 8 hex digits and the NUL */
+ char name[22];
+ struct kobject *kobj;
+
+ rc = dump_read_info(&dump_id, &dump_size, &dump_type);
+ if (rc != OPAL_SUCCESS)
+ return IRQ_HANDLED;
+
+ /* Same name format as used by create_dump_obj() */
+ sprintf(name, "0x%x-0x%x", dump_type, dump_id);
+
+ /* we may get notified twice, let's handle
+ * that gracefully and not create two conflicting
+ * entries.
+ */
+ kobj = kset_find_obj(dump_kset, name);
+ if (kobj) {
+ /* Drop reference added by kset_find_obj() */
+ kobject_put(kobj);
+ return IRQ_HANDLED;
+ }
+
+ create_dump_obj(dump_id, dump_size, dump_type);
+
+ return IRQ_HANDLED;
+ }
+
+ /*
+ * Set up the platform dump interface: create the "dump" kset under
+ * opal_kobj, add the "initiate_dump" group, hook the dump-available OPAL
+ * event to process_dump() and ask firmware to resend pending
+ * notifications when that call is supported.
+ */
+ void __init opal_platform_dump_init(void)
+ {
+ int rc;
+ int dump_irq;
+
+ /* Dump not supported by firmware */
+ if (!opal_check_token(OPAL_DUMP_READ))
+ return;
+
+ dump_kset = kset_create_and_add("dump", NULL, opal_kobj);
+ if (!dump_kset) {
+ pr_warn("%s: Failed to create dump kset\n", __func__);
+ return;
+ }
+
+ rc = sysfs_create_group(&dump_kset->kobj, &initiate_attr_group);
+ if (rc) {
+ pr_warn("%s: Failed to create initiate dump attr group\n",
+ __func__);
+ kobject_put(&dump_kset->kobj);
+ return;
+ }
+
+ /* A zero return is treated as failure to obtain an irq */
+ dump_irq = opal_event_request(ilog2(OPAL_EVENT_DUMP_AVAIL));
+ if (!dump_irq) {
+ /* NOTE(review): the kset and group stay registered on this path */
+ pr_err("%s: Can't register OPAL event irq (%d)\n",
+ __func__, dump_irq);
+ return;
+ }
+
+ /* No primary handler: process_dump() runs as the threaded handler */
+ rc = request_threaded_irq(dump_irq, NULL, process_dump,
+ IRQF_TRIGGER_HIGH | IRQF_ONESHOT,
+ "opal-dump", NULL);
+ if (rc) {
+ pr_err("%s: Can't request OPAL event irq (%d)\n",
+ __func__, rc);
+ return;
+ }
+
+ if (opal_check_token(OPAL_DUMP_RESEND))
+ opal_dump_resend_notification();
+ }
diff --git a/arch/powerpc/platforms/powernv/opal-elog.c b/arch/powerpc/platforms/powernv/opal-elog.c
new file mode 100644
index 000000000..554fdd7f8
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-elog.c
@@ -0,0 +1,340 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Error log support on PowerNV.
+ *
+ * Copyright 2013,2014 IBM Corp.
+ */
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/of.h>
+#include <linux/slab.h>
+#include <linux/sysfs.h>
+#include <linux/fs.h>
+#include <linux/vmalloc.h>
+#include <linux/fcntl.h>
+#include <linux/kobject.h>
+#include <linux/uaccess.h>
+#include <asm/opal.h>
+
+ /*
+ * One error log exposed in sysfs. The embedded kobject carries the
+ * lifetime; elog_release() frees buffer and the object itself.
+ */
+ struct elog_obj {
+ struct kobject kobj;
+ struct bin_attribute raw_attr; /* the "raw" binary file */
+ uint64_t id; /* used as the object name, in hex */
+ uint64_t type;
+ size_t size; /* log size, also used as the bin file size */
+ char *buffer; /* log contents; NULL if reading failed */
+ };
+ /* Recover the elog_obj that embeds the given kobject */
+ #define to_elog_obj(x) container_of(x, struct elog_obj, kobj)
+
+ /*
+ * Attribute wrapper whose show/store callbacks receive the elog_obj
+ * directly; elog_attr_show()/elog_attr_store() dispatch to them.
+ */
+ struct elog_attribute {
+ struct attribute attr;
+ ssize_t (*show)(struct elog_obj *elog, struct elog_attribute *attr,
+ char *buf);
+ ssize_t (*store)(struct elog_obj *elog, struct elog_attribute *attr,
+ const char *buf, size_t count);
+ };
+ /* Recover the elog_attribute that embeds the given attribute */
+ #define to_elog_attr(x) container_of(x, struct elog_attribute, attr)
+
+ /* Show the log ID as a hexadecimal number followed by a newline. */
+ static ssize_t elog_id_show(struct elog_obj *elog_obj,
+ struct elog_attribute *attr,
+ char *buf)
+ {
+ return sprintf(buf, "0x%llx\n", elog_obj->id);
+ }
+
+ /*
+ * Map an error log type code to its name: type 0 is "PEL", every other
+ * value is "unknown".
+ */
+ static const char *elog_type_to_string(uint64_t type)
+ {
+ if (type == 0)
+ return "PEL";
+
+ return "unknown";
+ }
+
+ /* Show the log type as a hex code followed by its descriptive name. */
+ static ssize_t elog_type_show(struct elog_obj *elog_obj,
+ struct elog_attribute *attr,
+ char *buf)
+ {
+ return sprintf(buf, "0x%llx %s\n",
+ elog_obj->type,
+ elog_type_to_string(elog_obj->type));
+ }
+
+ /* Reading the "acknowledge" file only returns a fixed usage hint. */
+ static ssize_t elog_ack_show(struct elog_obj *elog_obj,
+ struct elog_attribute *attr,
+ char *buf)
+ {
+ return sprintf(buf, "ack - acknowledge log message\n");
+ }
+
+ /*
+ * Store handler for the "acknowledge" file. Any write acknowledges the
+ * log; the written data is not inspected. Always returns count.
+ */
+ static ssize_t elog_ack_store(struct elog_obj *elog_obj,
+ struct elog_attribute *attr,
+ const char *buf,
+ size_t count)
+ {
+ /*
+ * Try to self remove this attribute. If we are successful,
+ * delete the kobject itself.
+ */
+ if (sysfs_remove_file_self(&elog_obj->kobj, &attr->attr)) {
+ /* The return value of the ack call is not checked */
+ opal_send_ack_elog(elog_obj->id);
+ kobject_put(&elog_obj->kobj);
+ }
+ return count;
+ }
+
+ /* Read-only "id" file */
+ static struct elog_attribute id_attribute =
+ __ATTR(id, 0444, elog_id_show, NULL);
+ /* Read-only "type" file */
+ static struct elog_attribute type_attribute =
+ __ATTR(type, 0444, elog_type_show, NULL);
+ /* "acknowledge" file: readable and writable by owner and group */
+ static struct elog_attribute ack_attribute =
+ __ATTR(acknowledge, 0660, elog_ack_show, elog_ack_store);
+
+ /* kset that holds all error log objects; created in opal_elog_init() */
+ static struct kset *elog_kset;
+
+ /*
+ * Generic sysfs show entry point: forward to the elog_attribute's own
+ * show callback, or fail with -EIO when the attribute has none.
+ */
+ static ssize_t elog_attr_show(struct kobject *kobj,
+ struct attribute *attr,
+ char *buf)
+ {
+ struct elog_attribute *eattr = to_elog_attr(attr);
+
+ if (eattr->show == NULL)
+ return -EIO;
+
+ return eattr->show(to_elog_obj(kobj), eattr, buf);
+ }
+
+ /*
+ * Generic sysfs store entry point: forward to the elog_attribute's own
+ * store callback, or fail with -EIO when the attribute has none.
+ */
+ static ssize_t elog_attr_store(struct kobject *kobj,
+ struct attribute *attr,
+ const char *buf, size_t len)
+ {
+ struct elog_attribute *eattr = to_elog_attr(attr);
+
+ if (eattr->store == NULL)
+ return -EIO;
+
+ return eattr->store(to_elog_obj(kobj), eattr, buf, len);
+ }
+
+ /* sysfs operations used by elog_ktype; both dispatch via elog_attribute */
+ static const struct sysfs_ops elog_sysfs_ops = {
+ .show = elog_attr_show,
+ .store = elog_attr_store,
+ };
+
+ /*
+ * kobject release callback: free the log data buffer and then the
+ * elog_obj that embeds @kobj.
+ */
+ static void elog_release(struct kobject *kobj)
+ {
+ struct elog_obj *obj = to_elog_obj(kobj);
+
+ kfree(obj->buffer);
+ kfree(obj);
+ }
+
+ /* Default attributes of each error log object; NULL-terminated */
+ static struct attribute *elog_default_attrs[] = {
+ &id_attribute.attr,
+ &type_attribute.attr,
+ &ack_attribute.attr,
+ NULL,
+ };
+ /* Provides elog_default_groups, referenced by elog_ktype below */
+ ATTRIBUTE_GROUPS(elog_default);
+
+ /* kobject type used for every elog_obj (see create_elog_obj()) */
+ static struct kobj_type elog_ktype = {
+ .sysfs_ops = &elog_sysfs_ops,
+ .release = &elog_release,
+ .default_groups = elog_default_groups,
+ };
+
+ /*
+ * Maximum size of a single log on FSP is 16KB. elog_event() clamps the
+ * size reported by OPAL to this value.
+ */
+ #define OPAL_MAX_ERRLOG_SIZE 16384
+
+ /*
+ * Read handler of the per-log "raw" binary file. If the log contents
+ * are not cached yet, they are fetched from OPAL first.
+ *
+ * Returns count after copying, or -EIO when the buffer cannot be
+ * allocated or OPAL fails to deliver the log.
+ */
+ static ssize_t raw_attr_read(struct file *filep, struct kobject *kobj,
+ struct bin_attribute *bin_attr,
+ char *buffer, loff_t pos, size_t count)
+ {
+ int opal_rc;
+
+ struct elog_obj *elog = to_elog_obj(kobj);
+
+ /* We may have had an error reading before, so let's retry */
+ if (!elog->buffer) {
+ elog->buffer = kzalloc(elog->size, GFP_KERNEL);
+ if (!elog->buffer)
+ return -EIO;
+
+ opal_rc = opal_read_elog(__pa(elog->buffer),
+ elog->size, elog->id);
+ if (opal_rc != OPAL_SUCCESS) {
+ pr_err_ratelimited("ELOG: log read failed for log-id=%llx\n",
+ elog->id);
+ /* Drop the buffer so that the next read retries the fetch */
+ kfree(elog->buffer);
+ elog->buffer = NULL;
+ return -EIO;
+ }
+ }
+
+ /* No bounds check on pos/count is done in this function */
+ memcpy(buffer, elog->buffer + pos, count);
+
+ return count;
+ }
+
+ /*
+ * Allocate an elog_obj for the given log, try to read its contents from
+ * OPAL, add it to elog_kset under the name "0x<id>" and create its "raw"
+ * binary file. Failures are handled by dropping the references; nothing
+ * is reported to the caller.
+ */
+ static void create_elog_obj(uint64_t id, size_t size, uint64_t type)
+ {
+ struct elog_obj *elog;
+ int rc;
+
+ elog = kzalloc(sizeof(*elog), GFP_KERNEL);
+ if (!elog)
+ return;
+
+ elog->kobj.kset = elog_kset;
+
+ kobject_init(&elog->kobj, &elog_ktype);
+
+ sysfs_bin_attr_init(&elog->raw_attr);
+
+ /* Owner-readable bin file whose size is the log size */
+ elog->raw_attr.attr.name = "raw";
+ elog->raw_attr.attr.mode = 0400;
+ elog->raw_attr.size = size;
+ elog->raw_attr.read = raw_attr_read;
+
+ elog->id = id;
+ elog->size = size;
+ elog->type = type;
+
+ /*
+ * Read the log now. A failed allocation or read is not fatal: the
+ * buffer stays NULL and raw_attr_read() retries on access.
+ */
+ elog->buffer = kzalloc(elog->size, GFP_KERNEL);
+
+ if (elog->buffer) {
+ rc = opal_read_elog(__pa(elog->buffer),
+ elog->size, elog->id);
+ if (rc != OPAL_SUCCESS) {
+ pr_err("ELOG: log read failed for log-id=%llx\n",
+ elog->id);
+ kfree(elog->buffer);
+ elog->buffer = NULL;
+ }
+ }
+
+ rc = kobject_add(&elog->kobj, NULL, "0x%llx", id);
+ if (rc) {
+ /* Dropping the initial reference frees elog via elog_release() */
+ kobject_put(&elog->kobj);
+ return;
+ }
+
+ /*
+ * As soon as the sysfs file for this elog is created/activated there is
+ * a chance the opal_errd daemon (or any userspace) might read and
+ * acknowledge the elog before kobject_uevent() is called. If that
+ * happens then there is a potential race between
+ * elog_ack_store->kobject_put() and kobject_uevent() which leads to a
+ * use-after-free of a kernfs object resulting in a kernel crash.
+ *
+ * To avoid that, we need to take a reference on behalf of the bin file,
+ * so that our reference remains valid while we call kobject_uevent().
+ * We then drop our reference before exiting the function, leaving the
+ * bin file to drop the last reference (if it hasn't already).
+ */
+
+ /* Take a reference for the bin file */
+ kobject_get(&elog->kobj);
+ rc = sysfs_create_bin_file(&elog->kobj, &elog->raw_attr);
+ if (rc == 0) {
+ kobject_uevent(&elog->kobj, KOBJ_ADD);
+ } else {
+ /* Drop the reference taken for the bin file */
+ kobject_put(&elog->kobj);
+ }
+
+ /* Drop our reference */
+ kobject_put(&elog->kobj);
+
+ return;
+ }
+
+ /*
+ * Threaded handler for the error-log-available OPAL event: query the
+ * log's ID, size and type and create a sysfs object for it unless one
+ * with the same name already exists. Always returns IRQ_HANDLED.
+ */
+ static irqreturn_t elog_event(int irq, void *data)
+ {
+ __be64 size;
+ __be64 id;
+ __be64 type;
+ uint64_t elog_size;
+ uint64_t log_id;
+ uint64_t elog_type;
+ int rc;
+ /* "0x" + up to 16 hex digits + NUL */
+ char name[2+16+1];
+ struct kobject *kobj;
+
+ rc = opal_get_elog_size(&id, &size, &type);
+ if (rc != OPAL_SUCCESS) {
+ pr_err("ELOG: OPAL log info read failed\n");
+ return IRQ_HANDLED;
+ }
+
+ elog_size = be64_to_cpu(size);
+ log_id = be64_to_cpu(id);
+ elog_type = be64_to_cpu(type);
+
+ /* Oversized logs trigger a warning and are clamped to the maximum */
+ WARN_ON(elog_size > OPAL_MAX_ERRLOG_SIZE);
+
+ if (elog_size >= OPAL_MAX_ERRLOG_SIZE)
+ elog_size = OPAL_MAX_ERRLOG_SIZE;
+
+ /* Same name format as used by create_elog_obj() */
+ sprintf(name, "0x%llx", log_id);
+
+ /* we may get notified twice, let's handle
+ * that gracefully and not create two conflicting
+ * entries.
+ */
+ kobj = kset_find_obj(elog_kset, name);
+ if (kobj) {
+ /* Drop reference added by kset_find_obj() */
+ kobject_put(kobj);
+ return IRQ_HANDLED;
+ }
+
+ create_elog_obj(log_id, elog_size, elog_type);
+
+ return IRQ_HANDLED;
+ }
+
+ /*
+ * Set up the error log interface: create the "elog" kset under
+ * opal_kobj, hook the error-log-available OPAL event to elog_event() and
+ * ask firmware to resend pending logs when that call is supported.
+ *
+ * Returns 0 on success and a negative value on failure: -1 when firmware
+ * lacks error log support or the kset cannot be created, -ENODEV when no
+ * irq can be obtained for the event, or the error returned by
+ * request_threaded_irq().
+ */
+ int __init opal_elog_init(void)
+ {
+ int rc = 0, irq;
+
+ /* ELOG not supported by firmware */
+ if (!opal_check_token(OPAL_ELOG_READ))
+ return -1;
+
+ elog_kset = kset_create_and_add("elog", NULL, opal_kobj);
+ if (!elog_kset) {
+ pr_warn("%s: failed to create elog kset\n", __func__);
+ return -1;
+ }
+
+ irq = opal_event_request(ilog2(OPAL_EVENT_ERROR_LOG_AVAIL));
+ if (!irq) {
+ pr_err("%s: Can't register OPAL event irq (%d)\n",
+ __func__, irq);
+ /* irq is 0 here; returning it would signal success */
+ return -ENODEV;
+ }
+
+ /* No primary handler: elog_event() runs as the threaded handler */
+ rc = request_threaded_irq(irq, NULL, elog_event,
+ IRQF_TRIGGER_HIGH | IRQF_ONESHOT, "opal-elog", NULL);
+ if (rc) {
+ pr_err("%s: Can't request OPAL event irq (%d)\n",
+ __func__, rc);
+ return rc;
+ }
+
+ /* We are now ready to pull error logs from opal. */
+ if (opal_check_token(OPAL_ELOG_RESEND))
+ opal_resend_pending_logs();
+
+ return 0;
+ }
diff --git a/arch/powerpc/platforms/powernv/opal-fadump.c b/arch/powerpc/platforms/powernv/opal-fadump.c
new file mode 100644
index 000000000..964f464b1
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-fadump.c
@@ -0,0 +1,726 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Firmware-Assisted Dump support on POWER platform (OPAL).
+ *
+ * Copyright 2019, Hari Bathini, IBM Corporation.
+ */
+
+#define pr_fmt(fmt) "opal fadump: " fmt
+
+#include <linux/string.h>
+#include <linux/seq_file.h>
+#include <linux/of.h>
+#include <linux/of_fdt.h>
+#include <linux/libfdt.h>
+#include <linux/mm.h>
+#include <linux/crash_dump.h>
+
+#include <asm/page.h>
+#include <asm/opal.h>
+#include <asm/fadump-internal.h>
+
+#include "opal-fadump.h"
+
+
+#ifdef CONFIG_PRESERVE_FA_DUMP
+/*
+ * When dump is active but PRESERVE_FA_DUMP is enabled on the kernel,
+ * ensure crash data is preserved in hope that the subsequent memory
+ * preserving kernel boot is going to process this crash data.
+ *
+ * @fadump_conf: FADump configuration to update.
+ * @node:        flat device-tree node that is searched for the "dump" subnode.
+ *
+ * On success sets fadump_conf->boot_mem_top and fadump_conf->dump_active.
+ * Returns early, leaving @fadump_conf untouched, when no dump was
+ * initiated or the required tags cannot be read from firmware.
+ */
+void __init opal_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node)
+{
+	const struct opal_fadump_mem_struct *opal_fdm_active;
+	const __be32 *prop;
+	unsigned long dn;
+	__be64 be_addr;
+	u64 addr = 0;
+	s64 ret;
+
+	dn = of_get_flat_dt_subnode_by_name(node, "dump");
+	if (dn == -FDT_ERR_NOTFOUND)
+		return;
+
+	/*
+	 * Check if dump has been initiated on last reboot.
+	 */
+	prop = of_get_flat_dt_prop(dn, "mpipl-boot", NULL);
+	if (!prop)
+		return;
+
+	/* Firmware hands back tag values in big-endian byte order. */
+	ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_KERNEL, &be_addr);
+	if ((ret != OPAL_SUCCESS) || !be_addr) {
+		pr_debug("Could not get Kernel metadata (%lld)\n", ret);
+		return;
+	}
+
+	/*
+	 * Preserve memory only if kernel memory regions are registered
+	 * with f/w for MPIPL.
+	 */
+	addr = be64_to_cpu(be_addr);
+	pr_debug("Kernel metadata addr: %llx\n", addr);
+	opal_fdm_active = (void *)addr;
+	if (be16_to_cpu(opal_fdm_active->registered_regions) == 0)
+		return;
+
+	ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_BOOT_MEM, &be_addr);
+	if ((ret != OPAL_SUCCESS) || !be_addr) {
+		pr_err("Failed to get boot memory tag (%lld)\n", ret);
+		return;
+	}
+
+	/*
+	 * Memory below this address can be used for booting a
+	 * capture kernel or petitboot kernel. Preserve everything
+	 * above this address for processing crashdump.
+	 */
+	fadump_conf->boot_mem_top = be64_to_cpu(be_addr);
+	pr_debug("Preserve everything above %llx\n", fadump_conf->boot_mem_top);
+
+	pr_info("Firmware-assisted dump is active.\n");
+	fadump_conf->dump_active = 1;
+}
+
+#else /* CONFIG_PRESERVE_FA_DUMP */
+/*
+ * Kernel metadata of the active dump. Set in opal_fadump_dt_scan() from
+ * the OPAL_MPIPL_TAG_KERNEL tag and cleared by opal_fadump_invalidate().
+ */
+static const struct opal_fadump_mem_struct *opal_fdm_active;
+/*
+ * CPU state metadata provided by f/w. Set in opal_fadump_dt_scan() from
+ * the OPAL_MPIPL_TAG_CPU tag; NULL when not available.
+ */
+static const struct opal_mpipl_fadump *opal_cpu_metadata;
+/*
+ * Kernel metadata of this kernel, located at
+ * __va(fadump_conf->kernel_metadata).
+ */
+static struct opal_fadump_mem_struct *opal_fdm;
+
+#ifdef CONFIG_OPAL_CORE
+/* Defined outside this file; set to true by opal_fadump_process(). */
+extern bool kernel_initiated;
+#endif
+
+/* Forward declaration: used by opal_fadump_register() before its definition. */
+static int opal_fadump_unregister(struct fw_dump *fadump_conf);
+
+/*
+ * Refresh the fields of @fadump_conf that are derived from the kernel
+ * metadata @fdm: the destination address of the boot memory regions and
+ * the address of the FADump header.
+ */
+static void opal_fadump_update_config(struct fw_dump *fadump_conf,
+				      const struct opal_fadump_mem_struct *fdm)
+{
+	u16 nr_regions = be16_to_cpu(fdm->region_cnt);
+
+	pr_debug("Boot memory regions count: %d\n", nr_regions);
+
+	/* The FADump header address is carried in the metadata itself. */
+	fadump_conf->fadumphdr_addr = be64_to_cpu(fdm->fadumphdr_addr);
+
+	/*
+	 * The destination of the first boot memory region doubles as the
+	 * destination address of the boot memory regions as a whole.
+	 */
+	fadump_conf->boot_mem_dest_addr = be64_to_cpu(fdm->rgn[0].dest);
+	pr_debug("Destination address of boot memory regions: %#016llx\n",
+		 fadump_conf->boot_mem_dest_addr);
+}
+
+/*
+ * This function is called in the capture kernel to get configuration details
+ * from metadata setup by the first kernel.
+ *
+ * @fadump_conf: configuration to fill in; left untouched unless
+ *               fadump_conf->dump_active is set.
+ * @fdm:         kernel metadata written by the first (crashed) kernel.
+ *
+ * Fills in the boot memory region table, boot_memory_size, boot_mem_top,
+ * boot_mem_regs_cnt and reserve_dump_area_start, then lets
+ * opal_fadump_update_config() set the remaining derived fields.
+ */
+static void __init opal_fadump_get_config(struct fw_dump *fadump_conf,
+ const struct opal_fadump_mem_struct *fdm)
+{
+ unsigned long base, size, last_end, hole_size;
+ int i;
+
+ if (!fadump_conf->dump_active)
+ return;
+
+ last_end = 0;
+ hole_size = 0;
+ fadump_conf->boot_memory_size = 0;
+
+ /*
+ * Copy every region into the config and accumulate both the total
+ * region size and the size of the gaps between consecutive regions.
+ * NOTE(review): region_cnt is read from the previous kernel's metadata
+ * and is not checked here against the capacity of boot_mem_addr[] and
+ * boot_mem_sz[] -- confirm it is validated elsewhere.
+ */
+ pr_debug("Boot memory regions:\n");
+ for (i = 0; i < be16_to_cpu(fdm->region_cnt); i++) {
+ base = be64_to_cpu(fdm->rgn[i].src);
+ size = be64_to_cpu(fdm->rgn[i].size);
+ pr_debug("\t[%03d] base: 0x%lx, size: 0x%lx\n", i, base, size);
+
+ fadump_conf->boot_mem_addr[i] = base;
+ fadump_conf->boot_mem_sz[i] = size;
+ fadump_conf->boot_memory_size += size;
+ /* Gap between the end of the previous region and this one. */
+ hole_size += (base - last_end);
+
+ last_end = base + size;
+ }
+
+ /*
+ * Start address of reserve dump area (permanent reservation) for
+ * re-registering FADump after dump capture.
+ */
+ fadump_conf->reserve_dump_area_start = be64_to_cpu(fdm->rgn[0].dest);
+
+ /*
+ * Rarely, but it can so happen that system crashes before all
+ * boot memory regions are registered for MPIPL. In such
+ * cases, warn that the vmcore may not be accurate and proceed
+ * anyway as that is the best bet considering free pages, cache
+ * pages, user pages, etc are usually filtered out.
+ *
+ * Hope the memory that could not be preserved only has pages
+ * that are usually filtered out while saving the vmcore.
+ */
+ if (be16_to_cpu(fdm->region_cnt) > be16_to_cpu(fdm->registered_regions)) {
+ pr_warn("Not all memory regions were saved!!!\n");
+ pr_warn(" Unsaved memory regions:\n");
+ /* Regions at index >= registered_regions were never registered. */
+ i = be16_to_cpu(fdm->registered_regions);
+ while (i < be16_to_cpu(fdm->region_cnt)) {
+ pr_warn("\t[%03d] base: 0x%llx, size: 0x%llx\n",
+ i, be64_to_cpu(fdm->rgn[i].src),
+ be64_to_cpu(fdm->rgn[i].size));
+ i++;
+ }
+
+ pr_warn("If the unsaved regions only contain pages that are filtered out (eg. free/user pages), the vmcore should still be usable.\n");
+ pr_warn("WARNING: If the unsaved regions contain kernel pages, the vmcore will be corrupted.\n");
+ }
+
+ /* Top of boot memory: sum of all region sizes plus all gaps. */
+ fadump_conf->boot_mem_top = (fadump_conf->boot_memory_size + hole_size);
+ fadump_conf->boot_mem_regs_cnt = be16_to_cpu(fdm->region_cnt);
+ opal_fadump_update_config(fadump_conf, fdm);
+}
+
+/*
+ * Reset the kernel metadata @fdm to its initial state: current format
+ * version, no regions, no registered regions and no FADump header address.
+ */
+static void opal_fadump_init_metadata(struct opal_fadump_mem_struct *fdm)
+{
+	/* Region bookkeeping starts out empty. */
+	fdm->registered_regions = cpu_to_be16(0);
+	fdm->region_cnt = cpu_to_be16(0);
+
+	/* The header address is filled in later. */
+	fdm->fadumphdr_addr = cpu_to_be64(0);
+
+	fdm->version = OPAL_FADUMP_VERSION;
+}
+
+/*
+ * (Re)build the kernel metadata from the boot memory regions recorded in
+ * @fadump_conf. Destinations are laid out back to back, starting at
+ * fadump_conf->reserve_dump_area_start.
+ *
+ * Returns the address following the destination of the last region.
+ */
+static u64 opal_fadump_init_mem_struct(struct fw_dump *fadump_conf)
+{
+	u64 next_dest = fadump_conf->reserve_dump_area_start;
+	u64 hdr_addr;
+	u16 nr_regions;
+	int idx;
+
+	opal_fdm = __va(fadump_conf->kernel_metadata);
+	opal_fadump_init_metadata(opal_fdm);
+
+	/* Count on from whatever the freshly initialized metadata holds. */
+	nr_regions = be16_to_cpu(opal_fdm->region_cnt);
+
+	/* Boot memory regions */
+	for (idx = 0; idx < fadump_conf->boot_mem_regs_cnt; idx++, nr_regions++) {
+		opal_fdm->rgn[idx].src = cpu_to_be64(fadump_conf->boot_mem_addr[idx]);
+		opal_fdm->rgn[idx].size = cpu_to_be64(fadump_conf->boot_mem_sz[idx]);
+		opal_fdm->rgn[idx].dest = cpu_to_be64(next_dest);
+
+		next_dest += fadump_conf->boot_mem_sz[idx];
+	}
+	opal_fdm->region_cnt = cpu_to_be16(nr_regions);
+
+	/*
+	 * The metadata travels through f/w to the capture kernel, so the
+	 * FADump header address is stored in it rather than recomputed there.
+	 */
+	hdr_addr = be64_to_cpu(opal_fdm->rgn[0].dest) +
+		   fadump_conf->boot_memory_size;
+	opal_fdm->fadumphdr_addr = cpu_to_be64(hdr_addr);
+
+	opal_fadump_update_config(fadump_conf, opal_fdm);
+
+	return next_dest;
+}
+
+/* Size of the kernel metadata structure, rounded up to whole pages. */
+static u64 opal_fadump_get_metadata_size(void)
+{
+	size_t raw_size = sizeof(struct opal_fadump_mem_struct);
+
+	return PAGE_ALIGN(raw_size);
+}
+
+/*
+ * Place the kernel metadata in the last page(s) of the FADump reservation,
+ * initialize it and register both the metadata address and the boot memory
+ * top address as tags with f/w.
+ *
+ * Both tags are always attempted. Returns 0 when both registrations
+ * succeed, -EPERM when at least one of them fails.
+ */
+static int opal_fadump_setup_metadata(struct fw_dump *fadump_conf)
+{
+	u64 metadata_size = opal_fadump_get_metadata_size();
+	int status = 0;
+	s64 ret;
+
+	/* Kernel metadata lives at the very end of the reserved dump area. */
+	fadump_conf->kernel_metadata = (fadump_conf->reserve_dump_area_start +
+					fadump_conf->reserve_dump_area_size -
+					metadata_size);
+	pr_info("Kernel metadata addr: %llx\n", fadump_conf->kernel_metadata);
+
+	/* The metadata must be valid before f/w learns its address. */
+	opal_fdm = __va(fadump_conf->kernel_metadata);
+	opal_fadump_init_metadata(opal_fdm);
+
+	/* Metadata address: retrieved again by the capture kernel. */
+	ret = opal_mpipl_register_tag(OPAL_MPIPL_TAG_KERNEL,
+				      fadump_conf->kernel_metadata);
+	if (ret != OPAL_SUCCESS) {
+		pr_err("Failed to set kernel metadata tag!\n");
+		status = -EPERM;
+	}
+
+	/*
+	 * Boot memory top: retrieved by a kernel that intends to preserve
+	 * the crashed kernel's memory.
+	 */
+	ret = opal_mpipl_register_tag(OPAL_MPIPL_TAG_BOOT_MEM,
+				      fadump_conf->boot_mem_top);
+	if (ret != OPAL_SUCCESS) {
+		pr_err("Failed to set boot memory tag!\n");
+		status = -EPERM;
+	}
+
+	return status;
+}
+
+/* Smallest boot memory size this platform accepts for FADump. */
+static u64 opal_fadump_get_bootmem_min(void)
+{
+	u64 min_boot_mem = OPAL_FADUMP_MIN_BOOT_MEM;
+
+	return min_boot_mem;
+}
+
+/*
+ * Register the boot memory regions recorded in opal_fdm with f/w for MPIPL.
+ *
+ * Regions are added one by one with OPAL_MPIPL_ADD_RANGE; the loop stops at
+ * the first call that does not return OPAL_SUCCESS. The number of regions
+ * added is stored in opal_fdm->registered_regions.
+ *
+ * Returns 0 when the last OPAL status was OPAL_SUCCESS or OPAL_RESOURCE
+ * (f/w region limit reached), -EIO otherwise. On error, any regions that
+ * were already registered are unregistered again.
+ */
+static int opal_fadump_register(struct fw_dump *fadump_conf)
+{
+ /* Stays OPAL_PARAMETER if region_cnt is 0 and the loop never runs. */
+ s64 rc = OPAL_PARAMETER;
+ u16 registered_regs;
+ int i, err = -EIO;
+
+ registered_regs = be16_to_cpu(opal_fdm->registered_regions);
+ for (i = 0; i < be16_to_cpu(opal_fdm->region_cnt); i++) {
+ rc = opal_mpipl_update(OPAL_MPIPL_ADD_RANGE,
+ be64_to_cpu(opal_fdm->rgn[i].src),
+ be64_to_cpu(opal_fdm->rgn[i].dest),
+ be64_to_cpu(opal_fdm->rgn[i].size));
+ if (rc != OPAL_SUCCESS)
+ break;
+
+ registered_regs++;
+ }
+ opal_fdm->registered_regions = cpu_to_be16(registered_regs);
+
+ /* rc holds the status of the last OPAL_MPIPL_ADD_RANGE call made. */
+ switch (rc) {
+ case OPAL_SUCCESS:
+ pr_info("Registration is successful!\n");
+ fadump_conf->dump_registered = 1;
+ err = 0;
+ break;
+ case OPAL_RESOURCE:
+ /* If MAX regions limit in f/w is hit, warn and proceed. */
+ pr_warn("%d regions could not be registered for MPIPL as MAX limit is reached!\n",
+ (be16_to_cpu(opal_fdm->region_cnt) -
+ be16_to_cpu(opal_fdm->registered_regions)));
+ fadump_conf->dump_registered = 1;
+ err = 0;
+ break;
+ case OPAL_PARAMETER:
+ pr_err("Failed to register. Parameter Error(%lld).\n", rc);
+ break;
+ case OPAL_HARDWARE:
+ pr_err("Support not available.\n");
+ fadump_conf->fadump_supported = 0;
+ fadump_conf->fadump_enabled = 0;
+ break;
+ default:
+ pr_err("Failed to register. Unknown Error(%lld).\n", rc);
+ break;
+ }
+
+ /*
+ * If some regions were registered before OPAL_MPIPL_ADD_RANGE
+ * OPAL call failed, unregister all regions.
+ */
+ if ((err < 0) && (be16_to_cpu(opal_fdm->registered_regions) > 0))
+ opal_fadump_unregister(fadump_conf);
+
+ return err;
+}
+
+/*
+ * Remove every region registered with f/w for MPIPL.
+ *
+ * Returns 0 on success (registration state is cleared), -EIO when the
+ * OPAL call reports an error (state is left as it was).
+ */
+static int opal_fadump_unregister(struct fw_dump *fadump_conf)
+{
+	s64 rc = opal_mpipl_update(OPAL_MPIPL_REMOVE_ALL, 0, 0, 0);
+
+	if (!rc) {
+		opal_fdm->registered_regions = cpu_to_be16(0);
+		fadump_conf->dump_registered = 0;
+		return 0;
+	}
+
+	pr_err("Failed to un-register - unexpected Error(%lld).\n", rc);
+	return -EIO;
+}
+
+/*
+ * Ask f/w to free the memory preserved for the active dump.
+ *
+ * Returns 0 on success (the dump is no longer marked active and the
+ * active metadata pointer is dropped), -EIO when the OPAL call fails.
+ */
+static int opal_fadump_invalidate(struct fw_dump *fadump_conf)
+{
+	s64 rc = opal_mpipl_update(OPAL_MPIPL_FREE_PRESERVED_MEMORY, 0, 0, 0);
+
+	if (!rc) {
+		fadump_conf->dump_active = 0;
+		opal_fdm_active = NULL;
+		return 0;
+	}
+
+	pr_err("Failed to invalidate - unexpected Error(%lld).\n", rc);
+	return -EIO;
+}
+
+/*
+ * Reset the kernel metadata tag registered with f/w by registering
+ * address 0 for it. A failure is only warned about.
+ */
+static void opal_fadump_cleanup(struct fw_dump *fadump_conf)
+{
+	s64 ret;
+
+	ret = opal_mpipl_register_tag(OPAL_MPIPL_TAG_KERNEL, 0);
+	if (ret != OPAL_SUCCESS) {
+		/* ret is a signed status code: print it as signed. */
+		pr_warn("Could not reset (%lld) kernel metadata tag!\n", ret);
+	}
+}
+
+/*
+ * Verify if CPU state data is available. If available, do a bit of sanity
+ * checking before processing this data.
+ *
+ * Copies version, entry size, destination and total size of the CPU state
+ * data from opal_cpu_metadata into @fadump_conf as a side effect.
+ *
+ * Returns true when the data looks usable, false when CPU metadata is
+ * missing, its destination address is 0, the entry size is 0 or the entry
+ * size exceeds the total data size. A version mismatch is only warned about.
+ */
+static bool __init is_opal_fadump_cpu_data_valid(struct fw_dump *fadump_conf)
+{
+	u64 dest_paddr;
+
+	if (!opal_cpu_metadata)
+		return false;
+
+	dest_paddr = be64_to_cpu(opal_cpu_metadata->region[0].dest);
+
+	fadump_conf->cpu_state_data_version =
+		be32_to_cpu(opal_cpu_metadata->cpu_data_version);
+	fadump_conf->cpu_state_entry_size =
+		be32_to_cpu(opal_cpu_metadata->cpu_data_size);
+	fadump_conf->cpu_state_dest_vaddr = (u64)__va(dest_paddr);
+	fadump_conf->cpu_state_data_size =
+		be64_to_cpu(opal_cpu_metadata->region[0].size);
+
+	if (fadump_conf->cpu_state_data_version != HDAT_FADUMP_CPU_DATA_VER) {
+		pr_warn("Supported CPU state data version: %u, found: %d!\n",
+			HDAT_FADUMP_CPU_DATA_VER,
+			fadump_conf->cpu_state_data_version);
+		pr_warn("WARNING: F/W using newer CPU state data format!!\n");
+	}
+
+	/*
+	 * Test the physical destination: once translated with __va() the
+	 * address is never 0, so checking cpu_state_dest_vaddr against 0
+	 * could not catch a missing destination.
+	 */
+	if ((dest_paddr == 0) ||
+	    (fadump_conf->cpu_state_entry_size == 0) ||
+	    (fadump_conf->cpu_state_entry_size >
+	     fadump_conf->cpu_state_data_size)) {
+		pr_err("CPU state data is invalid. Ignoring!\n");
+		return false;
+	}
+
+	return true;
+}
+
+/*
+ * Convert CPU state data saved at the time of crash into ELF notes.
+ *
+ * While the crashing CPU's register data is saved by the kernel, CPU state
+ * data for all CPUs is saved by f/w. In CPU state data provided by f/w,
+ * each register entry is of 16 bytes, a numerical identifier along with
+ * a GPR/SPR flag in the first 8 bytes and the register value in the next
+ * 8 bytes. For more details refer to F/W documentation. If this data is
+ * missing or in unsupported format, append crashing CPU's register data
+ * saved by the kernel in the PT_NOTE, to have something to work with in
+ * the vmcore file.
+ *
+ * @fadump_conf: FADump configuration; supplies the CPU state data location
+ *               and the CPU notes buffer.
+ * @fdh:         crash info header written by the crashed kernel.
+ *
+ * Returns 0 on success, the error from fadump_setup_cpu_notes_buf() if the
+ * notes buffer cannot be set up, or -ENODEV when no note could be produced
+ * and the crashing CPU is unknown.
+ */
+static int __init
+opal_fadump_build_cpu_notes(struct fw_dump *fadump_conf,
+ struct fadump_crash_info_header *fdh)
+{
+ u32 thread_pir, size_per_thread, regs_offset, regs_cnt, reg_esize;
+ struct hdat_fadump_thread_hdr *thdr;
+ bool is_cpu_data_valid = false;
+ u32 num_cpus = 1, *note_buf;
+ struct pt_regs regs;
+ char *bufp;
+ int rc, i;
+
+ if (is_opal_fadump_cpu_data_valid(fadump_conf)) {
+ size_per_thread = fadump_conf->cpu_state_entry_size;
+ num_cpus = (fadump_conf->cpu_state_data_size / size_per_thread);
+ /*
+ * NOTE(review): cpu_state_dest_vaddr was already produced with
+ * __va() in is_opal_fadump_cpu_data_valid(); confirm applying
+ * __va() a second time here is intended.
+ */
+ bufp = __va(fadump_conf->cpu_state_dest_vaddr);
+ is_cpu_data_valid = true;
+ }
+
+ /* Without valid CPU state data, num_cpus is still 1 here. */
+ rc = fadump_setup_cpu_notes_buf(num_cpus);
+ if (rc != 0)
+ return rc;
+
+ note_buf = (u32 *)fadump_conf->cpu_notes_buf_vaddr;
+ if (!is_cpu_data_valid)
+ goto out;
+
+ /*
+ * Offset for register entries, entry size and registers count is
+ * duplicated in every thread header in keeping with HDAT format.
+ * Use these values from the first thread header.
+ */
+ thdr = (struct hdat_fadump_thread_hdr *)bufp;
+ regs_offset = (offsetof(struct hdat_fadump_thread_hdr, offset) +
+ be32_to_cpu(thdr->offset));
+ reg_esize = be32_to_cpu(thdr->esize);
+ regs_cnt = be32_to_cpu(thdr->ecnt);
+
+ pr_debug("--------CPU State Data------------\n");
+ pr_debug("NumCpus : %u\n", num_cpus);
+ pr_debug("\tOffset: %u, Entry size: %u, Cnt: %u\n",
+ regs_offset, reg_esize, regs_cnt);
+
+ /* Walk the per-thread entries, each size_per_thread bytes long. */
+ for (i = 0; i < num_cpus; i++, bufp += size_per_thread) {
+ thdr = (struct hdat_fadump_thread_hdr *)bufp;
+
+ thread_pir = be32_to_cpu(thdr->pir);
+ pr_debug("[%04d] PIR: 0x%x, core state: 0x%02x\n",
+ i, thread_pir, thdr->core_state);
+
+ /*
+ * If this is kernel initiated crash, crashing_cpu would be set
+ * appropriately and register data of the crashing CPU saved by
+ * crashing kernel. Add this saved register data of crashing CPU
+ * to elf notes and populate the pt_regs for the remaining CPUs
+ * from register state data provided by firmware.
+ */
+ if (fdh->crashing_cpu == thread_pir) {
+ note_buf = fadump_regs_to_elf_notes(note_buf,
+ &fdh->regs);
+ pr_debug("Crashing CPU PIR: 0x%x - R1 : 0x%lx, NIP : 0x%lx\n",
+ fdh->crashing_cpu, fdh->regs.gpr[1],
+ fdh->regs.nip);
+ continue;
+ }
+
+ /*
+ * Register state data of MAX cores is provided by firmware,
+ * but some of these cores may not be active. So, while
+ * processing register state data, check core state and
+ * skip threads that belong to inactive cores.
+ */
+ if (thdr->core_state == HDAT_FADUMP_CORE_INACTIVE)
+ continue;
+
+ opal_fadump_read_regs((bufp + regs_offset), regs_cnt,
+ reg_esize, true, &regs);
+ note_buf = fadump_regs_to_elf_notes(note_buf, &regs);
+ pr_debug("CPU PIR: 0x%x - R1 : 0x%lx, NIP : 0x%lx\n",
+ thread_pir, regs.gpr[1], regs.nip);
+ }
+
+out:
+ /*
+ * CPU state data is invalid/unsupported. Try appending crashing CPU's
+ * register data, if it is saved by the kernel.
+ */
+ /* note_buf still at the buffer start means no note was written. */
+ if (fadump_conf->cpu_notes_buf_vaddr == (u64)note_buf) {
+ if (fdh->crashing_cpu == FADUMP_CPU_UNKNOWN) {
+ fadump_free_cpu_notes_buf();
+ return -ENODEV;
+ }
+
+ pr_warn("WARNING: appending only crashing CPU's register data\n");
+ note_buf = fadump_regs_to_elf_notes(note_buf, &(fdh->regs));
+ }
+
+ final_note(note_buf);
+
+ pr_debug("Updating elfcore header (%llx) with cpu notes\n",
+ fdh->elfcorehdr_addr);
+ fadump_update_elfcore_header(__va(fdh->elfcorehdr_addr));
+ return 0;
+}
+
+/*
+ * Process the active dump: validate the crash info header, build the CPU
+ * notes and publish the elfcore header address for the vmcore module.
+ *
+ * Returns 0 on success, -EINVAL when there is no active metadata, no
+ * FADump header address or the header's magic number is wrong, or the
+ * error returned by opal_fadump_build_cpu_notes().
+ */
+static int __init opal_fadump_process(struct fw_dump *fadump_conf)
+{
+	struct fadump_crash_info_header *hdr;
+	int rc;
+
+	if (!opal_fdm_active)
+		return -EINVAL;
+	if (!fadump_conf->fadumphdr_addr)
+		return -EINVAL;
+
+	/* Validate the fadump crash info header */
+	hdr = __va(fadump_conf->fadumphdr_addr);
+	if (hdr->magic_number != FADUMP_CRASH_INFO_MAGIC) {
+		pr_err("Crash info header is not valid.\n");
+		return -EINVAL;
+	}
+
+#ifdef CONFIG_OPAL_CORE
+	/*
+	 * A known crashing CPU means the crash was initiated by the kernel,
+	 * which then also saved that CPU's register data itself.
+	 */
+	if (hdr->crashing_cpu != FADUMP_CPU_UNKNOWN)
+		kernel_initiated = true;
+#endif
+
+	rc = opal_fadump_build_cpu_notes(fadump_conf, hdr);
+
+	/*
+	 * Once the notes are built, the elfcore header is ready. Setting
+	 * elfcorehdr_addr lets the vmcore module export it through
+	 * '/proc/vmcore'.
+	 */
+	if (!rc)
+		elfcorehdr_addr = hdr->elfcorehdr_addr;
+
+	return rc;
+}
+
+/*
+ * Print one line per boot memory region to seq_file @m: source,
+ * destination, size and number of bytes dumped.
+ *
+ * The regions come from the active dump's metadata when a dump is active
+ * and from this kernel's own metadata otherwise. Only regions registered
+ * for MPIPL of an active dump report a non-zero "Dumped" value.
+ */
+static void opal_fadump_region_show(struct fw_dump *fadump_conf,
+				    struct seq_file *m)
+{
+	const struct opal_fadump_mem_struct *fdm_ptr;
+	u64 dumped_bytes;
+	int i;
+
+	if (fadump_conf->dump_active)
+		fdm_ptr = opal_fdm_active;
+	else
+		fdm_ptr = opal_fdm;
+
+	/* No metadata available: there are no regions to show. */
+	if (!fdm_ptr)
+		return;
+
+	for (i = 0; i < be16_to_cpu(fdm_ptr->region_cnt); i++) {
+		/*
+		 * Only regions that are registered for MPIPL
+		 * would have dump data. Start from 0 for every region so
+		 * that an unregistered region does not inherit the value
+		 * of the region printed before it.
+		 */
+		dumped_bytes = 0;
+		if ((fadump_conf->dump_active) &&
+		    (i < be16_to_cpu(fdm_ptr->registered_regions)))
+			dumped_bytes = be64_to_cpu(fdm_ptr->rgn[i].size);
+
+		seq_printf(m, "DUMP: Src: %#016llx, Dest: %#016llx, ",
+			   be64_to_cpu(fdm_ptr->rgn[i].src),
+			   be64_to_cpu(fdm_ptr->rgn[i].dest));
+		seq_printf(m, "Size: %#llx, Dumped: %#llx bytes\n",
+			   be64_to_cpu(fdm_ptr->rgn[i].size), dumped_bytes);
+	}
+
+	/* Dump is active. Show preserved area start address. */
+	if (fadump_conf->dump_active) {
+		seq_printf(m, "\nMemory above %#016llx is reserved for saving crash dump\n",
+			   fadump_conf->boot_mem_top);
+	}
+}
+
+/*
+ * Record the crashing CPU in @fdh and request an MPIPL reboot from f/w,
+ * passing @msg along. Only the two failure codes handled below are
+ * reported; any other return value is ignored.
+ */
+static void opal_fadump_trigger(struct fadump_crash_info_header *fdh,
+				const char *msg)
+{
+	int rc;
+
+	/*
+	 * Architected register state data does not carry a logical CPU
+	 * number here (unlike on pSeries), so the crashing CPU is identified
+	 * by its PIR to match its register data up in the vmcore file.
+	 */
+	fdh->crashing_cpu = (u32)mfspr(SPRN_PIR);
+
+	rc = opal_cec_reboot2(OPAL_REBOOT_MPIPL, msg);
+	switch (rc) {
+	case OPAL_UNSUPPORTED:
+		pr_emerg("Reboot type %d not supported.\n",
+			 OPAL_REBOOT_MPIPL);
+		break;
+	case OPAL_HARDWARE:
+		pr_emerg("No backend support for MPIPL!\n");
+		break;
+	default:
+		break;
+	}
+}
+
+/*
+ * OPAL implementation of the FADump platform callbacks. Installed into
+ * fadump_conf->ops by opal_fadump_dt_scan() once f/w support is detected.
+ */
+static struct fadump_ops opal_fadump_ops = {
+ .fadump_init_mem_struct = opal_fadump_init_mem_struct,
+ .fadump_get_metadata_size = opal_fadump_get_metadata_size,
+ .fadump_setup_metadata = opal_fadump_setup_metadata,
+ .fadump_get_bootmem_min = opal_fadump_get_bootmem_min,
+ .fadump_register = opal_fadump_register,
+ .fadump_unregister = opal_fadump_unregister,
+ .fadump_invalidate = opal_fadump_invalidate,
+ .fadump_cleanup = opal_fadump_cleanup,
+ .fadump_process = opal_fadump_process,
+ .fadump_region_show = opal_fadump_region_show,
+ .fadump_trigger = opal_fadump_trigger,
+};
+
+/*
+ * Scan the flat device tree for OPAL FADump support and for an active dump.
+ *
+ * @fadump_conf: FADump configuration to fill in.
+ * @node:        flat device-tree node that is searched for the "dump" subnode.
+ *
+ * When support is present, installs opal_fadump_ops and marks FADump as
+ * supported. When the previous boot initiated a dump and kernel regions
+ * were registered for it, additionally records the kernel (and, if
+ * available, CPU) metadata, marks the dump active and reads the
+ * configuration left behind by the crashed kernel.
+ */
+void __init opal_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node)
+{
+	const __be32 *prop;
+	unsigned long dn;
+	__be64 be_addr;
+	u64 addr = 0;
+	int i, len;
+	s64 ret;
+
+	/*
+	 * Check if Firmware-Assisted Dump is supported. if yes, check
+	 * if dump has been initiated on last reboot.
+	 */
+	dn = of_get_flat_dt_subnode_by_name(node, "dump");
+	if (dn == -FDT_ERR_NOTFOUND) {
+		pr_debug("FADump support is missing!\n");
+		return;
+	}
+
+	if (!of_flat_dt_is_compatible(dn, "ibm,opal-dump")) {
+		pr_err("Support missing for this f/w version!\n");
+		return;
+	}
+
+	prop = of_get_flat_dt_prop(dn, "fw-load-area", &len);
+	if (prop) {
+		/*
+		 * Each f/w load area is an (address,size) pair,
+		 * 2 cells each, totalling 4 cells per range.
+		 */
+		for (i = 0; i < len / (sizeof(*prop) * 4); i++) {
+			u64 base, end;
+
+			base = of_read_number(prop + (i * 4) + 0, 2);
+			end = base;
+			end += of_read_number(prop + (i * 4) + 2, 2);
+			/* F/W must not load anything above the minimum boot memory. */
+			if (end > OPAL_FADUMP_MIN_BOOT_MEM) {
+				pr_err("F/W load area: 0x%llx-0x%llx\n",
+				       base, end);
+				pr_err("F/W version not supported!\n");
+				return;
+			}
+		}
+	}
+
+	fadump_conf->ops = &opal_fadump_ops;
+	fadump_conf->fadump_supported = 1;
+
+	/*
+	 * Firmware supports 32-bit field for size. Align it to PAGE_SIZE
+	 * and request firmware to copy multiple kernel boot memory regions.
+	 */
+	fadump_conf->max_copy_size = ALIGN_DOWN(U32_MAX, PAGE_SIZE);
+
+	/*
+	 * Check if dump has been initiated on last reboot.
+	 */
+	prop = of_get_flat_dt_prop(dn, "mpipl-boot", NULL);
+	if (!prop)
+		return;
+
+	ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_KERNEL, &be_addr);
+	if ((ret != OPAL_SUCCESS) || !be_addr) {
+		pr_err("Failed to get Kernel metadata (%lld)\n", ret);
+		return;
+	}
+
+	addr = be64_to_cpu(be_addr);
+	pr_debug("Kernel metadata addr: %llx\n", addr);
+
+	opal_fdm_active = __va(addr);
+	if (opal_fdm_active->version != OPAL_FADUMP_VERSION) {
+		pr_warn("Supported kernel metadata version: %u, found: %d!\n",
+			OPAL_FADUMP_VERSION, opal_fdm_active->version);
+		pr_warn("WARNING: Kernel metadata format mismatch identified! Core file maybe corrupted..\n");
+	}
+
+	/* Kernel regions not registered with f/w for MPIPL */
+	if (be16_to_cpu(opal_fdm_active->registered_regions) == 0) {
+		opal_fdm_active = NULL;
+		return;
+	}
+
+	/*
+	 * CPU metadata is optional. Only trust be_addr when the query
+	 * succeeded: otherwise it may still hold the kernel metadata
+	 * address read above.
+	 */
+	ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_CPU, &be_addr);
+	if ((ret == OPAL_SUCCESS) && be_addr) {
+		addr = be64_to_cpu(be_addr);
+		pr_debug("CPU metadata addr: %llx\n", addr);
+		opal_cpu_metadata = __va(addr);
+	}
+
+	pr_info("Firmware-assisted dump is active.\n");
+	fadump_conf->dump_active = 1;
+	opal_fadump_get_config(fadump_conf, opal_fdm_active);
+}
+#endif /* !CONFIG_PRESERVE_FA_DUMP */
diff --git a/arch/powerpc/platforms/powernv/opal-fadump.h b/arch/powerpc/platforms/powernv/opal-fadump.h
new file mode 100644
index 000000000..3f715efb0
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-fadump.h
@@ -0,0 +1,146 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Firmware-Assisted Dump support on POWER platform (OPAL).
+ *
+ * Copyright 2019, Hari Bathini, IBM Corporation.
+ */
+
+#ifndef _POWERNV_OPAL_FADUMP_H
+#define _POWERNV_OPAL_FADUMP_H
+
+#include <asm/reg.h>
+
+/*
+ * With kernel & initrd loaded at 512MB (with 256MB size), enforce a minimum
+ * boot memory size of 768MB to ensure f/w loading kernel and initrd doesn't
+ * mess with crash'ed kernel's memory during MPIPL.
+ */
+#define OPAL_FADUMP_MIN_BOOT_MEM (0x30000000UL)
+
+/*
+ * OPAL FADump metadata structure format version
+ *
+ * OPAL FADump kernel metadata structure stores kernel metadata needed to
+ * register-for/process crash dump. Format version is used to keep a tab on
+ * the changes in the structure format. The changes, if any, to the format
+ * are expected to be minimal and backward compatible.
+ */
+#define OPAL_FADUMP_VERSION 0x1
+
+/*
+ * OPAL FADump kernel metadata
+ *
+ * The address of this structure will be registered with f/w for retrieving
+ * in the capture kernel to process the crash dump.
+ *
+ * The structure is packed and its multi-byte fields are stored big-endian.
+ */
+struct opal_fadump_mem_struct {
+ /* Metadata format version, see OPAL_FADUMP_VERSION */
+ u8 version;
+ u8 reserved[3];
+ __be16 region_cnt; /* number of regions */
+ __be16 registered_regions; /* Regions registered for MPIPL */
+ /* Address of the FADump crash info header */
+ __be64 fadumphdr_addr;
+ /* Boot memory regions; the first region_cnt entries are in use */
+ struct opal_mpipl_region rgn[FADUMP_MAX_MEM_REGS];
+} __packed;
+
+/*
+ * CPU state data
+ *
+ * CPU state data information is provided by f/w. The format for this data
+ * is defined in the HDAT spec. Version is used to keep a tab on the changes
+ * in this CPU state data format. Changes to this format are unlikely, but
+ * if there are any changes, please refer to latest HDAT specification.
+ */
+#define HDAT_FADUMP_CPU_DATA_VER 1
+
+#define HDAT_FADUMP_CORE_INACTIVE (0x0F)
+
+/*
+ * HDAT thread header for register entries
+ *
+ * The structure is packed and its multi-byte fields are stored big-endian.
+ */
+struct hdat_fadump_thread_hdr {
+ /* PIR of the thread this header describes */
+ __be32 pir;
+ /* 0x00 - 0x0F - The corresponding stop state of the core */
+ u8 core_state;
+ u8 reserved[3];
+
+ /* The offset is counted from the position of this field in the header */
+ __be32 offset; /* Offset to Register Entries array */
+ __be32 ecnt; /* Number of entries */
+ __be32 esize; /* Alloc size of each array entry in bytes */
+ __be32 eactsz; /* Actual size of each array entry in bytes */
+} __packed;
+
+/* Register types populated by f/w */
+#define HDAT_FADUMP_REG_TYPE_GPR 0x01
+#define HDAT_FADUMP_REG_TYPE_SPR 0x02
+
+/* ID numbers used by f/w while populating certain registers */
+#define HDAT_FADUMP_REG_ID_NIP 0x7D0
+#define HDAT_FADUMP_REG_ID_MSR 0x7D1
+#define HDAT_FADUMP_REG_ID_CCR 0x7D2
+
+/*
+ * HDAT register entry.
+ *
+ * One saved register as laid out by f/w: type, number and value.
+ */
+struct hdat_fadump_reg_entry {
+ /* HDAT_FADUMP_REG_TYPE_GPR or HDAT_FADUMP_REG_TYPE_SPR */
+ __be32 reg_type;
+ /* GPR index, SPR number or one of the HDAT_FADUMP_REG_ID_* values */
+ __be32 reg_num;
+ /* Saved value of the register */
+ __be64 reg_val;
+} __packed;
+
+/*
+ * Store @reg_val in the member of @regs selected by @reg_type and @reg_num.
+ *
+ * A GPR entry is stored in regs->gpr[] when @reg_num is below 32 and
+ * dropped otherwise. For any other type, @reg_num is matched against the
+ * SPR numbers and HDAT register ids handled below; unknown numbers are
+ * ignored.
+ */
+static inline void opal_fadump_set_regval_regnum(struct pt_regs *regs,
+						 u32 reg_type, u32 reg_num,
+						 u64 reg_val)
+{
+	if (reg_type != HDAT_FADUMP_REG_TYPE_GPR) {
+		switch (reg_num) {
+		case HDAT_FADUMP_REG_ID_NIP:
+			regs->nip = reg_val;
+			break;
+		case HDAT_FADUMP_REG_ID_MSR:
+			regs->msr = reg_val;
+			break;
+		case HDAT_FADUMP_REG_ID_CCR:
+			regs->ccr = reg_val;
+			break;
+		case SPRN_LR:
+			regs->link = reg_val;
+			break;
+		case SPRN_CTR:
+			regs->ctr = reg_val;
+			break;
+		case SPRN_XER:
+			regs->xer = reg_val;
+			break;
+		case SPRN_DSISR:
+			regs->dsisr = reg_val;
+			break;
+		case SPRN_DAR:
+			regs->dar = reg_val;
+			break;
+		default:
+			break;
+		}
+		return;
+	}
+
+	/* General purpose register: only r0-r31 exist. */
+	if (reg_num < 32)
+		regs->gpr[reg_num] = reg_val;
+}
+
+/*
+ * Fill @regs from an array of HDAT register entries.
+ *
+ * @bufp:           start of the register entries array.
+ * @regs_cnt:       number of entries to read.
+ * @reg_entry_size: distance in bytes from one entry to the next.
+ * @cpu_endian:     when true, each value is converted from big-endian to
+ *                  CPU byte order; when false it is taken as stored.
+ * @regs:           output; zeroed first, then populated entry by entry.
+ */
+static inline void opal_fadump_read_regs(char *bufp, unsigned int regs_cnt,
+					 unsigned int reg_entry_size,
+					 bool cpu_endian,
+					 struct pt_regs *regs)
+{
+	unsigned int left;
+
+	memset(regs, 0, sizeof(*regs));
+
+	for (left = regs_cnt; left > 0; left--) {
+		struct hdat_fadump_reg_entry *entry =
+			(struct hdat_fadump_reg_entry *)bufp;
+		u64 value;
+
+		if (cpu_endian)
+			value = be64_to_cpu(entry->reg_val);
+		else
+			value = (u64)(entry->reg_val);
+
+		opal_fadump_set_regval_regnum(regs,
+					      be32_to_cpu(entry->reg_type),
+					      be32_to_cpu(entry->reg_num),
+					      value);
+
+		/* Entries may be larger than the structure read from them. */
+		bufp += reg_entry_size;
+	}
+}
+
+#endif /* _POWERNV_OPAL_FADUMP_H */
diff --git a/arch/powerpc/platforms/powernv/opal-flash.c b/arch/powerpc/platforms/powernv/opal-flash.c
new file mode 100644
index 000000000..d5ea04e8e
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-flash.c
@@ -0,0 +1,566 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * PowerNV OPAL Firmware Update Interface
+ *
+ * Copyright 2013 IBM Corp.
+ */
+
+#define DEBUG
+
+#include <linux/kernel.h>
+#include <linux/reboot.h>
+#include <linux/init.h>
+#include <linux/kobject.h>
+#include <linux/sysfs.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/vmalloc.h>
+#include <linux/pagemap.h>
+#include <linux/delay.h>
+
+#include <asm/opal.h>
+
+/*
+ * FLASH status codes. validate_show() and manage_show() reset their status
+ * to FLASH_NO_OP after reporting it.
+ */
+#define FLASH_NO_OP -1099 /* No operation initiated by user */
+#define FLASH_NO_AUTH -9002 /* Not a service authority partition */
+
+/* Validate image status values */
+#define VALIDATE_IMG_READY -1001 /* Image ready for validation */
+#define VALIDATE_IMG_INCOMPLETE -1002 /* User copied < VALIDATE_BUF_SIZE */
+
+/* Manage image status values */
+#define MANAGE_ACTIVE_ERR -9001 /* Cannot overwrite active img */
+
+/* Flash image status values */
+#define FLASH_IMG_READY 0 /* Img ready for flash on reboot */
+#define FLASH_INVALID_IMG -1003 /* Flash image shorter than expected */
+#define FLASH_IMG_NULL_DATA -1004 /* Bad data in sg list entry */
+#define FLASH_IMG_BAD_LEN -1005 /* Bad length in sg list entry */
+
+/* Manage operation tokens (written to manage_flash as '0' / '1') */
+#define FLASH_REJECT_TMP_SIDE 0 /* Reject temporary fw image */
+#define FLASH_COMMIT_TMP_SIDE 1 /* Commit temporary fw image */
+
+/* Update tokens (written to update_flash as '0' / '1') */
+#define FLASH_UPDATE_CANCEL 0 /* Cancel update request */
+#define FLASH_UPDATE_INIT 1 /* Initiate update */
+
+/* Validate image update result tokens */
+#define VALIDATE_TMP_UPDATE 0 /* T side will be updated */
+#define VALIDATE_FLASH_AUTH 1 /* Partition does not have authority */
+#define VALIDATE_INVALID_IMG 2 /* Candidate image is not valid */
+#define VALIDATE_CUR_UNKNOWN 3 /* Current fixpack level is unknown */
+/*
+ * Current T side will be committed to P side before being replaced with new
+ * image, and the new image is downlevel from current image
+ */
+#define VALIDATE_TMP_COMMIT_DL 4
+/*
+ * Current T side will be committed to P side before being replaced with new
+ * image
+ */
+#define VALIDATE_TMP_COMMIT 5
+/*
+ * T side will be updated with a downlevel image
+ */
+#define VALIDATE_TMP_UPDATE_DL 6
+/*
+ * The candidate image's release date is later than the system's firmware
+ * service entitlement date - service warranty period has expired
+ */
+#define VALIDATE_OUT_OF_WRNTY 7
+
+/*
+ * Validate buffer size: number of leading image bytes copied for validation,
+ * and also the smallest image size alloc_image_buf() accepts.
+ */
+#define VALIDATE_BUF_SIZE 4096
+
+/*
+ * XXX: Assume candidate image size is <= 1GB. Also used as the advertised
+ * size of the "image" sysfs binary attribute.
+ */
+#define MAX_IMAGE_SIZE 0x40000000
+
+/* Image status: lifecycle of the buffer held in image_data */
+enum {
+ IMAGE_INVALID, /* no usable buffer */
+ IMAGE_LOADING, /* buffer allocated, image still being written */
+ IMAGE_READY, /* all image_data.size bytes have been received */
+};
+
+/* Candidate image data */
+struct image_data_t {
+ int status; /* IMAGE_INVALID / IMAGE_LOADING / IMAGE_READY */
+ void *data; /* vzalloc'ed image buffer, NULL when none */
+ uint32_t size; /* total image size taken from the image header */
+};
+
+/* Candidate image header, copied from the first bytes written at offset 0 */
+struct image_header_t {
+ uint16_t magic;
+ uint16_t version;
+ uint32_t size; /* read with be32_to_cpu() in alloc_image_buf() */
+};
+
+struct validate_flash_t {
+ int status; /* Return status */
+ void *buf; /* Candidate image buffer */
+ uint32_t buf_size; /* Image size */
+ uint32_t result; /* Update results token */
+};
+
+struct manage_flash_t {
+ int status; /* Return status */
+};
+
+struct update_flash_t {
+ int status; /* Return status */
+};
+
+static struct image_header_t image_header;
+static struct image_data_t image_data;
+static struct validate_flash_t validate_flash_data;
+static struct manage_flash_t manage_flash_data;
+
+/* Initialize update_flash_data status to No Operation */
+static struct update_flash_t update_flash_data = {
+ .status = FLASH_NO_OP,
+};
+
+static DEFINE_MUTEX(image_data_mutex); /* held by validate_store(), update_store() and image_data_write() */
+
+/*
+ * Validate candidate image
+ *
+ * Passes the physical address of validate_flash_data.buf to
+ * opal_validate_flash() and records the return code, the size value left
+ * behind by the call and the result token in validate_flash_data.
+ */
+static inline void opal_flash_validate(void)
+{
+	long ret;
+	void *buf = validate_flash_data.buf;
+	__be32 size = cpu_to_be32(validate_flash_data.buf_size);
+	/* Zeroed so that a call which never writes it is not read back as garbage */
+	__be32 result = 0;
+
+	ret = opal_validate_flash(__pa(buf), &size, &result);
+
+	validate_flash_data.status = ret;
+	validate_flash_data.buf_size = be32_to_cpu(size);
+	validate_flash_data.result = be32_to_cpu(result);
+}
+
+/*
+ * Validate output format:
+ *     validate result token
+ *     current image version details
+ *     new image version details
+ *
+ * When the stored status is below VALIDATE_TMP_UPDATE only that status is
+ * printed. Version details are appended for result VALIDATE_TMP_UPDATE and
+ * for results of VALIDATE_CUR_UNKNOWN and above, capped so that the output
+ * never exceeds VALIDATE_BUF_SIZE bytes. Every read resets the status to
+ * FLASH_NO_OP.
+ */
+static ssize_t validate_show(struct kobject *kobj,
+			     struct kobj_attribute *attr, char *buf)
+{
+	struct validate_flash_t *args_buf = &validate_flash_data;
+	int len;
+
+	/* Candidate image is not validated */
+	if (args_buf->status < VALIDATE_TMP_UPDATE) {
+		len = sysfs_emit(buf, "%d\n", args_buf->status);
+		goto out;
+	}
+
+	/* Result token */
+	len = sysfs_emit(buf, "%d\n", args_buf->result);
+
+	/* Current and candidate image version details */
+	if ((args_buf->result != VALIDATE_TMP_UPDATE) &&
+	    (args_buf->result < VALIDATE_CUR_UNKNOWN))
+		goto out;
+
+	if (args_buf->buf_size > (VALIDATE_BUF_SIZE - len)) {
+		/* Truncate the details so the total stays at VALIDATE_BUF_SIZE */
+		memcpy(buf + len, args_buf->buf, VALIDATE_BUF_SIZE - len);
+		len = VALIDATE_BUF_SIZE;
+	} else {
+		memcpy(buf + len, args_buf->buf, args_buf->buf_size);
+		len += args_buf->buf_size;
+	}
+out:
+	/* Set status to default */
+	args_buf->status = FLASH_NO_OP;
+	return len;
+}
+
+/*
+ * Validate candidate firmware image
+ *
+ * Writing '1' copies the first VALIDATE_BUF_SIZE bytes of a fully loaded
+ * image into the validate buffer and asks firmware to validate them. If no
+ * complete image of at least that size is loaded, the result is marked
+ * VALIDATE_INVALID_IMG / VALIDATE_IMG_INCOMPLETE instead. Any other first
+ * character is rejected with -EINVAL.
+ */
+static ssize_t validate_store(struct kobject *kobj,
+			      struct kobj_attribute *attr,
+			      const char *buf, size_t count)
+{
+	struct validate_flash_t *req = &validate_flash_data;
+
+	if (buf[0] != '1')
+		return -EINVAL;
+
+	mutex_lock(&image_data_mutex);
+
+	if (image_data.status == IMAGE_READY &&
+	    image_data.size >= VALIDATE_BUF_SIZE) {
+		/* Only the first 4k bytes of the candidate image are needed */
+		memcpy(req->buf, image_data.data, VALIDATE_BUF_SIZE);
+
+		req->status = VALIDATE_IMG_READY;
+		req->buf_size = VALIDATE_BUF_SIZE;
+
+		/* Hand the copy to firmware */
+		opal_flash_validate();
+	} else {
+		req->result = VALIDATE_INVALID_IMG;
+		req->status = VALIDATE_IMG_INCOMPLETE;
+	}
+
+	mutex_unlock(&image_data_mutex);
+	return count;
+}
+
+/*
+ * Manage flash routine: run opal_manage_flash() for @op and remember its
+ * return code in manage_flash_data.status.
+ */
+static inline void opal_flash_manage(uint8_t op)
+{
+	manage_flash_data.status = opal_manage_flash(op);
+}
+
+/*
+ * Show manage flash status
+ *
+ * Prints the status of the last manage operation and then resets it to
+ * FLASH_NO_OP, so each result is reported once.
+ */
+static ssize_t manage_show(struct kobject *kobj,
+			   struct kobj_attribute *attr, char *buf)
+{
+	struct manage_flash_t *const args_buf = &manage_flash_data;
+	int rc;
+
+	rc = sysfs_emit(buf, "%d\n", args_buf->status);
+	/* Set status to default */
+	args_buf->status = FLASH_NO_OP;
+	return rc;
+}
+
+/*
+ * Manage operations, selected by the first character written:
+ *     0 - Reject
+ *     1 - Commit
+ * Anything else returns -EINVAL without calling firmware.
+ */
+static ssize_t manage_store(struct kobject *kobj,
+			    struct kobj_attribute *attr,
+			    const char *buf, size_t count)
+{
+	uint8_t op;
+
+	if (buf[0] == '0')
+		op = FLASH_REJECT_TMP_SIDE;
+	else if (buf[0] == '1')
+		op = FLASH_COMMIT_TMP_SIDE;
+	else
+		return -EINVAL;
+
+	/* commit/reject temporary image */
+	opal_flash_manage(op);
+	return count;
+}
+
+/*
+ * OPAL update flash
+ *
+ * For FLASH_UPDATE_CANCEL, opal_update_flash() is called with a zero
+ * address. Otherwise a scatter-gather list describing image_data is built
+ * and its physical address is passed instead.
+ *
+ * Returns the opal_update_flash() return code, or OPAL_PARAMETER when the
+ * scatter-gather list could not be built.
+ */
+static int opal_flash_update(int op)
+{
+	struct opal_sg_list *list;
+	unsigned long addr;
+	int64_t rc = OPAL_PARAMETER;
+
+	if (op == FLASH_UPDATE_CANCEL) {
+		pr_alert("FLASH: Image update cancelled\n");
+		/* addr is an integer address, not a character */
+		addr = 0;
+		goto flash;
+	}
+
+	list = opal_vmalloc_to_sg_list(image_data.data, image_data.size);
+	if (!list)
+		goto invalid_img;
+
+	/* First entry address */
+	addr = __pa(list);
+
+flash:
+	rc = opal_update_flash(addr);
+
+invalid_img:
+	return rc;
+}
+
+/*
+ * This gets called just before system reboots. When an image is queued
+ * (status FLASH_IMG_READY) it warns that flashing is about to start.
+ */
+void opal_flash_update_print_message(void)
+{
+	if (update_flash_data.status == FLASH_IMG_READY) {
+		pr_alert("FLASH: Flashing new firmware\n");
+		pr_alert("FLASH: Image is %u bytes\n", image_data.size);
+		pr_alert("FLASH: Performing flash and reboot/shutdown\n");
+		pr_alert("FLASH: This will take several minutes. Do not power off!\n");
+
+		/* Small delay to help getting the above message out */
+		msleep(500);
+	}
+}
+
+/*
+ * Show candidate image status
+ *
+ * Prints update_flash_data.status; unlike the validate and manage
+ * attributes, reading does not reset it.
+ */
+static ssize_t update_show(struct kobject *kobj,
+			   struct kobj_attribute *attr, char *buf)
+{
+	struct update_flash_t *const args_buf = &update_flash_data;
+
+	return sysfs_emit(buf, "%d\n", args_buf->status);
+}
+
+/*
+ * Set update image flag
+ *     1 - Flash new image
+ *     0 - Cancel flash request
+ *
+ * '0' cancels a queued update (if any) and resets the status to FLASH_NO_OP.
+ * '1' queues the loaded image, or records FLASH_INVALID_IMG when no complete
+ * image is loaded. Any other first character returns -EINVAL.
+ */
+static ssize_t update_store(struct kobject *kobj,
+			    struct kobj_attribute *attr,
+			    const char *buf, size_t count)
+{
+	struct update_flash_t *const state = &update_flash_data;
+	int rc = count;
+
+	mutex_lock(&image_data_mutex);
+
+	if (buf[0] == '0') {
+		if (state->status == FLASH_IMG_READY)
+			opal_flash_update(FLASH_UPDATE_CANCEL);
+		state->status = FLASH_NO_OP;
+	} else if (buf[0] == '1') {
+		/* Image is loaded? */
+		if (image_data.status != IMAGE_READY)
+			state->status = FLASH_INVALID_IMG;
+		else
+			state->status = opal_flash_update(FLASH_UPDATE_INIT);
+	} else {
+		rc = -EINVAL;
+	}
+
+	mutex_unlock(&image_data_mutex);
+	return rc;
+}
+
+/*
+ * Free image buffer
+ *
+ * Clears the reserved flag on every page of the buffer, releases it and
+ * marks the image as IMAGE_INVALID.
+ */
+static void free_image_buf(void)
+{
+	void *page_addr = image_data.data;
+	int remaining;
+
+	for (remaining = PAGE_ALIGN(image_data.size); remaining > 0;
+	     remaining -= PAGE_SIZE) {
+		ClearPageReserved(vmalloc_to_page(page_addr));
+		page_addr += PAGE_SIZE;
+	}
+
+	vfree(image_data.data);
+	image_data.data = NULL;
+	image_data.status = IMAGE_INVALID;
+}
+
+/*
+ * Allocate image buffer.
+ *
+ * Reads the image header from the first @count bytes in @buffer, checks the
+ * advertised size against VALIDATE_BUF_SIZE and MAX_IMAGE_SIZE, allocates a
+ * zeroed page-aligned buffer for it and marks every page reserved.
+ *
+ * Returns 0 with the image in IMAGE_LOADING state, -EINVAL for a short
+ * header or an out-of-range size, -ENOMEM if the allocation fails.
+ */
+static int alloc_image_buf(char *buffer, size_t count)
+{
+	void *page_addr;
+	int remaining;
+
+	if (count < sizeof(image_header)) {
+		pr_warn("FLASH: Invalid candidate image\n");
+		return -EINVAL;
+	}
+
+	memcpy(&image_header, buffer, sizeof(image_header));
+	image_data.size = be32_to_cpu(image_header.size);
+	pr_debug("FLASH: Candidate image size = %u\n", image_data.size);
+
+	if (image_data.size > MAX_IMAGE_SIZE) {
+		pr_warn("FLASH: Too large image\n");
+		return -EINVAL;
+	}
+	if (image_data.size < VALIDATE_BUF_SIZE) {
+		pr_warn("FLASH: Image is shorter than expected\n");
+		return -EINVAL;
+	}
+
+	image_data.data = vzalloc(PAGE_ALIGN(image_data.size));
+	if (!image_data.data) {
+		pr_err("%s : Failed to allocate memory\n", __func__);
+		return -ENOMEM;
+	}
+
+	/* Pin memory */
+	page_addr = image_data.data;
+	for (remaining = PAGE_ALIGN(image_data.size); remaining > 0;
+	     remaining -= PAGE_SIZE) {
+		SetPageReserved(vmalloc_to_page(page_addr));
+		page_addr += PAGE_SIZE;
+	}
+
+	image_data.status = IMAGE_LOADING;
+	return 0;
+}
+
+/*
+ * Copy candidate image
+ *
+ * A write at offset 0 starts a new image: any previous buffer is freed, a
+ * queued update is cancelled, and a buffer sized from the image header is
+ * allocated. Every write is then copied to @pos in that buffer; the image
+ * becomes IMAGE_READY once the final byte has been written.
+ *
+ * Returns @count on success, the alloc_image_buf() error for a bad header,
+ * -ENOMEM when no image is being loaded, -EINVAL when the write would run
+ * past the advertised image size.
+ */
+static ssize_t image_data_write(struct file *filp, struct kobject *kobj,
+				struct bin_attribute *bin_attr,
+				char *buffer, loff_t pos, size_t count)
+{
+	const unsigned long long end = pos + count;
+	int rc;
+
+	mutex_lock(&image_data_mutex);
+
+	/* New image ? */
+	if (pos == 0) {
+		/* Free memory, if already allocated */
+		if (image_data.data)
+			free_image_buf();
+
+		/* Cancel outstanding image update request */
+		if (update_flash_data.status == FLASH_IMG_READY)
+			opal_flash_update(FLASH_UPDATE_CANCEL);
+
+		/* Allocate memory */
+		rc = alloc_image_buf(buffer, count);
+		if (rc)
+			goto unlock;
+	}
+
+	if (image_data.status != IMAGE_LOADING) {
+		rc = -ENOMEM;
+	} else if (end > image_data.size) {
+		rc = -EINVAL;
+	} else {
+		memcpy(image_data.data + pos, buffer, count);
+		rc = count;
+
+		/* Set image status */
+		if (end == image_data.size) {
+			pr_debug("FLASH: Candidate image loaded....\n");
+			image_data.status = IMAGE_READY;
+		}
+	}
+
+unlock:
+	mutex_unlock(&image_data_mutex);
+	return rc;
+}
+
+/*
+ * sysfs interface :
+ * OPAL uses below sysfs files for code update.
+ * We create these files under /sys/firmware/opal.
+ *
+ * image : Interface to load candidate firmware image (write-only, 0200)
+ * validate_flash : Validate firmware image (0600)
+ * manage_flash : Commit/Reject firmware image (0600)
+ * update_flash : Flash new firmware image (0600)
+ *
+ * The three text attributes are registered together through
+ * image_op_attr_group; "image" is a binary attribute registered separately.
+ */
+static const struct bin_attribute image_data_attr = {
+ .attr = {.name = "image", .mode = 0200},
+ .size = MAX_IMAGE_SIZE, /* Limit image size */
+ .write = image_data_write,
+};
+
+static struct kobj_attribute validate_attribute =
+ __ATTR(validate_flash, 0600, validate_show, validate_store);
+
+static struct kobj_attribute manage_attribute =
+ __ATTR(manage_flash, 0600, manage_show, manage_store);
+
+static struct kobj_attribute update_attribute =
+ __ATTR(update_flash, 0600, update_show, update_store);
+
+static struct attribute *image_op_attrs[] = {
+ &validate_attribute.attr,
+ &manage_attribute.attr,
+ &update_attribute.attr,
+ NULL /* need to NULL terminate the list of attributes */
+};
+
+static const struct attribute_group image_op_attr_group = {
+ .attrs = image_op_attrs,
+};
+
+/*
+ * Create the firmware update sysfs interface on opal_kobj.
+ *
+ * Returns early when opal_check_token(OPAL_FLASH_VALIDATE) fails. On any
+ * later failure the validate buffer is released again and any sysfs group
+ * that was already created is removed.
+ */
+void __init opal_flash_update_init(void)
+{
+	int ret;
+
+	/* Firmware update is not supported by firmware */
+	if (!opal_check_token(OPAL_FLASH_VALIDATE))
+		return;
+
+	/* Allocate validate image buffer */
+	validate_flash_data.buf = kzalloc(VALIDATE_BUF_SIZE, GFP_KERNEL);
+	if (!validate_flash_data.buf) {
+		pr_err("%s : Failed to allocate memory\n", __func__);
+		return;
+	}
+
+	/* Make sure /sys/firmware/opal directory is created */
+	if (!opal_kobj) {
+		pr_warn("FLASH: opal kobject is not available\n");
+		goto nokobj;
+	}
+
+	/* Set default status before the files become visible to readers */
+	validate_flash_data.status = FLASH_NO_OP;
+	manage_flash_data.status = FLASH_NO_OP;
+	update_flash_data.status = FLASH_NO_OP;
+	image_data.status = IMAGE_INVALID;
+
+	/* Create the sysfs files */
+	ret = sysfs_create_group(opal_kobj, &image_op_attr_group);
+	if (ret) {
+		pr_warn("FLASH: Failed to create sysfs files\n");
+		goto nokobj;
+	}
+
+	ret = sysfs_create_bin_file(opal_kobj, &image_data_attr);
+	if (ret) {
+		pr_warn("FLASH: Failed to create sysfs files\n");
+		goto nosysfs_file;
+	}
+
+	return;
+
+nosysfs_file:
+	sysfs_remove_group(opal_kobj, &image_op_attr_group);
+
+nokobj:
+	kfree(validate_flash_data.buf);
+	/* Do not leave a dangling pointer in the file-scope state */
+	validate_flash_data.buf = NULL;
+}
diff --git a/arch/powerpc/platforms/powernv/opal-hmi.c b/arch/powerpc/platforms/powernv/opal-hmi.c
new file mode 100644
index 000000000..f0c1830de
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-hmi.c
@@ -0,0 +1,381 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * OPAL hypervisor Maintenance interrupt handling support in PowerNV.
+ *
+ * Copyright 2014 IBM Corporation
+ * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
+ */
+
+#undef DEBUG
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/of.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+
+#include <asm/opal.h>
+#include <asm/cputable.h>
+#include <asm/machdep.h>
+
+#include "powernv.h"
+
+static int opal_hmi_handler_nb_init; /* set to 1 once the notifier has been registered */
+struct OpalHmiEvtNode { /* one queued HMI event awaiting the work handler */
+ struct list_head list; /* link in opal_hmi_evt_list */
+ struct OpalHMIEvent hmi_evt; /* copy of the event taken from the OPAL message */
+};
+
+struct xstop_reason { /* row of a checkstop decode table */
+ uint32_t xstop_reason; /* bit mask tested against the event's reason word */
+ const char *unit_failed; /* unit name printed as "[Unit: ...]" */
+ const char *description; /* text printed after the unit name */
+};
+
+static LIST_HEAD(opal_hmi_evt_list); /* events queued by opal_handle_hmi_event() */
+static DEFINE_SPINLOCK(opal_hmi_evt_lock); /* protects opal_hmi_evt_list */
+
+/*
+ * Print the decoded reasons for a core checkstop.
+ *
+ * Each entry of the table whose mask matches the event's reason word
+ * (converted from big-endian) is printed on its own line at printk level
+ * @level. A zero reason word is reported as an unknown core checkstop.
+ */
+static void print_core_checkstop_reason(const char *level,
+					struct OpalHMIEvent *hmi_evt)
+{
+	static const struct xstop_reason reasons[] = {
+		{ CORE_CHECKSTOP_IFU_REGFILE, "IFU",
+		  "RegFile core check stop" },
+		{ CORE_CHECKSTOP_IFU_LOGIC, "IFU", "Logic core check stop" },
+		{ CORE_CHECKSTOP_PC_DURING_RECOV, "PC",
+		  "Core checkstop during recovery" },
+		{ CORE_CHECKSTOP_ISU_REGFILE, "ISU",
+		  "RegFile core check stop (mapper error)" },
+		{ CORE_CHECKSTOP_ISU_LOGIC, "ISU", "Logic core check stop" },
+		{ CORE_CHECKSTOP_FXU_LOGIC, "FXU", "Logic core check stop" },
+		{ CORE_CHECKSTOP_VSU_LOGIC, "VSU", "Logic core check stop" },
+		{ CORE_CHECKSTOP_PC_RECOV_IN_MAINT_MODE, "PC",
+		  "Recovery in maintenance mode" },
+		{ CORE_CHECKSTOP_LSU_REGFILE, "LSU",
+		  "RegFile core check stop" },
+		{ CORE_CHECKSTOP_PC_FWD_PROGRESS, "PC",
+		  "Forward Progress Error" },
+		{ CORE_CHECKSTOP_LSU_LOGIC, "LSU", "Logic core check stop" },
+		{ CORE_CHECKSTOP_PC_LOGIC, "PC", "Logic core check stop" },
+		{ CORE_CHECKSTOP_PC_HYP_RESOURCE, "PC",
+		  "Hypervisor Resource error - core check stop" },
+		{ CORE_CHECKSTOP_PC_HANG_RECOV_FAILED, "PC",
+		  "Hang Recovery Failed (core check stop)" },
+		{ CORE_CHECKSTOP_PC_AMBI_HANG_DETECTED, "PC",
+		  "Ambiguous Hang Detected (unknown source)" },
+		{ CORE_CHECKSTOP_PC_DEBUG_TRIG_ERR_INJ, "PC",
+		  "Debug Trigger Error inject" },
+		{ CORE_CHECKSTOP_PC_SPRD_HYP_ERR_INJ, "PC",
+		  "Hypervisor check stop via SPRC/SPRD" },
+	};
+	unsigned int idx;
+	uint32_t bits;
+
+	/* Validity check */
+	if (!hmi_evt->u.xstop_error.xstop_reason) {
+		printk("%s Unknown Core check stop.\n", level);
+		return;
+	}
+
+	printk("%s CPU PIR: %08x\n", level,
+	       be32_to_cpu(hmi_evt->u.xstop_error.u.pir));
+
+	bits = be32_to_cpu(hmi_evt->u.xstop_error.xstop_reason);
+	for (idx = 0; idx < ARRAY_SIZE(reasons); idx++) {
+		if (!(bits & reasons[idx].xstop_reason))
+			continue;
+		printk("%s [Unit: %-3s] %s\n", level,
+		       reasons[idx].unit_failed,
+		       reasons[idx].description);
+	}
+}
+
+/*
+ * Print the decoded reasons for an NX checkstop.
+ *
+ * Works like the core variant: the chip id is printed first, then one line
+ * per table entry whose mask matches the reason word (converted from
+ * big-endian). A zero reason word is reported as an unknown NX checkstop.
+ */
+static void print_nx_checkstop_reason(const char *level,
+				      struct OpalHMIEvent *hmi_evt)
+{
+	static const struct xstop_reason reasons[] = {
+		{ NX_CHECKSTOP_SHM_INVAL_STATE_ERR, "DMA & Engine",
+		  "SHM invalid state error" },
+		{ NX_CHECKSTOP_DMA_INVAL_STATE_ERR_1, "DMA & Engine",
+		  "DMA invalid state error bit 15" },
+		{ NX_CHECKSTOP_DMA_INVAL_STATE_ERR_2, "DMA & Engine",
+		  "DMA invalid state error bit 16" },
+		{ NX_CHECKSTOP_DMA_CH0_INVAL_STATE_ERR, "DMA & Engine",
+		  "Channel 0 invalid state error" },
+		{ NX_CHECKSTOP_DMA_CH1_INVAL_STATE_ERR, "DMA & Engine",
+		  "Channel 1 invalid state error" },
+		{ NX_CHECKSTOP_DMA_CH2_INVAL_STATE_ERR, "DMA & Engine",
+		  "Channel 2 invalid state error" },
+		{ NX_CHECKSTOP_DMA_CH3_INVAL_STATE_ERR, "DMA & Engine",
+		  "Channel 3 invalid state error" },
+		{ NX_CHECKSTOP_DMA_CH4_INVAL_STATE_ERR, "DMA & Engine",
+		  "Channel 4 invalid state error" },
+		{ NX_CHECKSTOP_DMA_CH5_INVAL_STATE_ERR, "DMA & Engine",
+		  "Channel 5 invalid state error" },
+		{ NX_CHECKSTOP_DMA_CH6_INVAL_STATE_ERR, "DMA & Engine",
+		  "Channel 6 invalid state error" },
+		{ NX_CHECKSTOP_DMA_CH7_INVAL_STATE_ERR, "DMA & Engine",
+		  "Channel 7 invalid state error" },
+		{ NX_CHECKSTOP_DMA_CRB_UE, "DMA & Engine",
+		  "UE error on CRB(CSB address, CCB)" },
+		{ NX_CHECKSTOP_DMA_CRB_SUE, "DMA & Engine",
+		  "SUE error on CRB(CSB address, CCB)" },
+		{ NX_CHECKSTOP_PBI_ISN_UE, "PowerBus Interface",
+		  "CRB Kill ISN received while holding ISN with UE error" },
+	};
+	unsigned int idx;
+	uint32_t bits;
+
+	/* Validity check */
+	if (!hmi_evt->u.xstop_error.xstop_reason) {
+		printk("%s Unknown NX check stop.\n", level);
+		return;
+	}
+
+	printk("%s NX checkstop on CHIP ID: %x\n", level,
+	       be32_to_cpu(hmi_evt->u.xstop_error.u.chip_id));
+
+	bits = be32_to_cpu(hmi_evt->u.xstop_error.xstop_reason);
+	for (idx = 0; idx < ARRAY_SIZE(reasons); idx++) {
+		if (!(bits & reasons[idx].xstop_reason))
+			continue;
+		printk("%s [Unit: %-3s] %s\n", level,
+		       reasons[idx].unit_failed,
+		       reasons[idx].description);
+	}
+}
+
+/*
+ * Print the decoded reasons for an NPU checkstop.
+ *
+ * The reason word holds up to four one-byte reasons. Each non-zero byte is
+ * printed as a FIR number (top 2 bits) and a bit number (low 6 bits). With
+ * no reason word only the chip id is reported.
+ */
+static void print_npu_checkstop_reason(const char *level,
+				       struct OpalHMIEvent *hmi_evt)
+{
+	uint8_t reason, reason_count, i;
+	uint32_t xstop_reason;
+
+	/*
+	 * We may not have a checkstop reason on some combination of
+	 * hardware and/or skiboot version
+	 */
+	if (!hmi_evt->u.xstop_error.xstop_reason) {
+		printk("%s NPU checkstop on chip %x\n", level,
+		       be32_to_cpu(hmi_evt->u.xstop_error.u.chip_id));
+		return;
+	}
+
+	/*
+	 * NPU2 has 3 FIRs. Reason encoded on a byte as:
+	 *   2 bits for the FIR number
+	 *   6 bits for the bit number
+	 * It may be possible to find several reasons.
+	 *
+	 * We don't display a specific message per FIR bit as there
+	 * are too many and most are meaningless without the workbook
+	 * and/or hw team help anyway.
+	 */
+	/* The field is big-endian, like in the core and NX decoders */
+	xstop_reason = be32_to_cpu(hmi_evt->u.xstop_error.xstop_reason);
+	reason_count = sizeof(hmi_evt->u.xstop_error.xstop_reason) /
+		       sizeof(reason);
+	for (i = 0; i < reason_count; i++) {
+		reason = (xstop_reason >> (8 * i)) & 0xFF;
+		if (reason)
+			printk("%s NPU checkstop on chip %x: FIR%d bit %d is set\n",
+			       level,
+			       be32_to_cpu(hmi_evt->u.xstop_error.u.chip_id),
+			       reason >> 6, reason & 0x3F);
+	}
+}
+
+/*
+ * Dispatch a malfunction alert to the decoder matching its checkstop type
+ * (core, NX or NPU); any other type is reported as unknown.
+ */
+static void print_checkstop_reason(const char *level,
+				   struct OpalHMIEvent *hmi_evt)
+{
+	const uint8_t xstop_type = hmi_evt->u.xstop_error.xstop_type;
+
+	if (xstop_type == CHECKSTOP_TYPE_CORE)
+		print_core_checkstop_reason(level, hmi_evt);
+	else if (xstop_type == CHECKSTOP_TYPE_NX)
+		print_nx_checkstop_reason(level, hmi_evt);
+	else if (xstop_type == CHECKSTOP_TYPE_NPU)
+		print_npu_checkstop_reason(level, hmi_evt);
+	else
+		printk("%s Unknown Malfunction Alert of type %d\n",
+		       level, xstop_type);
+}
+
+/*
+ * Log one HMI event.
+ *
+ * Events older than OpalHMIEvt_V1 are rejected with an error message. The
+ * printk level and severity label come from the event severity. Events with
+ * severity OpalHMI_SEV_NO_ERROR are rate limited; all others are always
+ * printed. From OpalHMIEvt_V2 on, malfunction alerts also get their
+ * checkstop reason decoded.
+ */
+static void print_hmi_event_info(struct OpalHMIEvent *hmi_evt)
+{
+	static const char *hmi_error_types[] = {
+		"Malfunction Alert",
+		"Processor Recovery done",
+		"Processor recovery occurred again",
+		"Processor recovery occurred for masked error",
+		"Timer facility experienced an error",
+		"TFMR SPR is corrupted",
+		"UPS (Uninterrupted Power System) Overflow indication",
+		"An XSCOM operation failure",
+		"An XSCOM operation completed",
+		"SCOM has set a reserved FIR bit to cause recovery",
+		"Debug trigger has set a reserved FIR bit to cause recovery",
+		"A hypervisor resource error occurred",
+		"CAPP recovery process is in progress",
+	};
+	static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
+				      DEFAULT_RATELIMIT_BURST);
+	const char *level, *sevstr, *error_info;
+
+	/* Print things out */
+	if (hmi_evt->version < OpalHMIEvt_V1) {
+		pr_err("HMI Interrupt, Unknown event version %d !\n",
+		       hmi_evt->version);
+		return;
+	}
+
+	switch (hmi_evt->severity) {
+	case OpalHMI_SEV_NO_ERROR:
+		level = KERN_INFO;
+		sevstr = "Harmless";
+		break;
+	case OpalHMI_SEV_WARNING:
+		level = KERN_WARNING;
+		sevstr = "";
+		break;
+	case OpalHMI_SEV_ERROR_SYNC:
+		level = KERN_ERR;
+		sevstr = "Severe";
+		break;
+	case OpalHMI_SEV_FATAL:
+	default:
+		level = KERN_ERR;
+		sevstr = "Fatal";
+		break;
+	}
+
+	/* The rate limiter is only consulted for harmless events */
+	if (hmi_evt->severity != OpalHMI_SEV_NO_ERROR || __ratelimit(&rs)) {
+		const bool recovered =
+			hmi_evt->disposition == OpalHMI_DISPOSITION_RECOVERED;
+
+		printk("%s%s Hypervisor Maintenance interrupt [%s]\n",
+		       level, sevstr,
+		       recovered ? "Recovered" : "Not recovered");
+
+		if (hmi_evt->type < ARRAY_SIZE(hmi_error_types))
+			error_info = hmi_error_types[hmi_evt->type];
+		else
+			error_info = "Unknown";
+		printk("%s Error detail: %s\n", level, error_info);
+		printk("%s HMER: %016llx\n", level,
+		       be64_to_cpu(hmi_evt->hmer));
+
+		if (hmi_evt->type == OpalHMI_ERROR_TFAC ||
+		    hmi_evt->type == OpalHMI_ERROR_TFMR_PARITY)
+			printk("%s TFMR: %016llx\n", level,
+			       be64_to_cpu(hmi_evt->tfmr));
+	}
+
+	/* OpalHMIEvt_V2 and above provides reason for malfunction alert. */
+	if (hmi_evt->version >= OpalHMIEvt_V2 &&
+	    hmi_evt->type == OpalHMI_ERROR_MALFUNC_ALERT)
+		print_checkstop_reason(level, hmi_evt);
+}
+
+/*
+ * Work handler that logs every queued HMI event.
+ *
+ * The list lock is dropped while an event is printed and freed. If any
+ * event was not recovered, the remaining OPAL messages are drained, the HMI
+ * events among them are printed, and pnv_platform_error_reboot() is called.
+ */
+static void hmi_event_handler(struct work_struct *work)
+{
+	struct OpalHmiEvtNode *node;
+	struct opal_msg msg;
+	unsigned long flags;
+	bool unrecoverable = false;
+
+	spin_lock_irqsave(&opal_hmi_evt_lock, flags);
+	while (!list_empty(&opal_hmi_evt_list)) {
+		node = list_first_entry(&opal_hmi_evt_list,
+					struct OpalHmiEvtNode, list);
+		list_del(&node->list);
+		spin_unlock_irqrestore(&opal_hmi_evt_lock, flags);
+
+		print_hmi_event_info(&node->hmi_evt);
+
+		/*
+		 * Check if HMI event has been recovered or not. If not
+		 * then kernel can't continue, we need to panic.
+		 * But before we do that, display all the HMI event
+		 * available on the list and set unrecoverable flag.
+		 */
+		if (node->hmi_evt.disposition != OpalHMI_DISPOSITION_RECOVERED)
+			unrecoverable = true;
+		kfree(node);
+
+		spin_lock_irqsave(&opal_hmi_evt_lock, flags);
+	}
+	spin_unlock_irqrestore(&opal_hmi_evt_lock, flags);
+
+	if (!unrecoverable)
+		return;
+
+	/* Pull all HMI events from OPAL before we panic. */
+	while (opal_get_msg(__pa(&msg), sizeof(msg)) == OPAL_SUCCESS) {
+		/* skip if not HMI event */
+		if (be32_to_cpu(msg.msg_type) != OPAL_MSG_HMI_EVT)
+			continue;
+
+		/* HMI event info starts from param[0] */
+		print_hmi_event_info((struct OpalHMIEvent *)&msg.params[0]);
+	}
+
+	pnv_platform_error_reboot(NULL, "Unrecoverable HMI exception");
+}
+
+static DECLARE_WORK(hmi_event_work, hmi_event_handler);
+/*
+ * opal_handle_hmi_event - notifier handler that queues up HMI events
+ * to be processed later.
+ *
+ * Messages of any other type are ignored. The event is copied into a newly
+ * allocated node, added to opal_hmi_evt_list and hmi_event_work is
+ * scheduled. Returns 0, or -ENOMEM when the node cannot be allocated.
+ */
+static int opal_handle_hmi_event(struct notifier_block *nb,
+				 unsigned long msg_type, void *msg)
+{
+	struct opal_msg *hmi_msg = msg;
+	struct OpalHmiEvtNode *node;
+	unsigned long flags;
+
+	/* Sanity Checks */
+	if (msg_type != OPAL_MSG_HMI_EVT)
+		return 0;
+
+	/* Delay the logging of HMI events to workqueue. */
+	node = kzalloc(sizeof(*node), GFP_ATOMIC);
+	if (!node) {
+		pr_err("HMI: out of memory, Opal message event not handled\n");
+		return -ENOMEM;
+	}
+
+	/* HMI event info starts from param[0] */
+	memcpy(&node->hmi_evt, &hmi_msg->params[0], sizeof(node->hmi_evt));
+
+	spin_lock_irqsave(&opal_hmi_evt_lock, flags);
+	list_add(&node->list, &opal_hmi_evt_list);
+	spin_unlock_irqrestore(&opal_hmi_evt_lock, flags);
+
+	schedule_work(&hmi_event_work);
+	return 0;
+}
+
+/* Notifier registered for OPAL_MSG_HMI_EVT by opal_hmi_handler_init(). */
+static struct notifier_block opal_hmi_handler_nb = {
+	.notifier_call	= opal_handle_hmi_event,
+	/* .next and .priority stay zero-initialised (NULL / 0) */
+};
+
+/*
+ * Register the HMI event notifier once; later calls are no-ops.
+ * Returns 0 on success or the registration error code.
+ */
+int __init opal_hmi_handler_init(void)
+{
+	int ret;
+
+	if (opal_hmi_handler_nb_init)
+		return 0;
+
+	ret = opal_message_notifier_register(OPAL_MSG_HMI_EVT,
+					     &opal_hmi_handler_nb);
+	if (ret) {
+		pr_err("%s: Can't register OPAL event notifier (%d)\n",
+		       __func__, ret);
+		return ret;
+	}
+
+	opal_hmi_handler_nb_init = 1;
+	return 0;
+}
diff --git a/arch/powerpc/platforms/powernv/opal-imc.c b/arch/powerpc/platforms/powernv/opal-imc.c
new file mode 100644
index 000000000..828fc4d88
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-imc.c
@@ -0,0 +1,324 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * OPAL IMC interface detection driver
+ * Supported on POWERNV platform
+ *
+ * Copyright (C) 2017 Madhavan Srinivasan, IBM Corporation.
+ * (C) 2017 Anju T Sudhakar, IBM Corporation.
+ * (C) 2017 Hemant K Shaw, IBM Corporation.
+ */
+#include <linux/kernel.h>
+#include <linux/platform_device.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/crash_dump.h>
+#include <linux/debugfs.h>
+#include <asm/opal.h>
+#include <asm/io.h>
+#include <asm/imc-pmu.h>
+#include <asm/cputhreads.h>
+
+static struct dentry *imc_debugfs_parent; /* "imc" debugfs directory, set by export_imc_mode_and_cmd() */
+
+/*
+ * Helpers to export imc command and mode via debugfs
+ *
+ * imc_mem_get() reads the 64-bit word at @data, which imc_mem_set() stores
+ * in big-endian order, and converts it to CPU byte order.
+ */
+static int imc_mem_get(void *data, u64 *val)
+{
+	*val = be64_to_cpu(*(__be64 *)data);
+	return 0;
+}
+
+/* debugfs write helper: store @val at @data in big-endian byte order. */
+static int imc_mem_set(void *data, u64 val)
+{
+	u64 *word = data;
+
+	*word = cpu_to_be64(val);
+	return 0;
+}
+DEFINE_DEBUGFS_ATTRIBUTE(fops_imc_x64, imc_mem_get, imc_mem_set, "0x%016llx\n");
+
+/*
+ * Create debugfs file @name under @parent that reads and writes the 64-bit
+ * word at @value through fops_imc_x64.
+ */
+static void imc_debugfs_create_x64(const char *name, umode_t mode,
+				   struct dentry *parent, u64 *value)
+{
+	debugfs_create_file_unsafe(name, mode, parent, value,
+				   &fops_imc_x64);
+}
+
+/*
+ * export_imc_mode_and_cmd: Create a debugfs interface
+ *                     for imc_cmd and imc_mode
+ *                     for each node in the system.
+ *  imc_mode and imc_cmd can be changed by echo into
+ *  this interface.
+ *
+ * Walks pmu_ptr->mem_info until an entry with a NULL vbase is found and
+ * creates "imc_mode_<id>" and "imc_cmd_<id>" files for each entry. The
+ * control block offset comes from the "cb_offset" property of @node and
+ * falls back to IMC_CNTL_BLK_OFFSET.
+ */
+static void export_imc_mode_and_cmd(struct device_node *node,
+				    struct imc_pmu *pmu_ptr)
+{
+	/* Plain automatics: nothing here needs to persist between calls */
+	u64 loc, *imc_mode_addr, *imc_cmd_addr;
+	char mode[16], cmd[16];
+	u32 cb_offset;
+	struct imc_mem_info *ptr = pmu_ptr->mem_info;
+
+	imc_debugfs_parent = debugfs_create_dir("imc", arch_debugfs_dir);
+
+	if (of_property_read_u32(node, "cb_offset", &cb_offset))
+		cb_offset = IMC_CNTL_BLK_OFFSET;
+
+	while (ptr->vbase != NULL) {
+		loc = (u64)(ptr->vbase) + cb_offset;
+		imc_mode_addr = (u64 *)(loc + IMC_CNTL_BLK_MODE_OFFSET);
+		/* Bounded: prefix plus a 10-digit id would overflow 16 bytes */
+		snprintf(mode, sizeof(mode), "imc_mode_%d", (u32)(ptr->id));
+		imc_debugfs_create_x64(mode, 0600, imc_debugfs_parent,
+				       imc_mode_addr);
+
+		imc_cmd_addr = (u64 *)(loc + IMC_CNTL_BLK_CMD_OFFSET);
+		snprintf(cmd, sizeof(cmd), "imc_cmd_%d", (u32)(ptr->id));
+		imc_debugfs_create_x64(cmd, 0600, imc_debugfs_parent,
+				       imc_cmd_addr);
+		ptr++;
+	}
+}
+
+/*
+ * imc_get_mem_addr_nest: Function to get nest counter memory region
+ * for each chip
+ *
+ * Reads the "chip-id" and "base-addr" arrays of @node and fills
+ * pmu_ptr->mem_info with one entry per chip (id plus the virtual address of
+ * base + @offset), followed by one zeroed entry.
+ *
+ * Returns 0 on success, -ENODEV without chip ids, -ENOMEM when a temporary
+ * array cannot be allocated and -1 for any other failure.
+ */
+static int imc_get_mem_addr_nest(struct device_node *node,
+				 struct imc_pmu *pmu_ptr,
+				 u32 offset)
+{
+	u64 *base_addr_arr;
+	u32 *chipid_arr;
+	int nr_chips, i;
+	int rc = -1;
+
+	nr_chips = of_property_count_u32_elems(node, "chip-id");
+	if (nr_chips <= 0)
+		return -ENODEV;
+
+	base_addr_arr = kcalloc(nr_chips, sizeof(*base_addr_arr), GFP_KERNEL);
+	if (!base_addr_arr)
+		return -ENOMEM;
+
+	chipid_arr = kcalloc(nr_chips, sizeof(*chipid_arr), GFP_KERNEL);
+	if (!chipid_arr) {
+		kfree(base_addr_arr);
+		return -ENOMEM;
+	}
+
+	if (of_property_read_u32_array(node, "chip-id", chipid_arr, nr_chips) ||
+	    of_property_read_u64_array(node, "base-addr", base_addr_arr,
+				       nr_chips))
+		goto out;
+
+	/* One extra zeroed slot terminates the array */
+	pmu_ptr->mem_info = kcalloc(nr_chips + 1, sizeof(*pmu_ptr->mem_info),
+				    GFP_KERNEL);
+	if (!pmu_ptr->mem_info)
+		goto out;
+
+	for (i = 0; i < nr_chips; i++) {
+		u64 baddr = base_addr_arr[i] + offset;
+
+		pmu_ptr->mem_info[i].id = chipid_arr[i];
+		pmu_ptr->mem_info[i].vbase = phys_to_virt(baddr);
+	}
+
+	pmu_ptr->imc_counter_mmaped = true;
+	rc = 0;
+
+out:
+	kfree(base_addr_arr);
+	kfree(chipid_arr);
+	return rc;
+}
+
+/*
+ * imc_pmu_create : Takes the parent device which is the pmu unit, pmu_index
+ * and domain as the inputs.
+ * Allocates memory for the struct imc_pmu, sets up its domain, size and offsets
+ *
+ * Returns the new imc_pmu, or NULL when the domain is negative, the
+ * allocation fails, the "size" property cannot be read, the nest memory
+ * lookup fails or init_imc_pmu() reports an error.
+ */
+static struct imc_pmu *imc_pmu_create(struct device_node *parent, int pmu_index, int domain)
+{
+ int ret = 0;
+ struct imc_pmu *pmu_ptr;
+ u32 offset;
+
+ /* Return for unknown domain */
+ if (domain < 0)
+ return NULL;
+
+ /* memory for pmu */
+ pmu_ptr = kzalloc(sizeof(*pmu_ptr), GFP_KERNEL);
+ if (!pmu_ptr)
+ return NULL;
+
+ /* Set the domain */
+ pmu_ptr->domain = domain;
+
+ ret = of_property_read_u32(parent, "size", &pmu_ptr->counter_mem_size);
+ if (ret)
+ goto free_pmu;
+
+ if (!of_property_read_u32(parent, "offset", &offset)) { /* "offset" is optional */
+ if (imc_get_mem_addr_nest(parent, pmu_ptr, offset))
+ goto free_pmu;
+ }
+
+ /* Function to register IMC pmu */
+ ret = init_imc_pmu(parent, pmu_ptr, pmu_index);
+ if (ret) {
+ pr_err("IMC PMU %s Register failed\n", pmu_ptr->pmu.name);
+ kfree(pmu_ptr->pmu.name); /* NOTE(review): presumably allocated inside init_imc_pmu() - confirm */
+ if (pmu_ptr->domain == IMC_DOMAIN_NEST) /* mem_info is only freed here for the nest domain */
+ kfree(pmu_ptr->mem_info);
+ kfree(pmu_ptr);
+ return NULL;
+ }
+
+ return pmu_ptr;
+
+free_pmu:
+ kfree(pmu_ptr);
+ return NULL;
+}
+
+/*
+ * Stop the nest IMC counters, using the first online CPU of every node that
+ * has CPUs. Nodes without an online CPU are skipped.
+ */
+static void disable_nest_pmu_counters(void)
+{
+	int nid;
+
+	cpus_read_lock();
+	for_each_node_with_cpus(nid) {
+		const int cpu = cpumask_first_and(cpumask_of_node(nid),
+						  cpu_online_mask);
+
+		if (cpu < nr_cpu_ids)
+			opal_imc_counters_stop(OPAL_IMC_COUNTERS_NEST,
+					       get_hard_smp_processor_id(cpu));
+	}
+	cpus_read_unlock();
+}
+
+/*
+ * Stop the core IMC counters once per core, via the online CPU that is the
+ * first thread sibling of its core. Failures are logged and otherwise
+ * ignored.
+ */
+static void disable_core_pmu_counters(void)
+{
+	int cpu, rc;
+
+	cpus_read_lock();
+	/* Disable the IMC Core functions */
+	for_each_online_cpu(cpu) {
+		if (cpu_first_thread_sibling(cpu) == cpu) {
+			rc = opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE,
+					get_hard_smp_processor_id(cpu));
+			if (rc)
+				pr_err("%s: Failed to stop Core (cpu = %d)\n",
+				       __func__, cpu);
+		}
+	}
+	cpus_read_unlock();
+}
+
+/*
+ * Count the IMC_DTB_UNIT_COMPAT device-tree nodes whose "type" property is
+ * IMC_TYPE_CHIP. Nodes without a readable "type" are not counted.
+ */
+int get_max_nest_dev(void)
+{
+	struct device_node *node;
+	u32 pmu_units = 0, type;
+
+	for_each_compatible_node(node, NULL, IMC_DTB_UNIT_COMPAT) {
+		if (!of_property_read_u32(node, "type", &type) &&
+		    type == IMC_TYPE_CHIP)
+			pmu_units++;
+	}
+
+	return pmu_units;
+}
+
+/*
+ * Probe: create an IMC PMU for every IMC_DTB_UNIT_COMPAT node.
+ *
+ * In a kdump kernel the nest and core engines are stopped and -ENODEV is
+ * returned. Otherwise each unit's "type" is mapped to a domain and handed
+ * to imc_pmu_create(). The debugfs mode/cmd files are exported on the first
+ * nest PMU. If a thread PMU was created but no core PMU was, thread-imc is
+ * unregistered again.
+ */
+static int opal_imc_counters_probe(struct platform_device *pdev)
+{
+	struct device_node *imc_dev = pdev->dev.of_node;
+	bool core_imc_reg = false, thread_imc_reg = false;
+	int pmu_count = 0, domain;
+	struct imc_pmu *pmu;
+	u32 type;
+
+	/*
+	 * Check whether this is kdump kernel. If yes, force the engines to
+	 * stop and return.
+	 */
+	if (is_kdump_kernel()) {
+		disable_nest_pmu_counters();
+		disable_core_pmu_counters();
+		return -ENODEV;
+	}
+
+	for_each_compatible_node(imc_dev, NULL, IMC_DTB_UNIT_COMPAT) {
+		if (of_property_read_u32(imc_dev, "type", &type)) {
+			pr_warn("IMC Device without type property\n");
+			continue;
+		}
+
+		switch (type) {
+		case IMC_TYPE_CHIP:
+			domain = IMC_DOMAIN_NEST;
+			break;
+		case IMC_TYPE_CORE:
+			domain = IMC_DOMAIN_CORE;
+			break;
+		case IMC_TYPE_THREAD:
+			domain = IMC_DOMAIN_THREAD;
+			break;
+		case IMC_TYPE_TRACE:
+			domain = IMC_DOMAIN_TRACE;
+			break;
+		default:
+			pr_warn("IMC Unknown Device type \n");
+			domain = -1;
+			break;
+		}
+
+		pmu = imc_pmu_create(imc_dev, pmu_count, domain);
+		if (!pmu)
+			continue;
+
+		if (domain == IMC_DOMAIN_NEST) {
+			/* debugfs files are exported once, on the first nest PMU */
+			if (!imc_debugfs_parent)
+				export_imc_mode_and_cmd(imc_dev, pmu);
+			pmu_count++;
+		}
+		if (domain == IMC_DOMAIN_CORE)
+			core_imc_reg = true;
+		if (domain == IMC_DOMAIN_THREAD)
+			thread_imc_reg = true;
+	}
+
+	/* If core imc is not registered, unregister thread-imc */
+	if (thread_imc_reg && !core_imc_reg)
+		unregister_thread_imc();
+
+	return 0;
+}
+
+/*
+ * Shutdown hook: stop the nest and core counter engines.
+ *
+ * This is the bare minimum.
+ * TODO: Need to handle proper memory cleanup and pmu unregister.
+ */
+static void opal_imc_counters_shutdown(struct platform_device *pdev)
+{
+	disable_nest_pmu_counters();
+	disable_core_pmu_counters();
+}
+
+static const struct of_device_id opal_imc_match[] = { /* device-tree match table for the IMC platform device */
+ { .compatible = IMC_DTB_COMPAT },
+ {}, /* sentinel */
+};
+
+static struct platform_driver opal_imc_driver = { /* platform driver wiring probe/shutdown to the match table */
+ .driver = {
+ .name = "opal-imc-counters",
+ .of_match_table = opal_imc_match,
+ },
+ .probe = opal_imc_counters_probe,
+ .shutdown = opal_imc_counters_shutdown,
+};
+
+builtin_platform_driver(opal_imc_driver); /* built-in only: no remove callback is provided */
diff --git a/arch/powerpc/platforms/powernv/opal-irqchip.c b/arch/powerpc/platforms/powernv/opal-irqchip.c
new file mode 100644
index 000000000..56a1f7ce7
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-irqchip.c
@@ -0,0 +1,312 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * This file implements an irqchip for OPAL events. Whenever there is
+ * an interrupt that is handled by OPAL we get passed a list of events
+ * that Linux needs to do something about. These basically look like
+ * interrupts to Linux so we implement an irqchip to handle them.
+ *
+ * Copyright Alistair Popple, IBM Corporation 2014.
+ */
+#include <linux/bitops.h>
+#include <linux/irq.h>
+#include <linux/irqchip.h>
+#include <linux/irqdomain.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/kthread.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+#include <linux/of_irq.h>
+
+#include <asm/machdep.h>
+#include <asm/opal.h>
+
+#include "powernv.h"
+
+/* Maximum number of events supported by OPAL firmware */
+#define MAX_NUM_EVENTS 64
+
+/*
+ * State of the OPAL event irqchip.
+ *
+ * irqchip: callbacks installed on every mapped OPAL event
+ * domain:  irq domain used to map OPAL event numbers to Linux irqs
+ * mask:    bitmap of unmasked events, indexed by hwirq
+ */
+struct opal_event_irqchip {
+	struct irq_chip irqchip;
+	struct irq_domain *domain;
+	unsigned long mask;
+};
+
+/* Tentative definition; the initialiser follows the chip callbacks. */
+static struct opal_event_irqchip opal_event_irqchip;
+/* Event word last reported by OPAL; reset to 0 once it has been handled. */
+static u64 last_outstanding_events;
+/* Number of usable entries in opal_irqs. */
+static int opal_irq_count;
+/* IRQ resources of the interrupts reserved for OPAL. */
+static struct resource *opal_irqs;
+
+/*
+ * Dispatch every pending, unmasked OPAL event to its Linux handler,
+ * then poll OPAL again and repeat until no unmasked event is reported
+ * or the poll fails.
+ */
+void opal_handle_events(void)
+{
+	__be64 polled = 0;
+	u64 pending;
+
+	pending = READ_ONCE(last_outstanding_events) & opal_event_irqchip.mask;
+
+	for (;;) {
+		/* Highest-numbered event first. */
+		while (pending) {
+			int hwirq = fls64(pending) - 1;
+
+			pending &= ~BIT_ULL(hwirq);
+
+			local_irq_disable();
+			irq_enter();
+			generic_handle_domain_irq(opal_event_irqchip.domain, hwirq);
+			irq_exit();
+			local_irq_enable();
+
+			cond_resched();
+		}
+
+		WRITE_ONCE(last_outstanding_events, 0);
+
+		if (opal_poll_events(&polled) != OPAL_SUCCESS)
+			return;
+
+		pending = be64_to_cpu(polled) & opal_event_irqchip.mask;
+		if (!pending)
+			return;
+	}
+}
+
+/* True when the last event word from OPAL has an unmasked bit set. */
+bool opal_have_pending_events(void)
+{
+	u64 events = READ_ONCE(last_outstanding_events);
+
+	return (events & opal_event_irqchip.mask) != 0;
+}
+
+/* irq_chip .irq_mask: clear this event's bit in the unmasked bitmap. */
+static void opal_event_mask(struct irq_data *d)
+{
+	unsigned long *unmasked = &opal_event_irqchip.mask;
+
+	clear_bit(d->hwirq, unmasked);
+}
+
+/*
+ * irq_chip .irq_unmask: set this event's bit and wake the OPAL poller
+ * if an unmasked event is now pending.
+ */
+static void opal_event_unmask(struct irq_data *d)
+{
+	unsigned long *unmasked = &opal_event_irqchip.mask;
+
+	set_bit(d->hwirq, unmasked);
+
+	if (!opal_have_pending_events())
+		return;
+
+	opal_wake_poller();
+}
+
+/*
+ * irq_chip .irq_set_type: only IRQ_TYPE_LEVEL_HIGH is accepted, anything
+ * else yields -EINVAL. Events are level triggered for now: the irq
+ * handler will be called continuously until the event has been cleared
+ * in OPAL.
+ */
+static int opal_event_set_type(struct irq_data *d, unsigned int flow_type)
+{
+	return flow_type == IRQ_TYPE_LEVEL_HIGH ? 0 : -EINVAL;
+}
+
+/* The single OPAL event irqchip instance; every event starts masked. */
+static struct opal_event_irqchip opal_event_irqchip = {
+	.mask = 0,
+	.irqchip = {
+		.irq_set_type = opal_event_set_type,
+		.irq_unmask = opal_event_unmask,
+		.irq_mask = opal_event_mask,
+		.name = "OPAL EVT",
+	},
+};
+
+/* irq_domain .map: attach the OPAL event chip and the level flow handler. */
+static int opal_event_map(struct irq_domain *d, unsigned int irq,
+			  irq_hw_number_t hwirq)
+{
+	struct opal_event_irqchip *chip = &opal_event_irqchip;
+
+	irq_set_chip_data(irq, chip);
+	irq_set_chip_and_handler(irq, &chip->irqchip, handle_level_irq);
+
+	return 0;
+}
+
+/*
+ * Handler for the Linux interrupts reserved for OPAL: pass the
+ * interrupt to firmware, record the event word it reports and wake the
+ * poller if any unmasked event is pending.
+ */
+static irqreturn_t opal_interrupt(int irq, void *data)
+{
+	/*
+	 * Start from "no events" so that nothing uninitialised is
+	 * published if the OPAL call does not fill in the event word.
+	 */
+	__be64 events = 0;
+
+	opal_handle_interrupt(virq_to_hw(irq), &events);
+	WRITE_ONCE(last_outstanding_events, be64_to_cpu(events));
+	if (opal_have_pending_events())
+		opal_wake_poller();
+
+	return IRQ_HANDLED;
+}
+
+/* irq_domain .match: claim only the node this domain is attached to. */
+static int opal_event_match(struct irq_domain *h, struct device_node *node,
+			    enum irq_domain_bus_token bus_token)
+{
+	struct device_node *domain_node = irq_domain_get_of_node(h);
+
+	return domain_node == node;
+}
+
+/*
+ * irq_domain .xlate: the first specifier cell is the OPAL event number;
+ * every event is reported as level-high.
+ *
+ * Returns 0 on success, -EINVAL if the specifier has no cells.
+ */
+static int opal_event_xlate(struct irq_domain *h, struct device_node *np,
+			    const u32 *intspec, unsigned int intsize,
+			    irq_hw_number_t *out_hwirq, unsigned int *out_flags)
+{
+	/* An empty specifier has no event number to read. */
+	if (WARN_ON(intsize < 1))
+		return -EINVAL;
+
+	*out_hwirq = intspec[0];
+	*out_flags = IRQ_TYPE_LEVEL_HIGH;
+
+	return 0;
+}
+
+/* irq_domain callbacks for the OPAL event domain. */
+static const struct irq_domain_ops opal_event_domain_ops = {
+	.xlate	= opal_event_xlate,
+	.map	= opal_event_map,
+	.match	= opal_event_match,
+};
+
+/*
+ * Quiesce every interrupt reserved for OPAL. An interrupt is freed
+ * (which also masks it) unless we are in interrupt context or have
+ * interrupts disabled, in which case it is only disabled. Each handled
+ * entry has its start cleared so it is skipped on a later call.
+ */
+void opal_event_shutdown(void)
+{
+	unsigned int idx;
+
+	if (!opal_irqs)
+		return;
+
+	for (idx = 0; idx < opal_irq_count; idx++) {
+		struct resource *res = &opal_irqs[idx];
+
+		if (!res->start)
+			continue;
+
+		if (!in_interrupt() && !irqs_disabled())
+			free_irq(res->start, NULL);
+		else
+			disable_irq_nosync(res->start);
+
+		res->start = 0;
+	}
+}
+
+/*
+ * Create the OPAL event irq domain, discover the interrupts reserved
+ * for OPAL under /ibm,opal (standard "interrupts" property, falling
+ * back to the old "opal-interrupts" one) and install opal_interrupt()
+ * on each of them.
+ *
+ * Entries that cannot be mapped or requested have their resource start
+ * cleared, so opal_event_shutdown() skips them instead of freeing an
+ * interrupt that was never requested.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+int __init opal_event_init(void)
+{
+	struct device_node *dn, *opal_node;
+	bool old_style = false;
+	int i, rc = 0;
+
+	opal_node = of_find_node_by_path("/ibm,opal");
+	if (!opal_node) {
+		pr_warn("opal: Node not found\n");
+		return -ENODEV;
+	}
+
+	/* If dn is NULL it means the domain won't be linked to a DT
+	 * node so therefore irq_of_parse_and_map(...) won't work. But
+	 * that shouldn't be a problem because if we're running a
+	 * version of skiboot that doesn't have the dn then the
+	 * devices won't have the correct properties and will have to
+	 * fall back to the legacy method (opal_event_request(...))
+	 * anyway. */
+	dn = of_find_compatible_node(NULL, NULL, "ibm,opal-event");
+	opal_event_irqchip.domain = irq_domain_add_linear(dn, MAX_NUM_EVENTS,
+				&opal_event_domain_ops, &opal_event_irqchip);
+	of_node_put(dn);
+	if (!opal_event_irqchip.domain) {
+		pr_warn("opal: Unable to create irq domain\n");
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	/* Look for new-style (standard) "interrupts" property */
+	opal_irq_count = of_irq_count(opal_node);
+
+	/* Absent ? Look for the old one */
+	if (opal_irq_count < 1) {
+		/* Get opal-interrupts property and names if present */
+		rc = of_property_count_u32_elems(opal_node, "opal-interrupts");
+		if (rc > 0)
+			opal_irq_count = rc;
+		old_style = true;
+	}
+
+	/* No interrupts ? Bail out */
+	if (!opal_irq_count)
+		goto out;
+
+	pr_debug("OPAL: Found %d interrupts reserved for OPAL using %s scheme\n",
+		 opal_irq_count, old_style ? "old" : "new");
+
+	/* Allocate an IRQ resources array */
+	opal_irqs = kcalloc(opal_irq_count, sizeof(struct resource), GFP_KERNEL);
+	if (WARN_ON(!opal_irqs)) {
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	/* Build the resources array */
+	if (old_style) {
+		/* Old style "opal-interrupts" property */
+		for (i = 0; i < opal_irq_count; i++) {
+			struct resource *r = &opal_irqs[i];
+			const char *name = NULL;
+			u32 hw_irq;
+			int virq;
+
+			rc = of_property_read_u32_index(opal_node, "opal-interrupts",
+							i, &hw_irq);
+			if (WARN_ON(rc < 0)) {
+				opal_irq_count = i;
+				break;
+			}
+			of_property_read_string_index(opal_node, "opal-interrupts-names",
+						      i, &name);
+			virq = irq_create_mapping(NULL, hw_irq);
+			if (!virq) {
+				/* r->start stays 0: entry is skipped below */
+				pr_warn("Failed to map OPAL irq 0x%x\n", hw_irq);
+				continue;
+			}
+			r->start = r->end = virq;
+			r->flags = IORESOURCE_IRQ | IRQ_TYPE_LEVEL_LOW;
+			r->name = name;
+		}
+	} else {
+		/* new style standard "interrupts" property */
+		rc = of_irq_to_resource_table(opal_node, opal_irqs, opal_irq_count);
+		if (WARN_ON(rc < 0)) {
+			opal_irq_count = 0;
+			kfree(opal_irqs);
+			/* Do not leave a dangling pointer behind */
+			opal_irqs = NULL;
+			goto out;
+		}
+		if (WARN_ON(rc < opal_irq_count))
+			opal_irq_count = rc;
+	}
+
+	/* Install interrupt handlers */
+	for (i = 0; i < opal_irq_count; i++) {
+		struct resource *r = &opal_irqs[i];
+		const char *name;
+
+		/* Mapping failed above: there is nothing to request */
+		if (!r->start)
+			continue;
+
+		/* Prefix name */
+		if (r->name && strlen(r->name))
+			name = kasprintf(GFP_KERNEL, "opal-%s", r->name);
+		else
+			name = kasprintf(GFP_KERNEL, "opal");
+
+		if (!name) {
+			/* Never requested: keep shutdown from freeing it */
+			r->start = 0;
+			continue;
+		}
+		/* Install interrupt handler */
+		rc = request_irq(r->start, opal_interrupt, r->flags & IRQD_TRIGGER_MASK,
+				 name, NULL);
+		if (rc) {
+			pr_warn("Error %d requesting OPAL irq %d\n", rc, (int)r->start);
+			/* The name is only kept by a successful request */
+			kfree(name);
+			r->start = 0;
+			continue;
+		}
+	}
+	rc = 0;
+ out:
+	of_node_put(opal_node);
+	return rc;
+}
+machine_arch_initcall(powernv, opal_event_init);
+
+/**
+ * opal_event_request() - Request an event
+ * @opal_event_nr: the opal event number to request
+ *
+ * Find the linux virq number for an opal event so that it can be
+ * passed to request_irq() to assign a handler for that event. Only
+ * legacy devices without proper device tree bindings should use this;
+ * most devices should use irq_of_parse_and_map() instead.
+ *
+ * Return: the result of irq_create_mapping() on the OPAL event domain,
+ * or 0 (with a one-time warning) if that domain does not exist.
+ */
+int opal_event_request(unsigned int opal_event_nr)
+{
+	struct irq_domain *domain = opal_event_irqchip.domain;
+
+	if (WARN_ON_ONCE(!domain))
+		return 0;
+
+	return irq_create_mapping(domain, opal_event_nr);
+}
+EXPORT_SYMBOL(opal_event_request);
diff --git a/arch/powerpc/platforms/powernv/opal-kmsg.c b/arch/powerpc/platforms/powernv/opal-kmsg.c
new file mode 100644
index 000000000..6c3bc4b4d
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-kmsg.c
@@ -0,0 +1,47 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * kmsg dumper that ensures the OPAL console fully flushes panic messages
+ *
+ * Author: Russell Currey <ruscur@russell.cc>
+ *
+ * Copyright 2015 IBM Corporation.
+ */
+
+#include <linux/kmsg_dump.h>
+
+#include <asm/opal.h>
+#include <asm/opal-api.h>
+
+/*
+ * Console output is controlled by OPAL firmware. The kernel regularly
+ * calls OPAL_POLL_EVENTS, which flushes some console output. Once the
+ * kernel has panicked it no longer makes that call, so the panic
+ * message may not be printed completely. This dumper does not dump the
+ * message itself; it only makes OPAL flush its console buffer fully.
+ */
+static void kmsg_dump_opal_console_flush(struct kmsg_dumper *dumper,
+					 enum kmsg_dump_reason reason)
+{
+	/*
+	 * Only a panic needs the explicit flush: in every other context
+	 * the pollers keep running.
+	 */
+	if (reason == KMSG_DUMP_PANIC)
+		opal_flush_console(0);
+}
+
+/* kmsg dumper whose only job is to flush the OPAL console on panic. */
+static struct kmsg_dumper opal_kmsg_dumper = {
+	.dump = kmsg_dump_opal_console_flush,
+};
+
+/* Register the OPAL console-flush dumper; a failure is only logged. */
+void __init opal_kmsg_init(void)
+{
+	int rc = kmsg_dump_register(&opal_kmsg_dumper);
+
+	if (rc == 0)
+		return;
+
+	pr_err("opal: kmsg_dump_register failed; returned %d\n", rc);
+}
diff --git a/arch/powerpc/platforms/powernv/opal-lpc.c b/arch/powerpc/platforms/powernv/opal-lpc.c
new file mode 100644
index 000000000..a16f07cda
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-lpc.c
@@ -0,0 +1,418 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * PowerNV LPC bus handling.
+ *
+ * Copyright 2013 IBM Corp.
+ */
+
+#include <linux/kernel.h>
+#include <linux/of.h>
+#include <linux/bug.h>
+#include <linux/io.h>
+#include <linux/slab.h>
+#include <linux/debugfs.h>
+
+#include <asm/machdep.h>
+#include <asm/firmware.h>
+#include <asm/opal.h>
+#include <asm/prom.h>
+#include <linux/uaccess.h>
+#include <asm/isa-bridge.h>
+
+/* Chip id of the LPC bus selected by opal_lpc_init(); negative if none. */
+static int opal_lpc_chip_id = -1;
+
+/*
+ * Read one byte from LPC IO space. Yields 0xff when no LPC bus is
+ * known, the port is above 0xffff or the OPAL call fails.
+ */
+static u8 opal_lpc_inb(unsigned long port)
+{
+	__be32 data;
+	int64_t rc;
+
+	if (opal_lpc_chip_id < 0 || port > 0xffff)
+		return 0xff;
+
+	rc = opal_lpc_read(opal_lpc_chip_id, OPAL_LPC_IO, port, &data, 1);
+	if (rc)
+		return 0xff;
+
+	return be32_to_cpu(data);
+}
+
+/*
+ * Read a 16-bit quantity from LPC IO space without the final
+ * le16_to_cpu(). An odd port is served by two byte reads, the first
+ * byte ending up in the upper half. Yields 0xffff on any failure.
+ */
+static __le16 __opal_lpc_inw(unsigned long port)
+{
+	__be32 data;
+	int64_t rc;
+
+	if (opal_lpc_chip_id < 0 || port > 0xfffe)
+		return 0xffff;
+
+	if (port & 1) {
+		__le16 upper = (__le16)opal_lpc_inb(port) << 8;
+
+		return upper | opal_lpc_inb(port + 1);
+	}
+
+	rc = opal_lpc_read(opal_lpc_chip_id, OPAL_LPC_IO, port, &data, 2);
+	if (rc)
+		return 0xffff;
+
+	return be32_to_cpu(data);
+}
+/* inw(): __opal_lpc_inw() converted from little endian to CPU order. */
+static u16 opal_lpc_inw(unsigned long port)
+{
+	__le16 raw = __opal_lpc_inw(port);
+
+	return le16_to_cpu(raw);
+}
+
+/*
+ * Read a 32-bit quantity from LPC IO space without the final
+ * le32_to_cpu(). A port that is not 4-byte aligned is served by four
+ * byte reads, the first byte ending up in the top bits. Yields
+ * 0xffffffff on any failure.
+ */
+static __le32 __opal_lpc_inl(unsigned long port)
+{
+	__be32 data;
+	int64_t rc;
+
+	if (opal_lpc_chip_id < 0 || port > 0xfffc)
+		return 0xffffffff;
+
+	if (port & 3) {
+		__le32 val;
+
+		val  = (__le32)opal_lpc_inb(port) << 24;
+		val |= (__le32)opal_lpc_inb(port + 1) << 16;
+		val |= (__le32)opal_lpc_inb(port + 2) << 8;
+		val |= opal_lpc_inb(port + 3);
+		return val;
+	}
+
+	rc = opal_lpc_read(opal_lpc_chip_id, OPAL_LPC_IO, port, &data, 4);
+	if (rc)
+		return 0xffffffff;
+
+	return be32_to_cpu(data);
+}
+
+/* inl(): __opal_lpc_inl() converted from little endian to CPU order. */
+static u32 opal_lpc_inl(unsigned long port)
+{
+	__le32 raw = __opal_lpc_inl(port);
+
+	return le32_to_cpu(raw);
+}
+
+/*
+ * Write one byte to LPC IO space; silently does nothing when no LPC
+ * bus is known or the port is above 0xffff.
+ */
+static void opal_lpc_outb(u8 val, unsigned long port)
+{
+	if (opal_lpc_chip_id >= 0 && port <= 0xffff)
+		opal_lpc_write(opal_lpc_chip_id, OPAL_LPC_IO, port, val, 1);
+}
+
+/*
+ * Write a 16-bit quantity to LPC IO space. An odd port is served by
+ * two byte writes, upper half first. Nothing is written when no LPC
+ * bus is known or the port is above 0xfffe.
+ */
+static void __opal_lpc_outw(__le16 val, unsigned long port)
+{
+	if (opal_lpc_chip_id < 0 || port > 0xfffe)
+		return;
+
+	if (!(port & 1)) {
+		opal_lpc_write(opal_lpc_chip_id, OPAL_LPC_IO, port, val, 2);
+		return;
+	}
+
+	opal_lpc_outb(val >> 8, port);
+	opal_lpc_outb(val, port + 1);
+}
+
+/* outw(): convert to little endian, then hand over to __opal_lpc_outw(). */
+static void opal_lpc_outw(u16 val, unsigned long port)
+{
+	__le16 raw = cpu_to_le16(val);
+
+	__opal_lpc_outw(raw, port);
+}
+
+/*
+ * Write a 32-bit quantity to LPC IO space. A port that is not 4-byte
+ * aligned is served by four byte writes, top byte first. Nothing is
+ * written when no LPC bus is known or the port is above 0xfffc.
+ */
+static void __opal_lpc_outl(__le32 val, unsigned long port)
+{
+	if (opal_lpc_chip_id < 0 || port > 0xfffc)
+		return;
+
+	if (!(port & 3)) {
+		opal_lpc_write(opal_lpc_chip_id, OPAL_LPC_IO, port, val, 4);
+		return;
+	}
+
+	opal_lpc_outb(val >> 24, port);
+	opal_lpc_outb(val >> 16, port + 1);
+	opal_lpc_outb(val >> 8, port + 2);
+	opal_lpc_outb(val, port + 3);
+}
+
+/* outl(): convert to little endian, then hand over to __opal_lpc_outl(). */
+static void opal_lpc_outl(u32 val, unsigned long port)
+{
+	__le32 raw = cpu_to_le32(val);
+
+	__opal_lpc_outl(raw, port);
+}
+
+/* Read c bytes from port p, one after another, into buffer b. */
+static void opal_lpc_insb(unsigned long p, void *b, unsigned long c)
+{
+	u8 *dst = b;
+	unsigned long i;
+
+	for (i = 0; i < c; i++)
+		dst[i] = opal_lpc_inb(p);
+}
+
+/* Read c 16-bit values from port p into b, stored without byte swapping. */
+static void opal_lpc_insw(unsigned long p, void *b, unsigned long c)
+{
+	__le16 *dst = b;
+	unsigned long i;
+
+	for (i = 0; i < c; i++)
+		dst[i] = __opal_lpc_inw(p);
+}
+
+/* Read c 32-bit values from port p into b, stored without byte swapping. */
+static void opal_lpc_insl(unsigned long p, void *b, unsigned long c)
+{
+	__le32 *dst = b;
+	unsigned long i;
+
+	for (i = 0; i < c; i++)
+		dst[i] = __opal_lpc_inl(p);
+}
+
+/* Write the c bytes of buffer b, one after another, to port p. */
+static void opal_lpc_outsb(unsigned long p, const void *b, unsigned long c)
+{
+	const u8 *src = b;
+	unsigned long i;
+
+	for (i = 0; i < c; i++)
+		opal_lpc_outb(src[i], p);
+}
+
+/* Write c 16-bit values from b to port p, passed on without byte swapping. */
+static void opal_lpc_outsw(unsigned long p, const void *b, unsigned long c)
+{
+	const __le16 *src = b;
+	unsigned long i;
+
+	for (i = 0; i < c; i++)
+		__opal_lpc_outw(src[i], p);
+}
+
+/* Write c 32-bit values from b to port p, passed on without byte swapping. */
+static void opal_lpc_outsl(unsigned long p, const void *b, unsigned long c)
+{
+	const __le32 *src = b;
+	unsigned long i;
+
+	for (i = 0; i < c; i++)
+		__opal_lpc_outl(src[i], p);
+}
+
+/* IO accessors that route every port access through OPAL LPC calls. */
+static const struct ppc_pci_io opal_lpc_io = {
+	/* single reads */
+	.inb	= opal_lpc_inb,
+	.inw	= opal_lpc_inw,
+	.inl	= opal_lpc_inl,
+	/* single writes */
+	.outb	= opal_lpc_outb,
+	.outw	= opal_lpc_outw,
+	.outl	= opal_lpc_outl,
+	/* string reads */
+	.insb	= opal_lpc_insb,
+	.insw	= opal_lpc_insw,
+	.insl	= opal_lpc_insl,
+	/* string writes */
+	.outsb	= opal_lpc_outsb,
+	.outsw	= opal_lpc_outsw,
+	.outsl	= opal_lpc_outsl,
+};
+
+#ifdef CONFIG_DEBUG_FS
+/* Private data of one LPC debugfs file: the address space it accesses. */
+struct lpc_debugfs_entry {
+	enum OpalLPCAddressType lpc_type;
+};
+
+/*
+ * debugfs read handler: copy count bytes from the file's LPC address
+ * space, starting at *ppos, to the user buffer, one OPAL access at a
+ * time.
+ *
+ * Returns count on success, -EFAULT if the user buffer cannot be
+ * written, or -ENXIO if an OPAL read fails.
+ */
+static ssize_t lpc_debug_read(struct file *filp, char __user *ubuf,
+			      size_t count, loff_t *ppos)
+{
+	struct lpc_debugfs_entry *lpc = filp->private_data;
+	u32 data, pos, len, todo;
+	int rc;
+
+	if (!access_ok(ubuf, count))
+		return -EFAULT;
+
+	todo = count;
+	while (todo) {
+		pos = *ppos;
+
+		/*
+		 * Pick the access size from the remaining count, the
+		 * alignment and the access type: IO and MEM only support
+		 * byte accesses, FW supports all 3 sizes.
+		 */
+		if (lpc->lpc_type != OPAL_LPC_FW)
+			len = 1;
+		else if (todo > 3 && (pos & 3) == 0)
+			len = 4;
+		else if (todo > 1 && (pos & 1) == 0)
+			len = 2;
+		else
+			len = 1;
+
+		rc = opal_lpc_read(opal_lpc_chip_id, lpc->lpc_type, pos,
+				   &data, len);
+		if (rc)
+			return -ENXIO;
+
+		/*
+		 * The data returned by OPAL needs some care: it is the
+		 * desired data right justified in a 32-bit BE word (an
+		 * admittedly very bad interface).
+		 *
+		 * We can't just apply a 32-bit swap to it, because user
+		 * space expects the *bytes* to be in their proper
+		 * respective positions (ie, LPC position). What we
+		 * really want is to shift the data right appropriately
+		 * on a LE kernel.
+		 *
+		 * IE. If the LPC transaction has bytes B0, B1, B2 and B3
+		 * in that order, the memory OPAL wrote at the "data"
+		 * pointer holds:
+		 *
+		 * Bytes:         OPAL "data"    LE "data"
+		 * 32-bit: B0 B1 B2 B3  B0B1B2B3  B3B2B1B0
+		 * 16-bit: B0 B1        0000B0B1  B1B00000
+		 *  8-bit: B0           000000B0  B0000000
+		 *
+		 * A BE kernel has the leftmost of the above in the MSB
+		 * and the rightmost in the LSB, so it can simply "cast"
+		 * the u32 "data" down to the appropriate quantity and
+		 * write it.
+		 *
+		 * An LE kernel can't. It doesn't need to swap, because a
+		 * load from data followed by a store to user preserves
+		 * the byte ordering, which is the wire byte order the
+		 * user wants; but in order to "crop" to the right size
+		 * it has to shift right first.
+		 */
+		if (len == 4) {
+			rc = __put_user((u32)data, (u32 __user *)ubuf);
+		} else if (len == 2) {
+#ifdef __LITTLE_ENDIAN__
+			data >>= 16;
+#endif
+			rc = __put_user((u16)data, (u16 __user *)ubuf);
+		} else {
+#ifdef __LITTLE_ENDIAN__
+			data >>= 24;
+#endif
+			rc = __put_user((u8)data, (u8 __user *)ubuf);
+		}
+		if (rc)
+			return -EFAULT;
+
+		todo -= len;
+		ubuf += len;
+		*ppos += len;
+	}
+
+	return count;
+}
+
+/*
+ * debugfs write handler: copy count bytes from the user buffer to the
+ * file's LPC address space, starting at *ppos, one OPAL access at a
+ * time.
+ *
+ * Returns count on success, -EFAULT if the user buffer cannot be read,
+ * or -ENXIO if an OPAL write fails.
+ */
+static ssize_t lpc_debug_write(struct file *filp, const char __user *ubuf,
+			       size_t count, loff_t *ppos)
+{
+	struct lpc_debugfs_entry *lpc = filp->private_data;
+	u32 data, pos, len, todo;
+	int rc;
+
+	if (!access_ok(ubuf, count))
+		return -EFAULT;
+
+	todo = count;
+	while (todo) {
+		pos = *ppos;
+
+		/*
+		 * Pick the access size from the remaining count, the
+		 * alignment and the access type: IO and MEM only support
+		 * byte accesses, FW supports all 3 sizes.
+		 */
+		if (lpc->lpc_type != OPAL_LPC_FW)
+			len = 1;
+		else if (todo > 3 && (pos & 3) == 0)
+			len = 4;
+		else if (todo > 1 && (pos & 1) == 0)
+			len = 2;
+		else
+			len = 1;
+
+		/*
+		 * As in the read case there is some trickery, but of a
+		 * different kind. The value goes to OPAL in a register
+		 * whose layout depends on the access size. We want to
+		 * reproduce the memory layout of the user, yet we are
+		 * not doing a load from user and a store to another
+		 * memory location, which would achieve that. The
+		 * register is expected to contain the "BE"
+		 * interpretation of the byte sequence, IE: for a 32-bit
+		 * access, byte 0 should be in the MSB. So here we *do*
+		 * need to byteswap on LE.
+		 *
+		 * User bytes:    LE "data"  OPAL "data"
+		 * 32-bit: B0 B1 B2 B3  B3B2B1B0  B0B1B2B3
+		 * 16-bit: B0 B1        0000B1B0  0000B0B1
+		 *  8-bit: B0           000000B0  000000B0
+		 */
+		if (len == 4) {
+			rc = __get_user(data, (u32 __user *)ubuf);
+			data = cpu_to_be32(data);
+		} else if (len == 2) {
+			rc = __get_user(data, (u16 __user *)ubuf);
+			data = cpu_to_be16(data);
+		} else {
+			rc = __get_user(data, (u8 __user *)ubuf);
+		}
+		if (rc)
+			return -EFAULT;
+
+		rc = opal_lpc_write(opal_lpc_chip_id, lpc->lpc_type, pos,
+				    data, len);
+		if (rc)
+			return -ENXIO;
+
+		todo -= len;
+		ubuf += len;
+		*ppos += len;
+	}
+
+	return count;
+}
+
+/* File operations shared by all LPC debugfs files. */
+static const struct file_operations lpc_fops = {
+	.open	= simple_open,
+	.llseek	= default_llseek,
+	.read	= lpc_debug_read,
+	.write	= lpc_debug_write,
+};
+
+/*
+ * Create the debugfs file fname (mode 0600) under folder for one LPC
+ * address space. Returns 0, or -ENOMEM if the entry cannot be
+ * allocated.
+ */
+static int opal_lpc_debugfs_create_type(struct dentry *folder,
+					const char *fname,
+					enum OpalLPCAddressType type)
+{
+	struct lpc_debugfs_entry *entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+
+	if (!entry)
+		return -ENOMEM;
+
+	entry->lpc_type = type;
+	debugfs_create_file(fname, 0600, folder, entry, &lpc_fops);
+
+	return 0;
+}
+
+/*
+ * Create the "lpc" debugfs directory with one file per LPC address
+ * space (io, mem, fw). Returns -ENODEV when no LPC bus was found,
+ * otherwise the OR of the three per-file results.
+ */
+static int opal_lpc_init_debugfs(void)
+{
+	struct dentry *root;
+	int rc;
+
+	if (opal_lpc_chip_id < 0)
+		return -ENODEV;
+
+	root = debugfs_create_dir("lpc", arch_debugfs_dir);
+
+	rc  = opal_lpc_debugfs_create_type(root, "io", OPAL_LPC_IO);
+	rc |= opal_lpc_debugfs_create_type(root, "mem", OPAL_LPC_MEM);
+	rc |= opal_lpc_debugfs_create_type(root, "fw", OPAL_LPC_FW);
+
+	return rc;
+}
+machine_device_initcall(powernv, opal_lpc_init_debugfs);
+#endif /* CONFIG_DEBUG_FS */
+
+/*
+ * Find the primary Power8 LPC bus and set up access to it: through the
+ * ISA bridge when the node has a "ranges" property, otherwise through
+ * the OPAL-backed special IO ops.
+ */
+void __init opal_lpc_init(void)
+{
+	struct device_node *np;
+
+	/*
+	 * Look for a Power8 LPC bus tagged as "primary",
+	 * we currently support only one though the OPAL APIs
+	 * support any number.
+	 */
+	for_each_compatible_node(np, NULL, "ibm,power8-lpc") {
+		if (!of_device_is_available(np))
+			continue;
+		if (!of_get_property(np, "primary", NULL))
+			continue;
+		opal_lpc_chip_id = of_get_ibm_chip_id(np);
+		/*
+		 * Leave the loop still holding the iterator's reference:
+		 * np is used below and only released at "out".
+		 */
+		break;
+	}
+	if (opal_lpc_chip_id < 0)
+		goto out;
+
+	/* Does it support direct mapping ? */
+	if (of_property_present(np, "ranges")) {
+		pr_info("OPAL: Found memory mapped LPC bus on chip %d\n",
+			opal_lpc_chip_id);
+		isa_bridge_init_non_pci(np);
+	} else {
+		pr_info("OPAL: Found non-mapped LPC bus on chip %d\n",
+			opal_lpc_chip_id);
+
+		/* Setup special IO ops */
+		ppc_pci_io = opal_lpc_io;
+		isa_io_special = true;
+	}
+out:
+	/* np is NULL if the loop ran to completion; the put is then a no-op */
+	of_node_put(np);
+}
diff --git a/arch/powerpc/platforms/powernv/opal-memory-errors.c b/arch/powerpc/platforms/powernv/opal-memory-errors.c
new file mode 100644
index 000000000..a1754a282
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-memory-errors.c
@@ -0,0 +1,134 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * OPAL asynchronous memory error handling support in PowerNV.
+ *
+ * Copyright 2013 IBM Corporation
+ * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
+ */
+
+#undef DEBUG
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/of.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+
+#include <asm/machdep.h>
+#include <asm/opal.h>
+#include <asm/cputable.h>
+
+/* Non-zero once the memory-error notifier has been registered. */
+static int opal_mem_err_nb_init;
+/* Queued memory-error messages awaiting the work item. */
+static LIST_HEAD(opal_memory_err_list);
+/* Protects opal_memory_err_list. */
+static DEFINE_SPINLOCK(opal_mem_err_lock);
+
+/* One queued OPAL message: list linkage plus a copy of the message. */
+struct OpalMsgNode {
+	struct list_head list;
+	struct opal_msg msg;
+};
+
+/*
+ * Report every page of the physical range named by a resilience or
+ * dynamic-deallocation memory error event to memory_failure(). Events
+ * of any other type are ignored.
+ */
+static void handle_memory_error_event(struct OpalMemoryErrorData *merr_evt)
+{
+	uint64_t addr, end;
+
+	pr_debug("%s: Retrieved memory error event, type: 0x%x\n",
+		 __func__, merr_evt->type);
+
+	if (merr_evt->type == OPAL_MEM_ERR_TYPE_RESILIENCE) {
+		addr = be64_to_cpu(merr_evt->u.resilience.physical_address_start);
+		end = be64_to_cpu(merr_evt->u.resilience.physical_address_end);
+	} else if (merr_evt->type == OPAL_MEM_ERR_TYPE_DYN_DALLOC) {
+		addr = be64_to_cpu(merr_evt->u.dyn_dealloc.physical_address_start);
+		end = be64_to_cpu(merr_evt->u.dyn_dealloc.physical_address_end);
+	} else {
+		return;
+	}
+
+	while (addr < end) {
+		memory_failure(addr >> PAGE_SHIFT, 0);
+		addr += PAGE_SIZE;
+	}
+}
+
+/*
+ * Drain the queue of memory-error messages. The lock is dropped while
+ * each message is processed and freed, then retaken to look at the
+ * queue again.
+ */
+static void handle_memory_error(void)
+{
+	struct OpalMsgNode *node;
+	unsigned long flags;
+
+	spin_lock_irqsave(&opal_mem_err_lock, flags);
+	while (!list_empty(&opal_memory_err_list)) {
+		node = list_first_entry(&opal_memory_err_list,
+					struct OpalMsgNode, list);
+		list_del(&node->list);
+		spin_unlock_irqrestore(&opal_mem_err_lock, flags);
+
+		handle_memory_error_event((struct OpalMemoryErrorData *)
+					  &node->msg.params[0]);
+		kfree(node);
+
+		spin_lock_irqsave(&opal_mem_err_lock, flags);
+	}
+	spin_unlock_irqrestore(&opal_mem_err_lock, flags);
+}
+
+/* Work function: process all queued memory-error messages. */
+static void mem_error_handler(struct work_struct *work)
+{
+	handle_memory_error();
+}
+
+/* Work item scheduled whenever a memory-error message is queued. */
+static DECLARE_WORK(mem_error_work, mem_error_handler);
+
+/*
+ * opal_memory_err_event - notifier handler that queues up the opal message
+ * to be processed later.
+ *
+ * Messages of any type other than OPAL_MSG_MEM_ERR are ignored. The
+ * message is copied into a freshly allocated node, added to
+ * opal_memory_err_list and the work item is scheduled.
+ *
+ * Returns 0, or -ENOMEM if the node cannot be allocated.
+ */
+static int opal_memory_err_event(struct notifier_block *nb,
+				 unsigned long msg_type, void *msg)
+{
+	unsigned long flags;
+	struct OpalMsgNode *msg_node;
+
+	if (msg_type != OPAL_MSG_MEM_ERR)
+		return 0;
+
+	msg_node = kzalloc(sizeof(*msg_node), GFP_ATOMIC);
+	if (!msg_node) {
+		/* One literal: the split form lost the space before "handled" */
+		pr_err("MEMORY_ERROR: out of memory, Opal message event not handled\n");
+		return -ENOMEM;
+	}
+	memcpy(&msg_node->msg, msg, sizeof(msg_node->msg));
+
+	spin_lock_irqsave(&opal_mem_err_lock, flags);
+	list_add(&msg_node->list, &opal_memory_err_list);
+	spin_unlock_irqrestore(&opal_mem_err_lock, flags);
+
+	schedule_work(&mem_error_work);
+	return 0;
+}
+
+/* Notifier block registered for OPAL_MSG_MEM_ERR messages. */
+static struct notifier_block opal_mem_err_nb = {
+	.priority	= 0,
+	.next		= NULL,
+	.notifier_call	= opal_memory_err_event,
+};
+
+/*
+ * Register the memory-error notifier once. Returns 0, or the error
+ * from opal_message_notifier_register().
+ */
+static int __init opal_mem_err_init(void)
+{
+	int ret;
+
+	if (opal_mem_err_nb_init)
+		return 0;
+
+	ret = opal_message_notifier_register(OPAL_MSG_MEM_ERR,
+					     &opal_mem_err_nb);
+	if (ret) {
+		pr_err("%s: Can't register OPAL event notifier (%d)\n",
+		       __func__, ret);
+		return ret;
+	}
+
+	opal_mem_err_nb_init = 1;
+	return 0;
+}
+machine_device_initcall(powernv, opal_mem_err_init);
diff --git a/arch/powerpc/platforms/powernv/opal-msglog.c b/arch/powerpc/platforms/powernv/opal-msglog.c
new file mode 100644
index 000000000..22d6efe17
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-msglog.c
@@ -0,0 +1,161 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * PowerNV OPAL in-memory console interface
+ *
+ * Copyright 2014 IBM Corp.
+ */
+
+#include <asm/io.h>
+#include <asm/opal.h>
+#include <linux/debugfs.h>
+#include <linux/of.h>
+#include <linux/types.h>
+#include <asm/barrier.h>
+
+#include "powernv.h"
+
+/*
+ * OPAL in-memory console descriptor. Defined in OPAL source at
+ * core/console.c. All fields are big endian.
+ */
+struct memcons {
+	/* Must equal MEMCONS_MAGIC for the descriptor to be accepted */
+	__be64 magic;
+#define MEMCONS_MAGIC 0x6630696567726173L
+	/* Physical address of the output buffer */
+	__be64 obuf_phys;
+	__be64 ibuf_phys;
+	/* Size of the output buffer */
+	__be32 obuf_size;
+	__be32 ibuf_size;
+	/* Output position; MEMCONS_OUT_POS_WRAP flags a wrapped buffer */
+	__be32 out_pos;
+#define MEMCONS_OUT_POS_WRAP 0x80000000u
+#define MEMCONS_OUT_POS_MASK 0x00ffffffu
+	__be32 in_prod;
+	__be32 in_cons;
+};
+
+/* Descriptor set up by opal_msglog_init(); NULL when none was found. */
+static struct memcons *opal_memcons = NULL;
+
+/*
+ * Copy up to count bytes of the memory console output, starting at
+ * offset pos, into the buffer to.
+ *
+ * When the console has wrapped, the data from out_pos to the end of
+ * the buffer is copied first, followed by the data from the start of
+ * the buffer up to out_pos.
+ *
+ * Returns the number of bytes copied, -ENODEV if mc is NULL, -EINVAL if
+ * out_pos lies beyond the output buffer, or a negative error from
+ * memory_read_from_buffer().
+ */
+ssize_t memcons_copy(struct memcons *mc, char *to, loff_t pos, size_t count)
+{
+	const char *conbuf;
+	ssize_t ret;
+	size_t first_read = 0;
+	uint32_t out_pos, obuf_size, avail;
+	bool wrapped;
+
+	if (!mc)
+		return -ENODEV;
+
+	out_pos = be32_to_cpu(READ_ONCE(mc->out_pos));
+
+	/* Now we've read out_pos, put a barrier in before reading the new
+	 * data it points to in conbuf. */
+	smp_rmb();
+
+	conbuf = phys_to_virt(be64_to_cpu(mc->obuf_phys));
+	obuf_size = be32_to_cpu(mc->obuf_size);
+
+	wrapped = out_pos & MEMCONS_OUT_POS_WRAP;
+	if (wrapped)
+		out_pos &= MEMCONS_OUT_POS_MASK;
+
+	/*
+	 * Sanity check. The firmware should not do this to us. It must
+	 * come before out_pos is used: "obuf_size - out_pos" below would
+	 * otherwise wrap around and let the copy run past the buffer.
+	 */
+	if (out_pos > obuf_size) {
+		pr_err("OPAL: memory console corruption. Aborting read.\n");
+		return -EINVAL;
+	}
+
+	/* When the buffer has wrapped, read from the out_pos marker to the end
+	 * of the buffer, and then read the remaining data as in the un-wrapped
+	 * case. */
+	if (wrapped) {
+		avail = obuf_size - out_pos;
+
+		ret = memory_read_from_buffer(to, count, &pos,
+					      conbuf + out_pos, avail);
+		if (ret < 0)
+			return ret;
+
+		first_read = ret;
+		to += first_read;
+		count -= first_read;
+		/* Make pos relative to the start of the buffer */
+		pos -= avail;
+
+		if (!count)
+			return ret;
+	}
+
+	ret = memory_read_from_buffer(to, count, &pos, conbuf, out_pos);
+	if (ret < 0)
+		return ret;
+
+	return ret + first_read;
+}
+
+/* memcons_copy() applied to the OPAL message log console. */
+ssize_t opal_msglog_copy(char *to, loff_t pos, size_t count)
+{
+	struct memcons *mc = opal_memcons;
+
+	return memcons_copy(mc, to, pos, count);
+}
+
+/* sysfs binary-attribute read callback for the "msglog" file. */
+static ssize_t opal_msglog_read(struct file *file, struct kobject *kobj,
+				struct bin_attribute *bin_attr, char *to,
+				loff_t pos, size_t count)
+{
+	ssize_t copied = opal_msglog_copy(to, pos, count);
+
+	return copied;
+}
+
+/* Read-only (0400) sysfs binary attribute "msglog". */
+static struct bin_attribute opal_msglog_attr = {
+	.read = opal_msglog_read,
+	.attr = {
+		.mode = 0400,
+		.name = "msglog",
+	},
+};
+
+/*
+ * Locate a memory console from the 64-bit address stored in property
+ * mc_prop_name of node. Returns the descriptor, or NULL (after a
+ * warning) if the property is missing, the address maps to NULL or the
+ * magic does not match.
+ */
+struct memcons *__init memcons_init(struct device_node *node, const char *mc_prop_name)
+{
+	struct memcons *mc;
+	u64 mcaddr;
+
+	if (of_property_read_u64(node, mc_prop_name, &mcaddr)) {
+		pr_warn("%s property not found, no message log\n",
+			mc_prop_name);
+		return NULL;
+	}
+
+	mc = phys_to_virt(mcaddr);
+	if (!mc) {
+		pr_warn("memory console address is invalid\n");
+		return NULL;
+	}
+
+	if (be64_to_cpu(mc->magic) != MEMCONS_MAGIC) {
+		pr_warn("memory console version is invalid\n");
+		return NULL;
+	}
+
+	return mc;
+}
+
+/* Combined size of the console's input and output buffers. */
+u32 __init memcons_get_size(struct memcons *mc)
+{
+	u32 ibuf = be32_to_cpu(mc->ibuf_size);
+	u32 obuf = be32_to_cpu(mc->obuf_size);
+
+	return ibuf + obuf;
+}
+
+/*
+ * Look up the console named by "ibm,opal-memcons" and, when found, size
+ * the msglog attribute after it.
+ */
+void __init opal_msglog_init(void)
+{
+	opal_memcons = memcons_init(opal_node, "ibm,opal-memcons");
+
+	if (opal_memcons) {
+		opal_msglog_attr.size = memcons_get_size(opal_memcons);
+		return;
+	}
+
+	pr_warn("OPAL: memcons failed to load from ibm,opal-memcons\n");
+}
+
+/* Create the msglog file under opal_kobj if a console was found. */
+void __init opal_msglog_sysfs_init(void)
+{
+	int rc;
+
+	if (!opal_memcons) {
+		pr_warn("OPAL: message log initialisation failed, not creating sysfs entry\n");
+		return;
+	}
+
+	rc = sysfs_create_bin_file(opal_kobj, &opal_msglog_attr);
+	if (rc != 0)
+		pr_warn("OPAL: sysfs file creation failed\n");
+}
diff --git a/arch/powerpc/platforms/powernv/opal-nvram.c b/arch/powerpc/platforms/powernv/opal-nvram.c
new file mode 100644
index 000000000..380bc2d7e
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-nvram.c
@@ -0,0 +1,113 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * PowerNV nvram code.
+ *
+ * Copyright 2011 IBM Corp.
+ */
+
+#define DEBUG
+
+#include <linux/delay.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/of.h>
+
+#include <asm/opal.h>
+#include <asm/nvram.h>
+#include <asm/machdep.h>
+
+/* NVRAM size in bytes, taken from the "#bytes" property at init. */
+static unsigned int nvram_size;
+
+/* ppc_md.nvram_size hook: report the NVRAM size. */
+static ssize_t opal_nvram_size(void)
+{
+	ssize_t size = nvram_size;
+
+	return size;
+}
+
+/*
+ * ppc_md.nvram_read hook: read up to count bytes at *index into buf,
+ * clamped to the end of NVRAM, and advance *index.
+ *
+ * Returns the number of bytes read, 0 when *index is at or beyond the
+ * end of NVRAM, or -EIO if the OPAL call fails.
+ */
+static ssize_t opal_nvram_read(char *buf, size_t count, loff_t *index)
+{
+	int off;
+	s64 rc;
+
+	if (*index >= nvram_size)
+		return 0;
+
+	off = *index;
+	if ((off + count) > nvram_size)
+		count = nvram_size - off;
+
+	rc = opal_read_nvram(__pa(buf), count, off);
+	if (rc != OPAL_SUCCESS)
+		return -EIO;
+
+	*index += count;
+
+	return count;
+}
+
+/*
+ * ppc_md.nvram_write hook: write up to count bytes from buf at *index,
+ * clamped to the end of NVRAM, and advance *index. The OPAL call is
+ * retried for as long as it reports OPAL_BUSY or OPAL_BUSY_EVENT.
+ *
+ * This can be called in the panic path with interrupts off, so use
+ * mdelay in that case.
+ *
+ * Returns the number of bytes written, 0 when *index is at or beyond
+ * the end of NVRAM, or -EIO if the OPAL call fails.
+ */
+static ssize_t opal_nvram_write(char *buf, size_t count, loff_t *index)
+{
+	s64 rc;
+	int off;
+
+	if (*index >= nvram_size)
+		return 0;
+
+	off = *index;
+	if ((off + count) > nvram_size)
+		count = nvram_size - off;
+
+	for (;;) {
+		rc = opal_write_nvram(__pa(buf), count, off);
+		if (rc != OPAL_BUSY && rc != OPAL_BUSY_EVENT)
+			break;
+
+		if (in_interrupt() || irqs_disabled())
+			mdelay(OPAL_BUSY_DELAY_MS);
+		else
+			msleep(OPAL_BUSY_DELAY_MS);
+
+		/* Only a busy *event* additionally polls OPAL. */
+		if (rc == OPAL_BUSY_EVENT)
+			opal_poll_events(NULL);
+	}
+
+	if (rc)
+		return -EIO;
+
+	*index += count;
+
+	return count;
+}
+
+/* Initcall: scan nvram for partitions, then set up the oops partition. */
+static int __init opal_nvram_init_log_partitions(void)
+{
+	nvram_scan_partitions();
+	nvram_init_oops_partition(0);
+
+	return 0;
+}
+machine_arch_initcall(powernv, opal_nvram_init_log_partitions);
+
+/*
+ * Find the "ibm,opal-nvram" node, read the NVRAM size from its "#bytes"
+ * property and install the OPAL nvram hooks in ppc_md. Nothing is
+ * installed if the node or the property is missing.
+ */
+void __init opal_nvram_init(void)
+{
+	const __be32 *nbytes_p;
+	struct device_node *np;
+
+	np = of_find_compatible_node(NULL, NULL, "ibm,opal-nvram");
+	if (!np)
+		return;
+
+	nbytes_p = of_get_property(np, "#bytes", NULL);
+	if (nbytes_p) {
+		nvram_size = be32_to_cpup(nbytes_p);
+		pr_info("OPAL nvram setup, %u bytes\n", nvram_size);
+
+		ppc_md.nvram_read = opal_nvram_read;
+		ppc_md.nvram_write = opal_nvram_write;
+		ppc_md.nvram_size = opal_nvram_size;
+	}
+
+	of_node_put(np);
+}
+
diff --git a/arch/powerpc/platforms/powernv/opal-power.c b/arch/powerpc/platforms/powernv/opal-power.c
new file mode 100644
index 000000000..db99ffcb7
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-power.c
@@ -0,0 +1,174 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * PowerNV OPAL power control for graceful shutdown handling
+ *
+ * Copyright 2015 IBM Corp.
+ */
+
+#define pr_fmt(fmt) "opal-power: " fmt
+
+#include <linux/kernel.h>
+#include <linux/reboot.h>
+#include <linux/notifier.h>
+#include <linux/of.h>
+
+#include <asm/opal.h>
+#include <asm/machdep.h>
+
+#define SOFT_OFF 0x00
+#define SOFT_REBOOT 0x01
+
+/*
+ * Detect whether an EPOW event that needs a shutdown is present.
+ *
+ * Returns true if any EPOW class reported by OPAL has a status bit set
+ * after the bits filtered out below are masked off; false otherwise,
+ * including when the OPAL call itself fails.
+ */
+static bool detect_epow(void)
+{
+	u16 epow;
+	int i, rc, nr_classes;
+	__be16 epow_classes;
+	__be16 opal_epow_status[OPAL_SYSEPOW_MAX] = {0};
+
+	/*
+	 * Check for EPOW event. Kernel sends supported EPOW classes info
+	 * to OPAL. OPAL returns EPOW info along with classes present.
+	 */
+	epow_classes = cpu_to_be16(OPAL_SYSEPOW_MAX);
+	rc = opal_get_epow_status(opal_epow_status, &epow_classes);
+	if (rc != OPAL_SUCCESS) {
+		pr_err("Failed to get EPOW event information\n");
+		return false;
+	}
+
+	/*
+	 * epow_classes is rewritten by firmware. Clamp it to the size of the
+	 * status array so the loop below can never read past its end.
+	 */
+	nr_classes = min_t(int, be16_to_cpu(epow_classes),
+			   ARRAY_SIZE(opal_epow_status));
+
+	/* Look for EPOW events present */
+	for (i = 0; i < nr_classes; i++) {
+		epow = be16_to_cpu(opal_epow_status[i]);
+
+		/* Filter events which do not need shutdown. */
+		if (i == OPAL_SYSEPOW_POWER)
+			epow &= ~(OPAL_SYSPOWER_CHNG | OPAL_SYSPOWER_FAIL |
+				  OPAL_SYSPOWER_INCL);
+		if (epow)
+			return true;
+	}
+
+	return false;
+}
+
+/*
+ * Check for an already-present DPO or EPOW event.
+ *
+ * Returns true if the DPO status query succeeds or detect_epow() reports an
+ * event; the EPOW check is only made when the DPO query did not succeed.
+ */
+static bool __init poweroff_pending(void)
+{
+	__be64 opal_dpo_timeout;
+	bool pending = false;
+	int rc;
+
+	rc = opal_get_dpo_status(&opal_dpo_timeout);
+	if (rc == OPAL_SUCCESS) {
+		pr_info("Existing DPO event detected.\n");
+		pending = true;
+	} else if (detect_epow()) {
+		pr_info("Existing EPOW event detected.\n");
+		pending = true;
+	}
+
+	return pending;
+}
+
+/*
+ * Notifier callback shared by the EPOW, DPO and SHUTDOWN message types.
+ *
+ * EPOW: power off only if detect_epow() confirms an event.
+ * DPO: always power off.
+ * SHUTDOWN: params[0] of the OPAL message selects reboot (SOFT_REBOOT) or
+ * power-off (SOFT_OFF).
+ *
+ * Unknown message or shutdown types are logged. Always returns 0.
+ */
+static int opal_power_control_event(struct notifier_block *nb,
+				    unsigned long msg_type, void *msg)
+{
+	uint64_t type;
+
+	if (msg_type == OPAL_MSG_EPOW) {
+		if (detect_epow()) {
+			pr_info("EPOW msg received. Powering off system\n");
+			orderly_poweroff(true);
+		}
+	} else if (msg_type == OPAL_MSG_DPO) {
+		pr_info("DPO msg received. Powering off system\n");
+		orderly_poweroff(true);
+	} else if (msg_type == OPAL_MSG_SHUTDOWN) {
+		type = be64_to_cpu(((struct opal_msg *)msg)->params[0]);
+		if (type == SOFT_REBOOT) {
+			pr_info("Reboot requested\n");
+			orderly_reboot();
+		} else if (type == SOFT_OFF) {
+			pr_info("Poweroff requested\n");
+			orderly_poweroff(true);
+		} else {
+			pr_err("Unknown power-control type %llu\n", type);
+		}
+	} else {
+		pr_err("Unknown OPAL message type %lu\n", msg_type);
+	}
+
+	return 0;
+}
+
+/*
+ * OPAL EPOW event notifier block.
+ * NOTE(review): EPOW, DPO and SHUTDOWN each get their own notifier_block
+ * even though all three use the same callback - presumably because one
+ * block can only be linked into a single notifier chain; confirm.
+ */
+static struct notifier_block opal_epow_nb = {
+ .notifier_call = opal_power_control_event,
+ .next = NULL,
+ .priority = 0,
+};
+
+/* OPAL DPO event notifier block */
+static struct notifier_block opal_dpo_nb = {
+ .notifier_call = opal_power_control_event,
+ .next = NULL,
+ .priority = 0,
+};
+
+/* OPAL power-control (OPAL_MSG_SHUTDOWN) event notifier block */
+static struct notifier_block opal_power_control_nb = {
+ .notifier_call = opal_power_control_event,
+ .next = NULL,
+ .priority = 0,
+};
+
+/*
+ * Register the OPAL power-control notifiers.
+ *
+ * The SHUTDOWN notifier is always registered. The EPOW and DPO notifiers
+ * are registered only when /ibm,opal/epow is compatible with
+ * "ibm,opal-v3-epow"; in that case an already-pending EPOW/DPO event
+ * triggers an orderly power-off straight away.
+ *
+ * Registration failures are logged but not propagated: always returns 0.
+ */
+int __init opal_power_control_init(void)
+{
+	struct device_node *epow_np;
+	int supported = 0;
+	int ret;
+
+	/* Register OPAL power-control events notifier */
+	ret = opal_message_notifier_register(OPAL_MSG_SHUTDOWN,
+					     &opal_power_control_nb);
+	if (ret)
+		pr_err("Failed to register SHUTDOWN notifier, ret = %d\n", ret);
+
+	/* Determine OPAL EPOW, DPO support */
+	epow_np = of_find_node_by_path("/ibm,opal/epow");
+	if (epow_np) {
+		supported = of_device_is_compatible(epow_np,
+						    "ibm,opal-v3-epow");
+		of_node_put(epow_np);
+	}
+
+	if (supported) {
+		pr_info("OPAL EPOW, DPO support detected.\n");
+
+		/* Register EPOW event notifier */
+		ret = opal_message_notifier_register(OPAL_MSG_EPOW,
+						     &opal_epow_nb);
+		if (ret)
+			pr_err("Failed to register EPOW notifier, ret = %d\n",
+			       ret);
+
+		/* Register DPO event notifier */
+		ret = opal_message_notifier_register(OPAL_MSG_DPO,
+						     &opal_dpo_nb);
+		if (ret)
+			pr_err("Failed to register DPO notifier, ret = %d\n",
+			       ret);
+
+		/* Check for any pending EPOW or DPO events. */
+		if (poweroff_pending())
+			orderly_poweroff(true);
+	}
+
+	return 0;
+}
diff --git a/arch/powerpc/platforms/powernv/opal-powercap.c b/arch/powerpc/platforms/powernv/opal-powercap.c
new file mode 100644
index 000000000..ea917266a
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-powercap.c
@@ -0,0 +1,251 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * PowerNV OPAL Powercap interface
+ *
+ * Copyright 2017 IBM Corp.
+ */
+
+#define pr_fmt(fmt) "opal-powercap: " fmt
+
+#include <linux/of.h>
+#include <linux/kobject.h>
+#include <linux/slab.h>
+
+#include <asm/opal.h>
+
+static DEFINE_MUTEX(powercap_mutex);
+
+static struct kobject *powercap_kobj;
+
+/* One sysfs attribute together with the OPAL handle it reads or writes. */
+struct powercap_attr {
+ u32 handle;
+ struct kobj_attribute attr;
+};
+
+/*
+ * Per-child-node state: the sysfs attribute group and the backing array of
+ * powercap attributes. The pcaps array is allocated in opal_powercap_init().
+ */
+static struct pcap {
+ struct attribute_group pg;
+ struct powercap_attr *pattrs;
+} *pcaps;
+
+/*
+ * sysfs show handler: query OPAL for the value behind this attribute's
+ * handle and print it as an unsigned decimal followed by a newline.
+ *
+ * The OPAL call may complete synchronously or asynchronously; in the async
+ * case we wait for the response and use its return code. Returns the number
+ * of characters written to @buf, or a negative errno.
+ */
+static ssize_t powercap_show(struct kobject *kobj, struct kobj_attribute *attr,
+			     char *buf)
+{
+	struct powercap_attr *pcap_attr = container_of(attr,
+					struct powercap_attr, attr);
+	struct opal_msg msg;
+	bool have_value = false;
+	u32 pcap;
+	int ret, token;
+
+	token = opal_async_get_token_interruptible();
+	if (token < 0) {
+		pr_devel("Failed to get token\n");
+		return token;
+	}
+
+	ret = mutex_lock_interruptible(&powercap_mutex);
+	if (ret)
+		goto out_token;
+
+	ret = opal_get_powercap(pcap_attr->handle, token, (u32 *)__pa(&pcap));
+	if (ret == OPAL_ASYNC_COMPLETION) {
+		if (opal_async_wait_response(token, &msg)) {
+			pr_devel("Failed to wait for the async response\n");
+			ret = -EIO;
+		} else {
+			ret = opal_error_code(opal_get_async_rc(msg));
+			have_value = !ret;
+		}
+	} else if (ret == OPAL_SUCCESS) {
+		have_value = true;
+	} else {
+		ret = opal_error_code(ret);
+	}
+
+	/* Only format the value once OPAL has reported success. */
+	if (have_value) {
+		ret = sprintf(buf, "%u\n", be32_to_cpu(pcap));
+		if (ret < 0)
+			ret = -EIO;
+	}
+
+	mutex_unlock(&powercap_mutex);
+out_token:
+	opal_async_release_token(token);
+	return ret;
+}
+
+/*
+ * sysfs store handler: parse an unsigned 32-bit value from @buf and ask
+ * OPAL to apply it to this attribute's handle.
+ *
+ * The value is parsed with kstrtou32() to match the u32 it is stored in;
+ * negative input is rejected rather than being reinterpreted as a very
+ * large value.
+ *
+ * Returns @count on success or a negative errno (parse error, interrupted
+ * wait for token/mutex, or translated OPAL error).
+ */
+static ssize_t powercap_store(struct kobject *kobj,
+			      struct kobj_attribute *attr, const char *buf,
+			      size_t count)
+{
+	struct powercap_attr *pcap_attr = container_of(attr,
+					struct powercap_attr, attr);
+	struct opal_msg msg;
+	u32 pcap;
+	int ret, token;
+
+	ret = kstrtou32(buf, 0, &pcap);
+	if (ret)
+		return ret;
+
+	token = opal_async_get_token_interruptible();
+	if (token < 0) {
+		pr_devel("Failed to get token\n");
+		return token;
+	}
+
+	ret = mutex_lock_interruptible(&powercap_mutex);
+	if (ret)
+		goto out_token;
+
+	ret = opal_set_powercap(pcap_attr->handle, token, pcap);
+	switch (ret) {
+	case OPAL_ASYNC_COMPLETION:
+		/* OPAL will answer later; block until the response arrives. */
+		ret = opal_async_wait_response(token, &msg);
+		if (ret) {
+			pr_devel("Failed to wait for the async response\n");
+			ret = -EIO;
+			goto out;
+		}
+		ret = opal_error_code(opal_get_async_rc(msg));
+		if (!ret)
+			ret = count;
+		break;
+	case OPAL_SUCCESS:
+		ret = count;
+		break;
+	default:
+		ret = opal_error_code(ret);
+	}
+
+out:
+	mutex_unlock(&powercap_mutex);
+out_token:
+	opal_async_release_token(token);
+	return ret;
+}
+
+/*
+ * Initialise @attr as a read-only (0444) sysfs attribute named @name that is
+ * served by powercap_show() and bound to OPAL handle @handle. Callers that
+ * want a writable attribute adjust mode/store afterwards.
+ */
+static void __init powercap_add_attr(int handle, const char *name,
+ struct powercap_attr *attr)
+{
+ attr->handle = handle;
+ sysfs_attr_init(&attr->attr.attr);
+ attr->attr.attr.name = name;
+ attr->attr.attr.mode = 0444;
+ attr->attr.show = powercap_show;
+}
+
+/*
+ * Create /sys/.../powercap/<child>/ attribute groups, one per child of the
+ * "ibm,opal-powercap" device-tree node. Each group exposes whichever of
+ * powercap-min, powercap-max and powercap-current the child provides; only
+ * powercap-current is writable.
+ *
+ * On any failure everything allocated so far is released, including the
+ * entry that was being set up when the failure happened.
+ */
+void __init opal_powercap_init(void)
+{
+	struct device_node *powercap, *node;
+	int i = 0;
+
+	powercap = of_find_compatible_node(NULL, NULL, "ibm,opal-powercap");
+	if (!powercap) {
+		pr_devel("Powercap node not found\n");
+		return;
+	}
+
+	/* kcalloc zeroes the array, so untouched entries hold NULL pointers. */
+	pcaps = kcalloc(of_get_child_count(powercap), sizeof(*pcaps),
+			GFP_KERNEL);
+	if (!pcaps)
+		goto out_put_powercap;
+
+	powercap_kobj = kobject_create_and_add("powercap", opal_kobj);
+	if (!powercap_kobj) {
+		pr_warn("Failed to create powercap kobject\n");
+		goto out_pcaps;
+	}
+
+	for_each_child_of_node(powercap, node) {
+		u32 cur, min, max;
+		int j = 0;
+		bool has_cur = false, has_min = false, has_max = false;
+
+		/* First pass: count the attributes this child provides. */
+		if (!of_property_read_u32(node, "powercap-min", &min)) {
+			j++;
+			has_min = true;
+		}
+
+		if (!of_property_read_u32(node, "powercap-max", &max)) {
+			j++;
+			has_max = true;
+		}
+
+		if (!of_property_read_u32(node, "powercap-current", &cur)) {
+			j++;
+			has_cur = true;
+		}
+
+		pcaps[i].pattrs = kcalloc(j, sizeof(struct powercap_attr),
+					  GFP_KERNEL);
+		if (!pcaps[i].pattrs)
+			goto out_pcaps_pattrs;
+
+		/* One extra slot for the NULL terminator of the attr list. */
+		pcaps[i].pg.attrs = kcalloc(j + 1, sizeof(struct attribute *),
+					    GFP_KERNEL);
+		if (!pcaps[i].pg.attrs)
+			goto out_free_cur;
+
+		pcaps[i].pg.name = kasprintf(GFP_KERNEL, "%pOFn", node);
+		if (!pcaps[i].pg.name)
+			goto out_free_cur;
+
+		/* Second pass: fill in the attributes in a fixed order. */
+		j = 0;
+		if (has_min) {
+			powercap_add_attr(min, "powercap-min",
+					  &pcaps[i].pattrs[j]);
+			pcaps[i].pg.attrs[j] = &pcaps[i].pattrs[j].attr.attr;
+			j++;
+		}
+
+		if (has_max) {
+			powercap_add_attr(max, "powercap-max",
+					  &pcaps[i].pattrs[j]);
+			pcaps[i].pg.attrs[j] = &pcaps[i].pattrs[j].attr.attr;
+			j++;
+		}
+
+		if (has_cur) {
+			powercap_add_attr(cur, "powercap-current",
+					  &pcaps[i].pattrs[j]);
+			/* The current cap is the only writable attribute. */
+			pcaps[i].pattrs[j].attr.attr.mode |= 0220;
+			pcaps[i].pattrs[j].attr.store = powercap_store;
+			pcaps[i].pg.attrs[j] = &pcaps[i].pattrs[j].attr.attr;
+			j++;
+		}
+
+		if (sysfs_create_group(powercap_kobj, &pcaps[i].pg)) {
+			pr_warn("Failed to create powercap attribute group %s\n",
+				pcaps[i].pg.name);
+			goto out_free_cur;
+		}
+		i++;
+	}
+	of_node_put(powercap);
+
+	return;
+
+out_free_cur:
+	/*
+	 * Entry i was only partly set up and is not covered by the unwind
+	 * loop below (which walks entries < i). Members that were never
+	 * assigned are still NULL, which kfree() accepts.
+	 */
+	kfree(pcaps[i].pattrs);
+	kfree(pcaps[i].pg.attrs);
+	kfree(pcaps[i].pg.name);
+out_pcaps_pattrs:
+	while (--i >= 0) {
+		kfree(pcaps[i].pattrs);
+		kfree(pcaps[i].pg.attrs);
+		kfree(pcaps[i].pg.name);
+	}
+	kobject_put(powercap_kobj);
+	/* The loop was left early, so drop the iterator's node reference. */
+	of_node_put(node);
+out_pcaps:
+	kfree(pcaps);
+out_put_powercap:
+	of_node_put(powercap);
+}
diff --git a/arch/powerpc/platforms/powernv/opal-prd.c b/arch/powerpc/platforms/powernv/opal-prd.c
new file mode 100644
index 000000000..327e2f769
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-prd.c
@@ -0,0 +1,452 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * OPAL Runtime Diagnostics interface driver
+ * Supported on POWERNV platform
+ *
+ * Copyright IBM Corporation 2015
+ */
+
+#define pr_fmt(fmt) "opal-prd: " fmt
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/miscdevice.h>
+#include <linux/fs.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/poll.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <asm/opal-prd.h>
+#include <asm/opal.h>
+#include <asm/io.h>
+#include <linux/uaccess.h>
+
+
+/*
+ * A PRD message: the same bytes viewed either as the fixed header or as a
+ * flexible byte array covering header plus payload.
+ */
+struct opal_prd_msg {
+ union {
+ struct opal_prd_msg_header header;
+ DECLARE_FLEX_ARRAY(u8, data);
+ };
+};
+
+/*
+ * The msg member must be at the end of the struct, as it's followed by the
+ * message data.
+ */
+struct opal_prd_msg_queue_item {
+ struct list_head list;
+ struct opal_prd_msg msg;
+};
+
+static struct device_node *prd_node;
+static LIST_HEAD(opal_prd_msg_queue);
+static DEFINE_SPINLOCK(opal_prd_msg_queue_lock);
+static DECLARE_WAIT_QUEUE_HEAD(opal_prd_msg_wait);
+static atomic_t prd_usage;
+
+/*
+ * Check whether [addr, addr + size) lies entirely inside one of the
+ * /reserved-memory child ranges that carries an "ibm,prd-label" property.
+ *
+ * Returns false on address wrap-around, when /reserved-memory is missing,
+ * or when no labelled range contains the request. Children without a
+ * usable address entry are skipped instead of being dereferenced.
+ */
+static bool opal_prd_range_is_valid(uint64_t addr, uint64_t size)
+{
+	struct device_node *parent, *node;
+	bool found;
+
+	/* Reject requests whose end wraps around the address space. */
+	if (addr + size < addr)
+		return false;
+
+	parent = of_find_node_by_path("/reserved-memory");
+	if (!parent)
+		return false;
+
+	found = false;
+
+	for_each_child_of_node(parent, node) {
+		uint64_t range_addr, range_size, range_end;
+		const __be32 *addrp;
+
+		/* PRD ranges need a label */
+		if (!of_get_property(node, "ibm,prd-label", NULL))
+			continue;
+
+		/* No address entry: nothing to compare against. */
+		addrp = of_get_address(node, 0, &range_size, NULL);
+		if (!addrp)
+			continue;
+
+		range_addr = of_read_number(addrp, 2);
+		range_end = range_addr + range_size;
+
+		/* Ignore empty or wrapping ranges. */
+		if (range_end <= range_addr)
+			continue;
+
+		if (addr >= range_addr && addr + size <= range_end) {
+			found = true;
+			/* Leaving the iterator early: drop its reference. */
+			of_node_put(node);
+			break;
+		}
+	}
+
+	of_node_put(parent);
+	return found;
+}
+
+/*
+ * Open handler: allow a single opener at a time. prd_usage is atomically
+ * set to 1; if it already was 1 the open fails with -EBUSY. The flag is
+ * cleared again in opal_prd_release().
+ */
+static int opal_prd_open(struct inode *inode, struct file *file)
+{
+ /*
+ * Prevent multiple (separate) processes from concurrent interactions
+ * with the FW PRD channel
+ */
+ if (atomic_xchg(&prd_usage, 1) == 1)
+ return -EBUSY;
+
+ return 0;
+}
+
+/*
+ * opal_prd_mmap - maps firmware-provided ranges into userspace
+ * @file: file structure for the device
+ * @vma: VMA to map the registers into
+ *
+ * The mapping offset is taken as a physical address; the request is
+ * refused with -EINVAL unless it falls inside a range accepted by
+ * opal_prd_range_is_valid(). Otherwise returns the result of
+ * remap_pfn_range().
+ */
+
+static int opal_prd_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	size_t phys_start, map_len;
+
+	pr_devel("opal_prd_mmap(0x%016lx, 0x%016lx, 0x%lx, 0x%lx)\n",
+		 vma->vm_start, vma->vm_end, vma->vm_pgoff,
+		 vma->vm_flags);
+
+	phys_start = vma->vm_pgoff << PAGE_SHIFT;
+	map_len = vma->vm_end - vma->vm_start;
+
+	/* ensure we're mapping within one of the allowable ranges */
+	if (!opal_prd_range_is_valid(phys_start, map_len))
+		return -EINVAL;
+
+	return remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, map_len,
+			       phys_mem_access_prot(file, vma->vm_pgoff,
+						    map_len,
+						    vma->vm_page_prot));
+}
+
+/*
+ * Return true if the PRD message queue is currently empty. The check is
+ * made under opal_prd_msg_queue_lock with interrupts saved/disabled; the
+ * answer may be stale as soon as the lock is dropped.
+ */
+static bool opal_msg_queue_empty(void)
+{
+ unsigned long flags;
+ bool ret;
+
+ spin_lock_irqsave(&opal_prd_msg_queue_lock, flags);
+ ret = list_empty(&opal_prd_msg_queue);
+ spin_unlock_irqrestore(&opal_prd_msg_queue_lock, flags);
+
+ return ret;
+}
+
+/*
+ * Poll handler: register on the PRD wait queue and report the device as
+ * readable whenever at least one message is queued.
+ */
+static __poll_t opal_prd_poll(struct file *file,
+			      struct poll_table_struct *wait)
+{
+	__poll_t mask = 0;
+
+	poll_wait(file, &opal_prd_msg_wait, wait);
+
+	if (!opal_msg_queue_empty())
+		mask = EPOLLIN | EPOLLRDNORM;
+
+	return mask;
+}
+
+/*
+ * Read handler: hand exactly one queued PRD message to userspace.
+ *
+ * Blocks until a message is available unless the file is O_NONBLOCK
+ * (-EAGAIN). Returns the message size on success, -EINVAL if @count is
+ * smaller than a header or than the message, -ESPIPE for a non-zero file
+ * position, -EINTR if the wait is interrupted, or -EFAULT if the copy to
+ * userspace fails. On -EINVAL (message too big) and -EFAULT the message is
+ * put back at the head of the queue so it is not lost.
+ */
+static ssize_t opal_prd_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct opal_prd_msg_queue_item *item;
+ unsigned long flags;
+ ssize_t size, err;
+ int rc;
+
+ /* we need at least a header's worth of data */
+ if (count < sizeof(item->msg.header))
+ return -EINVAL;
+
+ if (*ppos)
+ return -ESPIPE;
+
+ item = NULL;
+
+ for (;;) {
+
+ /* Try to dequeue the oldest message under the queue lock. */
+ spin_lock_irqsave(&opal_prd_msg_queue_lock, flags);
+ if (!list_empty(&opal_prd_msg_queue)) {
+ item = list_first_entry(&opal_prd_msg_queue,
+ struct opal_prd_msg_queue_item, list);
+ list_del(&item->list);
+ }
+ spin_unlock_irqrestore(&opal_prd_msg_queue_lock, flags);
+
+ if (item)
+ break;
+
+ if (file->f_flags & O_NONBLOCK)
+ return -EAGAIN;
+
+ /* Sleep until the notifier queues something, then retry. */
+ rc = wait_event_interruptible(opal_prd_msg_wait,
+ !opal_msg_queue_empty());
+ if (rc)
+ return -EINTR;
+ }
+
+ /* The header's size field covers the whole message. */
+ size = be16_to_cpu(item->msg.header.size);
+ if (size > count) {
+ err = -EINVAL;
+ goto err_requeue;
+ }
+
+ rc = copy_to_user(buf, &item->msg, size);
+ if (rc) {
+ err = -EFAULT;
+ goto err_requeue;
+ }
+
+ kfree(item);
+
+ return size;
+
+err_requeue:
+ /* eep! re-queue at the head of the list */
+ spin_lock_irqsave(&opal_prd_msg_queue_lock, flags);
+ list_add(&item->list, &opal_prd_msg_queue);
+ spin_unlock_irqrestore(&opal_prd_msg_queue_lock, flags);
+ return err;
+}
+
+/*
+ * Write handler: pass one PRD message from userspace to firmware.
+ *
+ * The message length is taken from the header's size field, which must be
+ * at least a header long and must not exceed @count; otherwise -EINVAL is
+ * returned and nothing is sent to firmware.
+ *
+ * Returns the message size on success, -EINVAL for a short or inconsistent
+ * buffer, -EFAULT or another memdup_user() error if userspace memory cannot
+ * be copied, or -EIO if firmware rejects the message.
+ */
+static ssize_t opal_prd_write(struct file *file, const char __user *buf,
+			      size_t count, loff_t *ppos)
+{
+	struct opal_prd_msg_header hdr;
+	struct opal_prd_msg *msg;
+	ssize_t size;
+	int rc;
+
+	if (count < sizeof(hdr))
+		return -EINVAL;
+
+	/* grab the header */
+	rc = copy_from_user(&hdr, buf, sizeof(hdr));
+	if (rc)
+		return -EFAULT;
+
+	size = be16_to_cpu(hdr.size);
+
+	/*
+	 * The size comes from userspace: it has to cover the header itself
+	 * and must stay within the buffer the caller actually handed us.
+	 */
+	if (size < sizeof(hdr) || size > count)
+		return -EINVAL;
+
+	msg = memdup_user(buf, size);
+	if (IS_ERR(msg))
+		return PTR_ERR(msg);
+
+	/*
+	 * The header was fetched from userspace a second time by the copy
+	 * above; reinstate the copy we validated so the size firmware sees
+	 * matches the allocation.
+	 */
+	msg->header = hdr;
+
+	rc = opal_prd_msg(msg);
+	if (rc) {
+		pr_warn("write: opal_prd_msg returned %d\n", rc);
+		size = -EIO;
+	}
+
+	kfree(msg);
+
+	return size;
+}
+
+/*
+ * Release handler: send a header-only FINI message to firmware, then clear
+ * prd_usage so the device can be opened again. The result of the firmware
+ * call is ignored. Always returns 0.
+ */
+static int opal_prd_release(struct inode *inode, struct file *file)
+{
+ struct opal_prd_msg msg;
+
+ /*
+ * NOTE(review): only size and type are set; if the header has further
+ * fields they are passed to firmware uninitialised - confirm against
+ * the struct opal_prd_msg_header definition.
+ */
+ msg.header.size = cpu_to_be16(sizeof(msg));
+ msg.header.type = OPAL_PRD_MSG_TYPE_FINI;
+
+ opal_prd_msg(&msg);
+
+ atomic_xchg(&prd_usage, 0);
+
+ return 0;
+}
+
+/*
+ * ioctl handler.
+ *
+ * OPAL_PRD_GET_INFO:   return the kernel interface version.
+ * OPAL_PRD_SCOM_READ:  read one SCOM register; data and the OPAL return
+ *                      code are written back into the user's struct.
+ * OPAL_PRD_SCOM_WRITE: write one SCOM register; the OPAL return code is
+ *                      written back into the user's struct.
+ *
+ * Returns 0 on success (the SCOM result itself is reported via scom.rc),
+ * -EFAULT if a userspace copy fails, or -EINVAL for an unknown command.
+ */
+static long opal_prd_ioctl(struct file *file, unsigned int cmd,
+			   unsigned long param)
+{
+	void __user *argp = (void __user *)param;
+	struct opal_prd_info info;
+	struct opal_prd_scom scom;
+
+	switch (cmd) {
+	case OPAL_PRD_GET_INFO:
+		memset(&info, 0, sizeof(info));
+		info.version = OPAL_PRD_KERNEL_VERSION;
+		if (copy_to_user(argp, &info, sizeof(info)))
+			return -EFAULT;
+		return 0;
+
+	case OPAL_PRD_SCOM_READ:
+		if (copy_from_user(&scom, argp, sizeof(scom)))
+			return -EFAULT;
+
+		scom.rc = opal_xscom_read(scom.chip, scom.addr,
+					  (__be64 *)&scom.data);
+		/* Firmware stored the value big-endian; convert in place. */
+		scom.data = be64_to_cpu(scom.data);
+		pr_devel("ioctl SCOM_READ: chip %llx addr %016llx data %016llx rc %lld\n",
+			 scom.chip, scom.addr, scom.data, scom.rc);
+
+		if (copy_to_user(argp, &scom, sizeof(scom)))
+			return -EFAULT;
+		return 0;
+
+	case OPAL_PRD_SCOM_WRITE:
+		if (copy_from_user(&scom, argp, sizeof(scom)))
+			return -EFAULT;
+
+		scom.rc = opal_xscom_write(scom.chip, scom.addr, scom.data);
+		pr_devel("ioctl SCOM_WRITE: chip %llx addr %016llx data %016llx rc %lld\n",
+			 scom.chip, scom.addr, scom.data, scom.rc);
+
+		if (copy_to_user(argp, &scom, sizeof(scom)))
+			return -EFAULT;
+		return 0;
+
+	default:
+		return -EINVAL;
+	}
+}
+
+/* File operations backing the "opal-prd" misc device. */
+static const struct file_operations opal_prd_fops = {
+ .open = opal_prd_open,
+ .mmap = opal_prd_mmap,
+ .poll = opal_prd_poll,
+ .read = opal_prd_read,
+ .write = opal_prd_write,
+ .unlocked_ioctl = opal_prd_ioctl,
+ .release = opal_prd_release,
+ .owner = THIS_MODULE,
+};
+
+/* Misc device with a dynamically assigned minor number. */
+static struct miscdevice opal_prd_dev = {
+ .minor = MISC_DYNAMIC_MINOR,
+ .name = "opal-prd",
+ .fops = &opal_prd_fops,
+};
+
+/*
+ * opal interface: notifier for PRD and PRD2 messages from firmware.
+ *
+ * Copies the message (header plus payload, as sized by the header) into a
+ * freshly allocated queue item, appends it to the PRD queue and wakes any
+ * reader. Other message types are ignored. Returns 0, or -ENOMEM when the
+ * queue item cannot be allocated.
+ */
+static int opal_prd_msg_notifier(struct notifier_block *nb,
+				 unsigned long msg_type, void *_msg)
+{
+	struct opal_prd_msg_queue_item *entry;
+	struct opal_prd_msg_header *prd_hdr;
+	struct opal_msg *opal_msg = _msg;
+	int payload_len, alloc_len;
+	unsigned long irq_flags;
+
+	if (msg_type != OPAL_MSG_PRD && msg_type != OPAL_MSG_PRD2)
+		return 0;
+
+	/*
+	 * The 'size' field in the header includes the header itself, so the
+	 * allocation is the queue item with its embedded msg replaced by the
+	 * full message.
+	 */
+	prd_hdr = (void *)opal_msg->params;
+	payload_len = be16_to_cpu(prd_hdr->size);
+	alloc_len = payload_len + sizeof(*entry) - sizeof(entry->msg);
+
+	/* GFP_ATOMIC: allocate without sleeping. */
+	entry = kzalloc(alloc_len, GFP_ATOMIC);
+	if (!entry)
+		return -ENOMEM;
+
+	memcpy(&entry->msg.data, opal_msg->params, payload_len);
+
+	spin_lock_irqsave(&opal_prd_msg_queue_lock, irq_flags);
+	list_add_tail(&entry->list, &opal_prd_msg_queue);
+	spin_unlock_irqrestore(&opal_prd_msg_queue_lock, irq_flags);
+
+	wake_up_interruptible(&opal_prd_msg_wait);
+
+	return 0;
+}
+
+/* Notifier block registered for OPAL_MSG_PRD in opal_prd_probe(). */
+static struct notifier_block opal_prd_event_nb = {
+ .notifier_call = opal_prd_msg_notifier,
+ .next = NULL,
+ .priority = 0,
+};
+
+/* Notifier block registered for OPAL_MSG_PRD2; same callback as above. */
+static struct notifier_block opal_prd_event_nb2 = {
+ .notifier_call = opal_prd_msg_notifier,
+ .next = NULL,
+ .priority = 0,
+};
+
+/*
+ * Probe: claim the single PRD instance, register the PRD and PRD2 message
+ * notifiers and expose the misc device.
+ *
+ * Returns 0 on success, -ENODEV without an OF node, -EBUSY if an instance
+ * is already bound, or the error from the failing registration. On failure
+ * every step already taken is undone, including the claim on prd_node, so
+ * a later probe is not refused with -EBUSY.
+ */
+static int opal_prd_probe(struct platform_device *pdev)
+{
+	int rc;
+
+	if (!pdev || !pdev->dev.of_node)
+		return -ENODEV;
+
+	/* We should only have one prd driver instance per machine; ensure
+	 * that we only get a valid probe on a single OF node.
+	 */
+	if (prd_node)
+		return -EBUSY;
+
+	prd_node = pdev->dev.of_node;
+
+	rc = opal_message_notifier_register(OPAL_MSG_PRD, &opal_prd_event_nb);
+	if (rc) {
+		pr_err("Couldn't register event notifier\n");
+		goto err_clear_node;
+	}
+
+	rc = opal_message_notifier_register(OPAL_MSG_PRD2, &opal_prd_event_nb2);
+	if (rc) {
+		pr_err("Couldn't register PRD2 event notifier\n");
+		goto err_unregister_prd;
+	}
+
+	rc = misc_register(&opal_prd_dev);
+	if (rc) {
+		pr_err("failed to register miscdev\n");
+		goto err_unregister_prd2;
+	}
+
+	return 0;
+
+err_unregister_prd2:
+	opal_message_notifier_unregister(OPAL_MSG_PRD2, &opal_prd_event_nb2);
+err_unregister_prd:
+	opal_message_notifier_unregister(OPAL_MSG_PRD, &opal_prd_event_nb);
+err_clear_node:
+	/* Release the single-instance claim taken above. */
+	prd_node = NULL;
+	return rc;
+}
+
+/*
+ * Remove: tear down the misc device and both message notifiers, then give
+ * up the single-instance claim so the driver can be bound again (probe
+ * returns -EBUSY while prd_node is set). Always returns 0.
+ */
+static int opal_prd_remove(struct platform_device *pdev)
+{
+	misc_deregister(&opal_prd_dev);
+	opal_message_notifier_unregister(OPAL_MSG_PRD, &opal_prd_event_nb);
+	opal_message_notifier_unregister(OPAL_MSG_PRD2, &opal_prd_event_nb2);
+	prd_node = NULL;
+	return 0;
+}
+
+/* Device-tree match table: bind to "ibm,opal-prd" nodes. */
+static const struct of_device_id opal_prd_match[] = {
+ { .compatible = "ibm,opal-prd" },
+ { },
+};
+
+/* Platform driver glue for the PRD device. */
+static struct platform_driver opal_prd_driver = {
+ .driver = {
+ .name = "opal-prd",
+ .of_match_table = opal_prd_match,
+ },
+ .probe = opal_prd_probe,
+ .remove = opal_prd_remove,
+};
+
+module_platform_driver(opal_prd_driver);
+
+MODULE_DEVICE_TABLE(of, opal_prd_match);
+MODULE_DESCRIPTION("PowerNV OPAL runtime diagnostic driver");
+MODULE_LICENSE("GPL");
diff --git a/arch/powerpc/platforms/powernv/opal-psr.c b/arch/powerpc/platforms/powernv/opal-psr.c
new file mode 100644
index 000000000..6441e17b6
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-psr.c
@@ -0,0 +1,175 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * PowerNV OPAL Power-Shift-Ratio interface
+ *
+ * Copyright 2017 IBM Corp.
+ */
+
+#define pr_fmt(fmt) "opal-psr: " fmt
+
+#include <linux/of.h>
+#include <linux/kobject.h>
+#include <linux/slab.h>
+
+#include <asm/opal.h>
+
+static DEFINE_MUTEX(psr_mutex);
+
+static struct kobject *psr_kobj;
+
+/*
+ * One power-shift-ratio sysfs attribute and the OPAL handle it operates on.
+ * The psr_attrs array (one entry per child of the PSR device-tree node) is
+ * allocated in opal_psr_init().
+ */
+static struct psr_attr {
+ u32 handle;
+ struct kobj_attribute attr;
+} *psr_attrs;
+
+/*
+ * sysfs show handler: query OPAL for the power-shift-ratio behind this
+ * attribute's handle and print it as an unsigned decimal plus newline.
+ *
+ * Handles both synchronous and asynchronous completion of the OPAL call.
+ * Returns the number of characters written to @buf, or a negative errno.
+ */
+static ssize_t psr_show(struct kobject *kobj, struct kobj_attribute *attr,
+			char *buf)
+{
+	struct psr_attr *psr_attr = container_of(attr, struct psr_attr, attr);
+	struct opal_msg msg;
+	bool have_value = false;
+	int psr, ret, token;
+
+	token = opal_async_get_token_interruptible();
+	if (token < 0) {
+		pr_devel("Failed to get token\n");
+		return token;
+	}
+
+	ret = mutex_lock_interruptible(&psr_mutex);
+	if (ret)
+		goto out_token;
+
+	ret = opal_get_power_shift_ratio(psr_attr->handle, token,
+					 (u32 *)__pa(&psr));
+	if (ret == OPAL_ASYNC_COMPLETION) {
+		if (opal_async_wait_response(token, &msg)) {
+			pr_devel("Failed to wait for the async response\n");
+			ret = -EIO;
+		} else {
+			ret = opal_error_code(opal_get_async_rc(msg));
+			have_value = !ret;
+		}
+	} else if (ret == OPAL_SUCCESS) {
+		have_value = true;
+	} else {
+		ret = opal_error_code(ret);
+	}
+
+	/* Only format the value once OPAL has reported success. */
+	if (have_value) {
+		ret = sprintf(buf, "%u\n", be32_to_cpu(psr));
+		if (ret < 0)
+			ret = -EIO;
+	}
+
+	mutex_unlock(&psr_mutex);
+out_token:
+	opal_async_release_token(token);
+	return ret;
+}
+
+/*
+ * sysfs store handler: parse an integer from @buf and ask OPAL to set it as
+ * the power-shift-ratio for this attribute's handle.
+ *
+ * Returns @count on success or a negative errno (parse error, interrupted
+ * wait for token/mutex, or translated OPAL error).
+ */
+static ssize_t psr_store(struct kobject *kobj, struct kobj_attribute *attr,
+			 const char *buf, size_t count)
+{
+	struct psr_attr *psr_attr = container_of(attr, struct psr_attr, attr);
+	struct opal_msg msg;
+	int psr, ret, token;
+
+	ret = kstrtoint(buf, 0, &psr);
+	if (ret)
+		return ret;
+
+	token = opal_async_get_token_interruptible();
+	if (token < 0) {
+		pr_devel("Failed to get token\n");
+		return token;
+	}
+
+	ret = mutex_lock_interruptible(&psr_mutex);
+	if (ret)
+		goto out_token;
+
+	ret = opal_set_power_shift_ratio(psr_attr->handle, token, psr);
+	if (ret == OPAL_ASYNC_COMPLETION) {
+		/* OPAL will answer later; block until the response arrives. */
+		if (opal_async_wait_response(token, &msg)) {
+			pr_devel("Failed to wait for the async response\n");
+			ret = -EIO;
+		} else {
+			ret = opal_error_code(opal_get_async_rc(msg));
+			if (!ret)
+				ret = count;
+		}
+	} else if (ret == OPAL_SUCCESS) {
+		ret = count;
+	} else {
+		ret = opal_error_code(ret);
+	}
+
+	mutex_unlock(&psr_mutex);
+out_token:
+	opal_async_release_token(token);
+	return ret;
+}
+
+/*
+ * Create one read/write sysfs file under the "psr" kobject for every child
+ * of the "ibm,opal-power-shift-ratio" device-tree node. Each child must
+ * provide a "handle" (u32) and a "label" (string, used as the file name).
+ *
+ * On any failure the kobject is dropped and the attribute array is freed.
+ */
+void __init opal_psr_init(void)
+{
+ struct device_node *psr, *node;
+ int i = 0;
+
+ psr = of_find_compatible_node(NULL, NULL,
+ "ibm,opal-power-shift-ratio");
+ if (!psr) {
+ pr_devel("Power-shift-ratio node not found\n");
+ return;
+ }
+
+ /* One attribute slot per child node. */
+ psr_attrs = kcalloc(of_get_child_count(psr), sizeof(*psr_attrs),
+ GFP_KERNEL);
+ if (!psr_attrs)
+ goto out_put_psr;
+
+ psr_kobj = kobject_create_and_add("psr", opal_kobj);
+ if (!psr_kobj) {
+ pr_warn("Failed to create psr kobject\n");
+ goto out;
+ }
+
+ for_each_child_of_node(psr, node) {
+ if (of_property_read_u32(node, "handle",
+ &psr_attrs[i].handle))
+ goto out_kobj;
+
+ sysfs_attr_init(&psr_attrs[i].attr.attr);
+ /* The attribute name points at the device-tree "label" string. */
+ if (of_property_read_string(node, "label",
+ &psr_attrs[i].attr.attr.name))
+ goto out_kobj;
+ psr_attrs[i].attr.attr.mode = 0664;
+ psr_attrs[i].attr.show = psr_show;
+ psr_attrs[i].attr.store = psr_store;
+ if (sysfs_create_file(psr_kobj, &psr_attrs[i].attr.attr)) {
+ pr_devel("Failed to create psr sysfs file %s\n",
+ psr_attrs[i].attr.attr.name);
+ goto out_kobj;
+ }
+ i++;
+ }
+ of_node_put(psr);
+
+ return;
+out_kobj:
+ /* Reached only from inside the loop: drop the iterator's reference. */
+ of_node_put(node);
+ kobject_put(psr_kobj);
+out:
+ kfree(psr_attrs);
+out_put_psr:
+ of_node_put(psr);
+}
diff --git a/arch/powerpc/platforms/powernv/opal-rtc.c b/arch/powerpc/platforms/powernv/opal-rtc.c
new file mode 100644
index 000000000..79011a263
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-rtc.c
@@ -0,0 +1,84 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * PowerNV Real Time Clock.
+ *
+ * Copyright 2011 IBM Corp.
+ */
+
+
+#include <linux/kernel.h>
+#include <linux/time.h>
+#include <linux/bcd.h>
+#include <linux/rtc.h>
+#include <linux/delay.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+
+#include <asm/opal.h>
+#include <asm/firmware.h>
+#include <asm/machdep.h>
+
+/*
+ * Convert the BCD-encoded date/time words returned by OPAL into @tm.
+ *
+ * @y_m_d holds, from the most significant byte down: century, year within
+ * the century, month, day. The top three bytes of @h_m_s_ms hold hour,
+ * minute and second; the remaining bytes are not used here. tm_wday is set
+ * to -1.
+ */
+static void __init opal_to_tm(u32 y_m_d, u64 h_m_s_ms, struct rtc_time *tm)
+{
+	unsigned int century = bcd2bin(y_m_d >> 24);
+	unsigned int year = bcd2bin((y_m_d >> 16) & 0xff);
+	unsigned int month = bcd2bin((y_m_d >> 8) & 0xff);
+	unsigned int day = bcd2bin(y_m_d & 0xff);
+
+	/* struct rtc_time counts years from 1900 and months from 0. */
+	tm->tm_year = ((century * 100) + year) - 1900;
+	tm->tm_mon = month - 1;
+	tm->tm_mday = day;
+
+	tm->tm_hour = bcd2bin((h_m_s_ms >> 56) & 0xff);
+	tm->tm_min = bcd2bin((h_m_s_ms >> 48) & 0xff);
+	tm->tm_sec = bcd2bin((h_m_s_ms >> 40) & 0xff);
+
+	tm->tm_wday = -1;
+}
+
+/*
+ * Read the RTC through OPAL and return the time as a time64_t value.
+ *
+ * Retries (busy-waiting with mdelay) for as long as OPAL reports BUSY or
+ * BUSY_EVENT. Returns 0 when the OPAL_RTC_READ token is not available or
+ * the read ends with anything other than OPAL_SUCCESS.
+ */
+time64_t __init opal_get_boot_time(void)
+{
+	struct rtc_time tm;
+	__be32 be_ymd;
+	__be64 be_hms;
+	long rc;
+
+	if (!opal_check_token(OPAL_RTC_READ))
+		return 0;
+
+	for (;;) {
+		rc = opal_rtc_read(&be_ymd, &be_hms);
+		if (rc != OPAL_BUSY && rc != OPAL_BUSY_EVENT)
+			break;
+
+		mdelay(OPAL_BUSY_DELAY_MS);
+		/* On BUSY_EVENT, also poll OPAL for pending events. */
+		if (rc == OPAL_BUSY_EVENT)
+			opal_poll_events(NULL);
+	}
+
+	if (rc != OPAL_SUCCESS)
+		return 0;
+
+	opal_to_tm(be32_to_cpu(be_ymd), be64_to_cpu(be_hms), &tm);
+	return rtc_tm_to_time64(&tm);
+}
+
+/*
+ * Create the "opal-rtc" platform device.
+ *
+ * If /ibm,opal/rtc exists the device is created from that node. Otherwise
+ * a plain platform device is registered, but only when firmware offers the
+ * OPAL_RTC_READ or OPAL_READ_TPO call; with neither, returns -ENODEV.
+ */
+static __init int opal_time_init(void)
+{
+	struct platform_device *pdev;
+	struct device_node *rtc;
+
+	rtc = of_find_node_by_path("/ibm,opal/rtc");
+	if (rtc) {
+		pdev = of_platform_device_create(rtc, "opal-rtc", NULL);
+		of_node_put(rtc);
+		return PTR_ERR_OR_ZERO(pdev);
+	}
+
+	/* No device-tree node: fall back to probing the OPAL tokens. */
+	if (!opal_check_token(OPAL_RTC_READ) &&
+	    !opal_check_token(OPAL_READ_TPO))
+		return -ENODEV;
+
+	pdev = platform_device_register_simple("opal-rtc", -1, NULL, 0);
+	return PTR_ERR_OR_ZERO(pdev);
+}
+machine_subsys_initcall(powernv, opal_time_init);
diff --git a/arch/powerpc/platforms/powernv/opal-secvar.c b/arch/powerpc/platforms/powernv/opal-secvar.c
new file mode 100644
index 000000000..6ac410f4d
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-secvar.c
@@ -0,0 +1,182 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PowerNV code for secure variables
+ *
+ * Copyright (C) 2019 IBM Corporation
+ * Author: Claudio Carvalho
+ * Nayna Jain
+ *
+ * APIs to access secure variables managed by OPAL.
+ */
+
+#define pr_fmt(fmt) "secvar: "fmt
+
+#include <linux/types.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <asm/opal.h>
+#include <asm/secvar.h>
+#include <asm/secure_boot.h>
+
+/*
+ * Translate an OPAL status code into a negative errno (0 for success).
+ * Status codes without an explicit mapping become -EINVAL.
+ */
+static int opal_status_to_err(int rc)
+{
+	switch (rc) {
+	case OPAL_SUCCESS:
+		return 0;
+	case OPAL_UNSUPPORTED:
+		return -ENXIO;
+	case OPAL_PARAMETER:
+		return -EINVAL;
+	case OPAL_RESOURCE:
+		return -ENOSPC;
+	case OPAL_HARDWARE:
+		return -EIO;
+	case OPAL_NO_MEM:
+		return -ENOMEM;
+	case OPAL_EMPTY:
+		return -ENOENT;
+	case OPAL_PARTIAL:
+		return -EFBIG;
+	default:
+		return -EINVAL;
+	}
+}
+
+/*
+ * Fetch the secure variable named by @key (@ksize bytes) into @data.
+ *
+ * *@dsize is passed to OPAL in big-endian form and converted back to CPU
+ * byte order after the call, whatever the call's result. Returns 0 or a
+ * negative errno; -EINVAL if @key or @dsize is NULL.
+ */
+static int opal_get_variable(const char *key, u64 ksize, u8 *data, u64 *dsize)
+{
+ int rc;
+
+ if (!key || !dsize)
+ return -EINVAL;
+
+ /* Byte-swap in place: OPAL reads and writes *dsize as big-endian. */
+ *dsize = cpu_to_be64(*dsize);
+
+ rc = opal_secvar_get(key, ksize, data, dsize);
+
+ *dsize = be64_to_cpu(*dsize);
+
+ return opal_status_to_err(rc);
+}
+
+/*
+ * Ask OPAL for the next secure-variable name, using @key (buffer of
+ * @keybufsize bytes) and *@keylen.
+ *
+ * *@keylen is passed to OPAL in big-endian form and converted back to CPU
+ * byte order after the call, whatever the call's result. Returns 0 or a
+ * negative errno; -EINVAL if @key or @keylen is NULL.
+ */
+static int opal_get_next_variable(const char *key, u64 *keylen, u64 keybufsize)
+{
+ int rc;
+
+ if (!key || !keylen)
+ return -EINVAL;
+
+ /* Byte-swap in place: OPAL reads and writes *keylen as big-endian. */
+ *keylen = cpu_to_be64(*keylen);
+
+ rc = opal_secvar_get_next(key, keylen, keybufsize);
+
+ *keylen = be64_to_cpu(*keylen);
+
+ return opal_status_to_err(rc);
+}
+
+/*
+ * Queue an update of the secure variable named by @key (@ksize bytes) with
+ * @dsize bytes from @data. Returns 0 or a negative errno; -EINVAL if @key
+ * or @data is NULL.
+ */
+static int opal_set_variable(const char *key, u64 ksize, u8 *data, u64 dsize)
+{
+	if (!key || !data)
+		return -EINVAL;
+
+	return opal_status_to_err(opal_secvar_enqueue_update(key, ksize,
+							     data, dsize));
+}
+
+/*
+ * Copy the "format" string of the "ibm,secvar-backend" device-tree node
+ * into @buf (at most @bufsize bytes including the terminator).
+ *
+ * Returns the snprintf() result on success, -ENODEV when the node is not
+ * available, or the error from reading the property.
+ */
+static ssize_t opal_secvar_format(char *buf, size_t bufsize)
+{
+	struct device_node *backend;
+	const char *format;
+	ssize_t rc;
+
+	backend = of_find_compatible_node(NULL, NULL, "ibm,secvar-backend");
+	if (!of_device_is_available(backend)) {
+		rc = -ENODEV;
+	} else {
+		rc = of_property_read_string(backend, "format", &format);
+		if (!rc)
+			rc = snprintf(buf, bufsize, "%s", format);
+	}
+
+	of_node_put(backend);
+
+	return rc;
+}
+
+/*
+ * Read the "max-var-size" property of the "ibm,secvar-backend" device-tree
+ * node into *@max_size.
+ *
+ * Returns 0 on success, -ENODEV when the node is missing or not available,
+ * or the error from reading the property.
+ */
+static int opal_secvar_max_size(u64 *max_size)
+{
+	struct device_node *backend;
+	int rc = -ENODEV;
+
+	backend = of_find_compatible_node(NULL, NULL, "ibm,secvar-backend");
+	if (!backend)
+		return -ENODEV;
+
+	if (of_device_is_available(backend))
+		rc = of_property_read_u64(backend, "max-var-size", max_size);
+
+	of_node_put(backend);
+	return rc;
+}
+
+/* OPAL-backed secure-variable operations, installed by opal_secvar_probe(). */
+static const struct secvar_operations opal_secvar_ops = {
+ .get = opal_get_variable,
+ .get_next = opal_get_next_variable,
+ .set = opal_set_variable,
+ .format = opal_secvar_format,
+ .max_size = opal_secvar_max_size,
+};
+
+/*
+ * Probe: install the OPAL secure-variable operations, provided firmware
+ * offers all three secvar calls (get, get-next, enqueue-update). Returns
+ * -ENODEV otherwise, else the result of set_secvar_ops().
+ */
+static int opal_secvar_probe(struct platform_device *pdev)
+{
+	bool supported = opal_check_token(OPAL_SECVAR_GET) &&
+			 opal_check_token(OPAL_SECVAR_GET_NEXT) &&
+			 opal_check_token(OPAL_SECVAR_ENQUEUE_UPDATE);
+
+	if (!supported) {
+		pr_err("OPAL doesn't support secure variables\n");
+		return -ENODEV;
+	}
+
+	return set_secvar_ops(&opal_secvar_ops);
+}
+
+/* Device-tree match table: bind to "ibm,secvar-backend" nodes. */
+static const struct of_device_id opal_secvar_match[] = {
+ { .compatible = "ibm,secvar-backend",},
+ {},
+};
+
+/*
+ * Platform driver without a .probe member: the probe function is supplied
+ * to platform_driver_probe() in opal_secvar_init().
+ */
+static struct platform_driver opal_secvar_driver = {
+ .driver = {
+ .name = "secvar",
+ .of_match_table = opal_secvar_match,
+ },
+};
+
+/*
+ * Device initcall: register the secvar platform driver with
+ * opal_secvar_probe() as its probe routine and return the result.
+ */
+static int __init opal_secvar_init(void)
+{
+ return platform_driver_probe(&opal_secvar_driver, opal_secvar_probe);
+}
+device_initcall(opal_secvar_init);
diff --git a/arch/powerpc/platforms/powernv/opal-sensor-groups.c b/arch/powerpc/platforms/powernv/opal-sensor-groups.c
new file mode 100644
index 000000000..9944376b1
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-sensor-groups.c
@@ -0,0 +1,240 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * PowerNV OPAL Sensor-groups interface
+ *
+ * Copyright 2017 IBM Corp.
+ */
+
+#define pr_fmt(fmt) "opal-sensor-groups: " fmt
+
+#include <linux/of.h>
+#include <linux/kobject.h>
+#include <linux/slab.h>
+
+#include <asm/opal.h>
+
+/* Taken by sg_store() around the OPAL sensor-group clear call. */
+static DEFINE_MUTEX(sg_mutex);
+
+/* "sensor_groups" kobject, created under opal_kobj by opal_sensor_groups_init(). */
+static struct kobject *sg_kobj;
+
+/* One sysfs attribute of a sensor group together with its OPAL handle. */
+struct sg_attr {
+ u32 handle; /* passed to opal_sensor_group_clear() by sg_store() */
+ struct kobj_attribute attr;
+};
+
+/*
+ * Per-group state. The 'sgs' array is allocated with one slot per child of
+ * the "ibm,opal-sensor-group" node in opal_sensor_groups_init().
+ */
+static struct sensor_group {
+ char name[20]; /* backing storage for sg.name */
+ struct attribute_group sg;
+ struct sg_attr *sgattrs; /* array of attributes, one per supported op */
+} *sgs;
+
+/*
+ * Enable or disable the sensor group identified by @handle through OPAL.
+ *
+ * If OPAL completes the request asynchronously, wait for the response and
+ * translate its return code; otherwise translate the immediate return code.
+ *
+ * Returns a negative value from token allocation, -EIO when waiting for the
+ * asynchronous response fails, or the opal_error_code() translation.
+ */
+int sensor_group_enable(u32 handle, bool enable)
+{
+	struct opal_msg reply;
+	int tok, rc;
+
+	tok = opal_async_get_token_interruptible();
+	if (tok < 0)
+		return tok;
+
+	rc = opal_sensor_group_enable(handle, tok, enable);
+	if (rc != OPAL_ASYNC_COMPLETION) {
+		rc = opal_error_code(rc);
+	} else if (opal_async_wait_response(tok, &reply)) {
+		pr_devel("Failed to wait for the async response\n");
+		rc = -EIO;
+	} else {
+		rc = opal_error_code(opal_get_async_rc(reply));
+	}
+
+	opal_async_release_token(tok);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(sensor_group_enable);
+
+/*
+ * sysfs store handler for a sensor-group attribute.
+ *
+ * Only the value 1 is accepted; it asks OPAL to clear the sensor group whose
+ * handle is stored in the enclosing struct sg_attr.
+ *
+ * Returns @count on success, the kstrtoint() error for unparsable input,
+ * -EINVAL for any value other than 1, a negative value from token
+ * allocation or mutex_lock_interruptible(), -EIO when waiting for the
+ * asynchronous response fails, or the opal_error_code() translation.
+ */
+static ssize_t sg_store(struct kobject *kobj, struct kobj_attribute *attr,
+			const char *buf, size_t count)
+{
+	struct sg_attr *sattr = container_of(attr, struct sg_attr, attr);
+	struct opal_msg msg;
+	int data;	/* kstrtoint() writes through an int pointer */
+	int ret, token;
+
+	ret = kstrtoint(buf, 0, &data);
+	if (ret)
+		return ret;
+
+	if (data != 1)
+		return -EINVAL;
+
+	token = opal_async_get_token_interruptible();
+	if (token < 0) {
+		pr_devel("Failed to get token\n");
+		return token;
+	}
+
+	ret = mutex_lock_interruptible(&sg_mutex);
+	if (ret)
+		goto out_token;
+
+	ret = opal_sensor_group_clear(sattr->handle, token);
+	switch (ret) {
+	case OPAL_ASYNC_COMPLETION:
+		ret = opal_async_wait_response(token, &msg);
+		if (ret) {
+			pr_devel("Failed to wait for the async response\n");
+			ret = -EIO;
+			goto out;
+		}
+		ret = opal_error_code(opal_get_async_rc(msg));
+		if (!ret)
+			ret = count;
+		break;
+	case OPAL_SUCCESS:
+		ret = count;
+		break;
+	default:
+		ret = opal_error_code(ret);
+	}
+
+out:
+	mutex_unlock(&sg_mutex);
+out_token:
+	opal_async_release_token(token);
+	return ret;
+}
+
+/*
+ * Table of sensor-group operations that are exposed through sysfs. Each
+ * entry maps an OPAL operation number (as found in a group's "ops"
+ * property) to the attribute name and the store handler used for it.
+ */
+static struct sg_ops_info {
+ int opal_no;
+ const char *attr_name;
+ ssize_t (*store)(struct kobject *kobj, struct kobj_attribute *attr,
+ const char *buf, size_t count);
+} ops_info[] = {
+ { OPAL_SENSOR_GROUP_CLEAR, "clear", sg_store },
+};
+
+/*
+ * Initialise @attr as a write-only (0220) sysfs attribute for the operation
+ * at ops_info[@index], remembering the OPAL @handle it acts on.
+ */
+static void add_attr(int handle, struct sg_attr *attr, int index)
+{
+	struct kobj_attribute *kattr = &attr->attr;
+
+	attr->handle = handle;
+	sysfs_attr_init(&kattr->attr);
+	kattr->attr.name = ops_info[index].attr_name;
+	kattr->attr.mode = 0220;
+	kattr->store = ops_info[index].store;
+}
+
+/*
+ * Fill @sg's attribute arrays with one attribute for every entry of @ops
+ * (@len big-endian cells) that matches an ops_info[] operation, then
+ * create the sysfs group under sg_kobj.
+ *
+ * The caller must have sized sg->sgattrs and sg->sg.attrs for the number of
+ * matches. Returns the result of sysfs_create_group().
+ */
+static int __init add_attr_group(const __be32 *ops, int len, struct sensor_group *sg,
+				 u32 handle)
+{
+	int op_idx, info_idx;
+	int filled = 0;
+
+	for (op_idx = 0; op_idx < len; op_idx++) {
+		for (info_idx = 0; info_idx < ARRAY_SIZE(ops_info); info_idx++) {
+			struct sg_attr *slot;
+
+			if (be32_to_cpu(ops[op_idx]) != ops_info[info_idx].opal_no)
+				continue;
+
+			slot = &sg->sgattrs[filled];
+			add_attr(handle, slot, info_idx);
+			sg->sg.attrs[filled] = &slot->attr.attr;
+			filled++;
+		}
+	}
+
+	return sysfs_create_group(sg_kobj, &sg->sg);
+}
+
+/*
+ * Count how many of the @len big-endian cells in @ops name an operation
+ * listed in ops_info[].
+ */
+static int __init get_nr_attrs(const __be32 *ops, int len)
+{
+	int op_idx, info_idx;
+	int matches = 0;
+
+	for (op_idx = 0; op_idx < len; op_idx++) {
+		for (info_idx = 0; info_idx < ARRAY_SIZE(ops_info); info_idx++) {
+			if (be32_to_cpu(ops[op_idx]) == ops_info[info_idx].opal_no)
+				matches++;
+		}
+	}
+
+	return matches;
+}
+
+/*
+ * Create /sys/firmware/opal/sensor_groups and one attribute group per child
+ * of the "ibm,opal-sensor-group" device-tree node.
+ *
+ * Children without an "ops" property, or whose "ops" list contains no
+ * operation known to ops_info[], are skipped. Any other failure tears down
+ * everything created so far.
+ */
+void __init opal_sensor_groups_init(void)
+{
+	struct device_node *sg, *node;
+	int i = 0;
+
+	sg = of_find_compatible_node(NULL, NULL, "ibm,opal-sensor-group");
+	if (!sg) {
+		pr_devel("Sensor groups node not found\n");
+		return;
+	}
+
+	sgs = kcalloc(of_get_child_count(sg), sizeof(*sgs), GFP_KERNEL);
+	if (!sgs)
+		goto out_sg_put;
+
+	sg_kobj = kobject_create_and_add("sensor_groups", opal_kobj);
+	if (!sg_kobj) {
+		pr_warn("Failed to create sensor group kobject\n");
+		goto out_sgs;
+	}
+
+	for_each_child_of_node(sg, node) {
+		const __be32 *ops;
+		u32 sgid, chipid;
+		int len, nr_ops, nr_attrs;
+
+		ops = of_get_property(node, "ops", &len);
+		if (!ops)
+			continue;
+
+		/*
+		 * of_get_property() reports the property length in bytes,
+		 * while the helpers below index 'ops' cell by cell.
+		 */
+		nr_ops = len / sizeof(*ops);
+
+		nr_attrs = get_nr_attrs(ops, nr_ops);
+		if (!nr_attrs)
+			continue;
+
+		/*
+		 * From here on sgs[i] may own allocations; the error path
+		 * frees slot i as well (kfree(NULL) is a no-op).
+		 */
+		sgs[i].sgattrs = kcalloc(nr_attrs, sizeof(*sgs[i].sgattrs),
+					 GFP_KERNEL);
+		if (!sgs[i].sgattrs)
+			goto out_sgs_sgattrs;
+
+		/* One extra zeroed slot terminates the attribute list */
+		sgs[i].sg.attrs = kcalloc(nr_attrs + 1,
+					  sizeof(*sgs[i].sg.attrs),
+					  GFP_KERNEL);
+		if (!sgs[i].sg.attrs)
+			goto out_sgs_sgattrs;
+
+		if (of_property_read_u32(node, "sensor-group-id", &sgid)) {
+			pr_warn("sensor-group-id property not found\n");
+			goto out_sgs_sgattrs;
+		}
+
+		/* Bounded formatting: name[] is a fixed-size buffer */
+		if (!of_property_read_u32(node, "ibm,chip-id", &chipid))
+			snprintf(sgs[i].name, sizeof(sgs[i].name), "%pOFn%d",
+				 node, chipid);
+		else
+			snprintf(sgs[i].name, sizeof(sgs[i].name), "%pOFn",
+				 node);
+
+		sgs[i].sg.name = sgs[i].name;
+		if (add_attr_group(ops, nr_ops, &sgs[i], sgid)) {
+			pr_warn("Failed to create sensor attribute group %s\n",
+				sgs[i].sg.name);
+			goto out_sgs_sgattrs;
+		}
+		i++;
+	}
+	of_node_put(sg);
+
+	return;
+
+out_sgs_sgattrs:
+	/*
+	 * Drop the kobject before releasing the attribute memory that the
+	 * groups registered under it point at.
+	 */
+	kobject_put(sg_kobj);
+	/* Free the partially set up slot i and every completed slot below it */
+	do {
+		kfree(sgs[i].sgattrs);
+		kfree(sgs[i].sg.attrs);
+	} while (--i >= 0);
+	/* Leaving for_each_child_of_node() early: drop the child reference */
+	of_node_put(node);
+out_sgs:
+	kfree(sgs);
+out_sg_put:
+	of_node_put(sg);
+}
diff --git a/arch/powerpc/platforms/powernv/opal-sensor.c b/arch/powerpc/platforms/powernv/opal-sensor.c
new file mode 100644
index 000000000..8880a1c14
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-sensor.c
@@ -0,0 +1,132 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * PowerNV sensor code
+ *
+ * Copyright (C) 2013 IBM
+ */
+
+#include <linux/delay.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <asm/opal.h>
+#include <asm/machdep.h>
+
+/*
+ * Read the 32-bit value of the sensor identified by @sensor_hndl into
+ * @sensor_data. The handle is an opaque id that the driver reads from the
+ * device tree.
+ *
+ * Returns 0 on success, a negative value from token allocation or from
+ * waiting for the asynchronous response, -EIO when OPAL reports
+ * OPAL_WRONG_STATE, or the opal_error_code() translation otherwise.
+ */
+int opal_get_sensor_data(u32 sensor_hndl, u32 *sensor_data)
+{
+	struct opal_msg reply;
+	__be32 raw;
+	int rc, tok;
+
+	tok = opal_async_get_token_interruptible();
+	if (tok < 0)
+		return tok;
+
+	rc = opal_sensor_read(sensor_hndl, tok, &raw);
+	if (rc == OPAL_ASYNC_COMPLETION) {
+		rc = opal_async_wait_response(tok, &reply);
+		if (rc) {
+			pr_err("%s: Failed to wait for the async response, %d\n",
+			       __func__, rc);
+		} else {
+			rc = opal_error_code(opal_get_async_rc(reply));
+			*sensor_data = be32_to_cpu(raw);
+		}
+	} else if (rc == OPAL_SUCCESS) {
+		rc = 0;
+		*sensor_data = be32_to_cpu(raw);
+	} else if (rc == OPAL_WRONG_STATE) {
+		rc = -EIO;
+	} else {
+		rc = opal_error_code(rc);
+	}
+
+	opal_async_release_token(tok);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(opal_get_sensor_data);
+
+/*
+ * Read the value of the sensor identified by @sensor_hndl into the 64-bit
+ * @sensor_data.
+ *
+ * When firmware lacks OPAL_SENSOR_READ_U64 this falls back to
+ * opal_get_sensor_data() and widens the 32-bit result. Return values mirror
+ * opal_get_sensor_data().
+ */
+int opal_get_sensor_data_u64(u32 sensor_hndl, u64 *sensor_data)
+{
+	struct opal_msg reply;
+	__be64 raw;
+	int rc, tok;
+
+	if (!opal_check_token(OPAL_SENSOR_READ_U64)) {
+		u32 narrow;
+
+		rc = opal_get_sensor_data(sensor_hndl, &narrow);
+		if (!rc)
+			*sensor_data = narrow;
+		return rc;
+	}
+
+	tok = opal_async_get_token_interruptible();
+	if (tok < 0)
+		return tok;
+
+	rc = opal_sensor_read_u64(sensor_hndl, tok, &raw);
+	if (rc == OPAL_ASYNC_COMPLETION) {
+		rc = opal_async_wait_response(tok, &reply);
+		if (rc) {
+			pr_err("%s: Failed to wait for the async response, %d\n",
+			       __func__, rc);
+		} else {
+			rc = opal_error_code(opal_get_async_rc(reply));
+			*sensor_data = be64_to_cpu(raw);
+		}
+	} else if (rc == OPAL_SUCCESS) {
+		rc = 0;
+		*sensor_data = be64_to_cpu(raw);
+	} else if (rc == OPAL_WRONG_STATE) {
+		rc = -EIO;
+	} else {
+		rc = opal_error_code(rc);
+	}
+
+	opal_async_release_token(tok);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(opal_get_sensor_data_u64);
+
+/*
+ * Create the "opal-sensor" platform device for the /ibm,opal/sensors node.
+ *
+ * Returns -ENODEV when the node does not exist, otherwise
+ * PTR_ERR_OR_ZERO() of the created device.
+ */
+int __init opal_sensor_init(void)
+{
+	struct device_node *np = of_find_node_by_path("/ibm,opal/sensors");
+	struct platform_device *pdev;
+
+	if (!np) {
+		pr_err("Opal node 'sensors' not found\n");
+		return -ENODEV;
+	}
+
+	pdev = of_platform_device_create(np, "opal-sensor", NULL);
+	of_node_put(np);
+
+	return PTR_ERR_OR_ZERO(pdev);
+}
diff --git a/arch/powerpc/platforms/powernv/opal-sysparam.c b/arch/powerpc/platforms/powernv/opal-sysparam.c
new file mode 100644
index 000000000..a12312afe
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-sysparam.c
@@ -0,0 +1,294 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * PowerNV system parameter code
+ *
+ * Copyright (C) 2013 IBM
+ */
+
+#include <linux/kobject.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/of.h>
+#include <linux/gfp.h>
+#include <linux/stat.h>
+#include <asm/opal.h>
+
+/* Size in bytes of param_data_buf; parameters larger than this are not exposed. */
+#define MAX_PARAM_DATA_LEN 64
+
+/* Held around every use of param_data_buf by the sysfs show/store handlers. */
+static DEFINE_MUTEX(opal_sysparam_mutex);
+/* "sysparams" kobject created under opal_kobj. */
+static struct kobject *sysparam_kobj;
+/* Shared bounce buffer of MAX_PARAM_DATA_LEN bytes for get/set calls. */
+static void *param_data_buf;
+
+/* One system parameter exposed as a sysfs file. */
+struct param_attr {
+ struct list_head list;
+ u32 param_id; /* taken from the "param-id" DT property */
+ u32 param_size; /* taken from the "param-len" DT property */
+ struct kobj_attribute kobj_attr;
+};
+
+/*
+ * Ask OPAL for @length bytes of system parameter @param_id, placed in
+ * @buffer.
+ *
+ * Returns 0 on success, a negative value from token allocation or from
+ * waiting for the asynchronous response, or the opal_error_code()
+ * translation of the OPAL return code.
+ */
+static ssize_t opal_get_sys_param(u32 param_id, u32 length, void *buffer)
+{
+	struct opal_msg reply;
+	ssize_t rc;
+	int tok;
+
+	tok = opal_async_get_token_interruptible();
+	if (tok < 0) {
+		/* An interrupted wait is not worth a log line */
+		if (tok != -ERESTARTSYS)
+			pr_err("%s: Couldn't get the token, returning\n",
+			       __func__);
+		return tok;
+	}
+
+	rc = opal_get_param(tok, param_id, (u64)buffer, length);
+	if (rc != OPAL_ASYNC_COMPLETION) {
+		rc = opal_error_code(rc);
+	} else {
+		rc = opal_async_wait_response(tok, &reply);
+		if (rc)
+			pr_err("%s: Failed to wait for the async response, %zd\n",
+			       __func__, rc);
+		else
+			rc = opal_error_code(opal_get_async_rc(reply));
+	}
+
+	opal_async_release_token(tok);
+	return rc;
+}
+
+/*
+ * Hand @length bytes from @buffer to OPAL as the new value of system
+ * parameter @param_id.
+ *
+ * Returns 0 on success, a negative value from token allocation or from
+ * waiting for the asynchronous response, or the opal_error_code()
+ * translation of the OPAL return code.
+ */
+static int opal_set_sys_param(u32 param_id, u32 length, void *buffer)
+{
+	struct opal_msg reply;
+	int rc, tok;
+
+	tok = opal_async_get_token_interruptible();
+	if (tok < 0) {
+		/* An interrupted wait is not worth a log line */
+		if (tok != -ERESTARTSYS)
+			pr_err("%s: Couldn't get the token, returning\n",
+			       __func__);
+		return tok;
+	}
+
+	rc = opal_set_param(tok, param_id, (u64)buffer, length);
+	if (rc != OPAL_ASYNC_COMPLETION) {
+		rc = opal_error_code(rc);
+	} else {
+		rc = opal_async_wait_response(tok, &reply);
+		if (rc)
+			pr_err("%s: Failed to wait for the async response, %d\n",
+			       __func__, rc);
+		else
+			rc = opal_error_code(opal_get_async_rc(reply));
+	}
+
+	opal_async_release_token(tok);
+	return rc;
+}
+
+/*
+ * sysfs show handler: fetch the parameter through the shared bounce buffer
+ * and copy param_size bytes of it to @buf.
+ *
+ * Returns param_size on success or the opal_get_sys_param() error.
+ */
+static ssize_t sys_param_show(struct kobject *kobj,
+		struct kobj_attribute *kobj_attr, char *buf)
+{
+	struct param_attr *pattr = container_of(kobj_attr, struct param_attr,
+						kobj_attr);
+	ssize_t rc;
+
+	mutex_lock(&opal_sysparam_mutex);
+
+	rc = opal_get_sys_param(pattr->param_id, pattr->param_size,
+				param_data_buf);
+	if (!rc) {
+		memcpy(buf, param_data_buf, pattr->param_size);
+		rc = pattr->param_size;
+	}
+
+	mutex_unlock(&opal_sysparam_mutex);
+	return rc;
+}
+
+/*
+ * sysfs store handler: copy up to MAX_PARAM_DATA_LEN bytes of @buf into the
+ * shared bounce buffer and pass param_size bytes of it to OPAL.
+ *
+ * Returns the (possibly clamped) byte count on success or the
+ * opal_set_sys_param() error.
+ */
+static ssize_t sys_param_store(struct kobject *kobj,
+		struct kobj_attribute *kobj_attr, const char *buf, size_t count)
+{
+	struct param_attr *pattr = container_of(kobj_attr, struct param_attr,
+						kobj_attr);
+	ssize_t rc;
+
+	/* param_data_buf is MAX_PARAM_DATA_LEN bytes long; never copy more */
+	if (count > MAX_PARAM_DATA_LEN)
+		count = MAX_PARAM_DATA_LEN;
+
+	mutex_lock(&opal_sysparam_mutex);
+	memcpy(param_data_buf, buf, count);
+	rc = opal_set_sys_param(pattr->param_id, pattr->param_size,
+				param_data_buf);
+	mutex_unlock(&opal_sysparam_mutex);
+
+	if (rc)
+		return rc;
+	return count;
+}
+
+/*
+ * Expose the OPAL system parameters described by /ibm,opal/sysparams as
+ * files under the "sysparams" kobject.
+ *
+ * The node's "param-name", "param-id", "param-len" and "param-perm"
+ * properties are read as parallel arrays. Parameters larger than
+ * MAX_PARAM_DATA_LEN, or whose name cannot be read, are skipped. Any other
+ * failure unwinds everything allocated here. On success the attribute array
+ * stays allocated because the created sysfs files refer to it.
+ */
+void __init opal_sys_param_init(void)
+{
+	struct device_node *sysparam;
+	struct param_attr *attr;
+	u32 *id, *size;
+	int count, i;
+	u8 *perm;
+
+	if (!opal_kobj) {
+		pr_warn("SYSPARAM: opal kobject is not available\n");
+		goto out;
+	}
+
+	/* Some systems do not use sysparams; this is not an error */
+	sysparam = of_find_node_by_path("/ibm,opal/sysparams");
+	if (!sysparam)
+		goto out;
+
+	if (!of_device_is_compatible(sysparam, "ibm,opal-sysparams")) {
+		pr_err("SYSPARAM: Opal sysparam node not compatible\n");
+		goto out_node_put;
+	}
+
+	sysparam_kobj = kobject_create_and_add("sysparams", opal_kobj);
+	if (!sysparam_kobj) {
+		pr_err("SYSPARAM: Failed to create sysparam kobject\n");
+		goto out_node_put;
+	}
+
+	/* Allocate big enough buffer for any get/set transactions */
+	param_data_buf = kzalloc(MAX_PARAM_DATA_LEN, GFP_KERNEL);
+	if (!param_data_buf) {
+		pr_err("SYSPARAM: Failed to allocate memory for param data "
+		       "buf\n");
+		goto out_kobj_put;
+	}
+
+	/* Number of parameters exposed through DT */
+	count = of_property_count_strings(sysparam, "param-name");
+	if (count < 0) {
+		pr_err("SYSPARAM: No string found of property param-name in "
+		       "the node %pOFn\n", sysparam);
+		goto out_param_buf;
+	}
+
+	id = kcalloc(count, sizeof(*id), GFP_KERNEL);
+	if (!id) {
+		pr_err("SYSPARAM: Failed to allocate memory to read parameter "
+		       "id\n");
+		goto out_param_buf;
+	}
+
+	size = kcalloc(count, sizeof(*size), GFP_KERNEL);
+	if (!size) {
+		pr_err("SYSPARAM: Failed to allocate memory to read parameter "
+		       "size\n");
+		goto out_free_id;
+	}
+
+	perm = kcalloc(count, sizeof(*perm), GFP_KERNEL);
+	if (!perm) {
+		/* The message was missing its terminating newline */
+		pr_err("SYSPARAM: Failed to allocate memory to read supported "
+		       "action on the parameter\n");
+		goto out_free_size;
+	}
+
+	if (of_property_read_u32_array(sysparam, "param-id", id, count)) {
+		pr_err("SYSPARAM: Missing property param-id in the DT\n");
+		goto out_free_perm;
+	}
+
+	if (of_property_read_u32_array(sysparam, "param-len", size, count)) {
+		pr_err("SYSPARAM: Missing property param-len in the DT\n");
+		goto out_free_perm;
+	}
+
+
+	if (of_property_read_u8_array(sysparam, "param-perm", perm, count)) {
+		pr_err("SYSPARAM: Missing property param-perm in the DT\n");
+		goto out_free_perm;
+	}
+
+	attr = kcalloc(count, sizeof(*attr), GFP_KERNEL);
+	if (!attr) {
+		pr_err("SYSPARAM: Failed to allocate memory for parameter "
+		       "attributes\n");
+		goto out_free_perm;
+	}
+
+	/* For each of the parameters, populate the parameter attributes */
+	for (i = 0; i < count; i++) {
+		if (size[i] > MAX_PARAM_DATA_LEN) {
+			pr_warn("SYSPARAM: Not creating parameter %d as size "
+				"exceeds buffer length\n", i);
+			continue;
+		}
+
+		sysfs_attr_init(&attr[i].kobj_attr.attr);
+		attr[i].param_id = id[i];
+		attr[i].param_size = size[i];
+		if (of_property_read_string_index(sysparam, "param-name", i,
+				&attr[i].kobj_attr.attr.name))
+			continue;
+
+		/* If the parameter is read-only or read-write */
+		switch (perm[i] & 3) {
+		case OPAL_SYSPARAM_READ:
+			attr[i].kobj_attr.attr.mode = 0444;
+			break;
+		case OPAL_SYSPARAM_WRITE:
+			attr[i].kobj_attr.attr.mode = 0200;
+			break;
+		case OPAL_SYSPARAM_RW:
+			attr[i].kobj_attr.attr.mode = 0644;
+			break;
+		default:
+			/* Mode stays as zero-initialised by kcalloc() */
+			break;
+		}
+
+		attr[i].kobj_attr.show = sys_param_show;
+		attr[i].kobj_attr.store = sys_param_store;
+
+		if (sysfs_create_file(sysparam_kobj, &attr[i].kobj_attr.attr)) {
+			pr_err("SYSPARAM: Failed to create sysfs file %s\n",
+			       attr[i].kobj_attr.attr.name);
+			goto out_free_attr;
+		}
+	}
+
+	/* The scratch arrays are only needed while populating 'attr' */
+	kfree(perm);
+	kfree(size);
+	kfree(id);
+	of_node_put(sysparam);
+	return;
+
+out_free_attr:
+	kfree(attr);
+out_free_perm:
+	kfree(perm);
+out_free_size:
+	kfree(size);
+out_free_id:
+	kfree(id);
+out_param_buf:
+	kfree(param_data_buf);
+out_kobj_put:
+	kobject_put(sysparam_kobj);
+out_node_put:
+	of_node_put(sysparam);
+out:
+	return;
+}
diff --git a/arch/powerpc/platforms/powernv/opal-tracepoints.c b/arch/powerpc/platforms/powernv/opal-tracepoints.c
new file mode 100644
index 000000000..91b36541b
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-tracepoints.c
@@ -0,0 +1,87 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/percpu.h>
+#include <linux/jump_label.h>
+#include <asm/trace.h>
+
+/*
+ * Tracepoint registration hooks. Two implementations are provided: with
+ * CONFIG_JUMP_LABEL a static key is incremented/decremented, otherwise a
+ * plain counter (opal_tracepoint_refcount, defined elsewhere) is used.
+ */
+#ifdef CONFIG_JUMP_LABEL
+struct static_key opal_tracepoint_key = STATIC_KEY_INIT;
+
+/* Called when an OPAL tracepoint is registered; always returns 0. */
+int opal_tracepoint_regfunc(void)
+{
+ static_key_slow_inc(&opal_tracepoint_key);
+ return 0;
+}
+
+/* Called when an OPAL tracepoint is unregistered. */
+void opal_tracepoint_unregfunc(void)
+{
+ static_key_slow_dec(&opal_tracepoint_key);
+}
+#else
+/*
+ * We optimise OPAL calls by placing opal_tracepoint_refcount
+ * directly in the TOC so we can check if the opal tracepoints are
+ * enabled via a single load.
+ */
+
+/* NB: reg/unreg are called while guarded with the tracepoints_mutex */
+extern long opal_tracepoint_refcount;
+
+/* Called when an OPAL tracepoint is registered; always returns 0. */
+int opal_tracepoint_regfunc(void)
+{
+ opal_tracepoint_refcount++;
+ return 0;
+}
+
+/* Called when an OPAL tracepoint is unregistered. */
+void opal_tracepoint_unregfunc(void)
+{
+ opal_tracepoint_refcount--;
+}
+#endif
+
+/*
+ * Since the tracing code might execute OPAL calls we need to guard against
+ * recursion.
+ */
+static DEFINE_PER_CPU(unsigned int, opal_trace_depth);
+
+/*
+ * Emit the opal_entry trace event for @opcode with @args, unless this CPU
+ * is already inside one of the trace helpers (per-CPU depth non-zero).
+ *
+ * Runs with local interrupts disabled for the duration of the call.
+ */
+void __trace_opal_entry(unsigned long opcode, unsigned long *args)
+{
+ unsigned long flags;
+ unsigned int *depth;
+
+ local_irq_save(flags);
+
+ depth = this_cpu_ptr(&opal_trace_depth);
+
+ /* Already tracing on this CPU: skip to avoid recursion */
+ if (*depth)
+ goto out;
+
+ (*depth)++;
+ /*
+ * No preempt_enable() in this function; __trace_opal_exit() is the
+ * one that calls it.
+ */
+ preempt_disable();
+ trace_opal_entry(opcode, args);
+ (*depth)--;
+
+out:
+ local_irq_restore(flags);
+}
+
+/*
+ * Emit the opal_exit trace event for @opcode with @retval, unless this CPU
+ * is already inside one of the trace helpers (per-CPU depth non-zero).
+ *
+ * Runs with local interrupts disabled for the duration of the call.
+ */
+void __trace_opal_exit(long opcode, unsigned long retval)
+{
+ unsigned long flags;
+ unsigned int *depth;
+
+ local_irq_save(flags);
+
+ depth = this_cpu_ptr(&opal_trace_depth);
+
+ /* Already tracing on this CPU: skip to avoid recursion */
+ if (*depth)
+ goto out;
+
+ (*depth)++;
+ trace_opal_exit(opcode, retval);
+ /* Counterpart of the preempt_disable() in __trace_opal_entry() */
+ preempt_enable();
+ (*depth)--;
+
+out:
+ local_irq_restore(flags);
+}
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
new file mode 100644
index 000000000..0ed95f753
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * PowerNV OPAL API wrappers
+ *
+ * Copyright 2011 IBM Corp.
+ */
+
+#include <linux/jump_label.h>
+#include <asm/ppc_asm.h>
+#include <asm/hvcall.h>
+#include <asm/asm-offsets.h>
+#include <asm/opal.h>
+#include <asm/asm-compat.h>
+#include <asm/feature-fixups.h>
+
+ .section ".text"
+
+/*
+ * r3-r10 - OPAL call arguments
+ * STK_PARAM(R11) - OPAL opcode
+ * STK_PARAM(R12) - MSR to restore
+ */
+_GLOBAL_TOC(__opal_call)
+ /* Save the caller's LR in the stack frame */
+ mflr r0
+ std r0,PPC_LR_STKOFF(r1)
+ /* Build the MSR used for the call: the saved MSR with IR, DR and LE cleared */
+ ld r12,STK_PARAM(R12)(r1)
+ li r0,MSR_IR|MSR_DR|MSR_LE
+ andc r12,r12,r0
+ /* Set LR to opal_return, the label following the hrfid below */
+ LOAD_REG_ADDR(r11, opal_return)
+ mtlr r11
+ /*
+ * Load the first two doublewords of the 'opal' object: offset 0 goes
+ * to r2, offset 8 becomes the target address written to HSRR0.
+ */
+ LOAD_REG_ADDR(r11, opal)
+ ld r2,0(r11)
+ ld r11,8(r11)
+ mtspr SPRN_HSRR0,r11
+ mtspr SPRN_HSRR1,r12
+ /* set token to r0 */
+ ld r0,STK_PARAM(R11)(r1)
+ hrfid
+opal_return:
+ /*
+ * Restore MSR on OPAL return. The MSR is set to big-endian.
+ */
+#ifdef __BIG_ENDIAN__
+ ld r11,STK_PARAM(R12)(r1)
+ mtmsrd r11
+#else
+ /* Endian can only be switched with rfi, must byte reverse MSR load */
+ /*
+ * The instructions below are emitted as raw data; the trailing
+ * comments give the intended mnemonics.
+ */
+ .short 0x4039 /* li r10,STK_PARAM(R12) */
+ .byte (STK_PARAM(R12) >> 8) & 0xff
+ .byte STK_PARAM(R12) & 0xff
+
+ .long 0x280c6a7d /* ldbrx r11,r10,r1 */
+ .long 0x05009f42 /* bcl 20,31,$+4 */
+ .long 0xa602487d /* mflr r10 */
+ .long 0x14004a39 /* addi r10,r10,20 */
+ .long 0xa64b5a7d /* mthsrr0 r10 */
+ .long 0xa64b7b7d /* mthsrr1 r11 */
+ .long 0x2402004c /* hrfid */
+#endif
+ /* Reload the TOC pointer, restore the saved LR and return to the caller */
+ LOAD_PACA_TOC()
+ ld r0,PPC_LR_STKOFF(r1)
+ mtlr r0
+ blr
diff --git a/arch/powerpc/platforms/powernv/opal-xscom.c b/arch/powerpc/platforms/powernv/opal-xscom.c
new file mode 100644
index 000000000..748c2b97f
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-xscom.c
@@ -0,0 +1,210 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * PowerNV SCOM bus debugfs interface
+ *
+ * Copyright 2010 Benjamin Herrenschmidt, IBM Corp
+ * <benh@kernel.crashing.org>
+ * and David Gibson, IBM Corporation.
+ * Copyright 2013 IBM Corp.
+ */
+
+#include <linux/kernel.h>
+#include <linux/of.h>
+#include <linux/bug.h>
+#include <linux/gfp.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/debugfs.h>
+
+#include <asm/machdep.h>
+#include <asm/firmware.h>
+#include <asm/opal.h>
+#include <asm/prom.h>
+
+/*
+ * Convert a debugfs-style SCOM address into the form passed to OPAL.
+ *
+ * XSCOM addresses use the top nibble (IBM bits 0-3) to select indirect
+ * mode and its form. The debugfs interface uses signed offsets and shifts
+ * the address left by 3, so it cannot carry those top bits. This API
+ * therefore accepts the indirect bits in IBM bits 4-7 instead; they are
+ * moved up by one nibble here and bits 4-7 are cleared.
+ *
+ * Any bits already present in the top nibble are kept (the relocated bits
+ * are OR-ed in), so an address that has nothing set in bits 4-7 passes
+ * through unchanged.
+ */
+static u64 opal_scom_unmangle(u64 addr)
+{
+	/* IBM bits 4-7: where this API carries the indirect/form nibble */
+	const u64 form_mask = 0x0f00000000000000;
+	u64 form = addr & form_mask;
+
+	return (addr & ~form_mask) | (form << 4);
+}
+
+/*
+ * Read SCOM register (@addr + @reg), after unmangling, on @chip.
+ *
+ * On success stores the CPU-endian value in @value and returns 0. On any
+ * OPAL error stores all-ones in @value and returns -EIO.
+ */
+static int opal_scom_read(uint32_t chip, uint64_t addr, u64 reg, u64 *value)
+{
+	__be64 raw;
+	int64_t rc;
+
+	reg = opal_scom_unmangle(addr + reg);
+	rc = opal_xscom_read(chip, reg, (__be64 *)__pa(&raw));
+	if (!rc) {
+		*value = be64_to_cpu(raw);
+		return 0;
+	}
+
+	*value = 0xfffffffffffffffful;
+	return -EIO;
+}
+
+/*
+ * Write @value to SCOM register (@addr + @reg), after unmangling, on @chip.
+ *
+ * Returns 0 on success or -EIO on any OPAL error.
+ */
+static int opal_scom_write(uint32_t chip, uint64_t addr, u64 reg, u64 value)
+{
+	int64_t rc;
+
+	reg = opal_scom_unmangle(addr + reg);
+	rc = opal_xscom_write(chip, reg, value);
+
+	return rc ? -EIO : 0;
+}
+
+/* Per-chip state behind one debugfs "scom/<chip>" directory. */
+struct scom_debug_entry {
+ u32 chip; /* chip id passed to the OPAL xscom calls */
+ struct debugfs_blob_wrapper path; /* device-tree path, exposed as "devspec" */
+ char name[16]; /* directory name: chip id as 8 hex digits */
+};
+
+/*
+ * debugfs read handler for the "access" file.
+ *
+ * The file offset and @count must both be multiples of 8; the offset
+ * divided by 8 is the first SCOM register and each register yields one
+ * 64-bit value in @ubuf.
+ *
+ * Returns the number of bytes transferred. If the very first register
+ * fails, the error is returned instead; a later failure ends the transfer
+ * early and the bytes done so far are returned.
+ */
+static ssize_t scom_debug_read(struct file *filp, char __user *ubuf,
+ size_t count, loff_t *ppos)
+{
+ struct scom_debug_entry *ent = filp->private_data;
+ u64 __user *ubuf64 = (u64 __user *)ubuf;
+ loff_t off = *ppos;
+ ssize_t done = 0;
+ u64 reg, reg_base, reg_cnt, val;
+ int rc;
+
+ if (off < 0 || (off & 7) || (count & 7))
+ return -EINVAL;
+ reg_base = off >> 3;
+ reg_cnt = count >> 3;
+
+ for (reg = 0; reg < reg_cnt; reg++) {
+ rc = opal_scom_read(ent->chip, reg_base, reg, &val);
+ if (!rc)
+ rc = put_user(val, ubuf64);
+ if (rc) {
+ /* Only report the error when nothing was transferred */
+ if (!done)
+ done = rc;
+ break;
+ }
+ ubuf64++;
+ *ppos += 8;
+ done += 8;
+ }
+ return done;
+}
+
+/*
+ * debugfs write handler for the "access" file.
+ *
+ * The file offset and @count must both be multiples of 8; the offset
+ * divided by 8 is the first SCOM register and each 64-bit value in @ubuf is
+ * written to the next register.
+ *
+ * Returns the number of bytes consumed. If the very first register fails,
+ * the error is returned instead; a later failure ends the transfer early
+ * and the bytes done so far are returned.
+ *
+ * NOTE(review): unlike scom_debug_read(), this handler does not advance
+ * *ppos - confirm whether that is intentional.
+ */
+static ssize_t scom_debug_write(struct file *filp, const char __user *ubuf,
+ size_t count, loff_t *ppos)
+{
+ struct scom_debug_entry *ent = filp->private_data;
+ u64 __user *ubuf64 = (u64 __user *)ubuf;
+ loff_t off = *ppos;
+ ssize_t done = 0;
+ u64 reg, reg_base, reg_cnt, val;
+ int rc;
+
+ if (off < 0 || (off & 7) || (count & 7))
+ return -EINVAL;
+ reg_base = off >> 3;
+ reg_cnt = count >> 3;
+
+ for (reg = 0; reg < reg_cnt; reg++) {
+ rc = get_user(val, ubuf64);
+ if (!rc)
+ rc = opal_scom_write(ent->chip, reg_base, reg, val);
+ if (rc) {
+ /* Only report the error when nothing was transferred */
+ if (!done)
+ done = rc;
+ break;
+ }
+ ubuf64++;
+ done += 8;
+ }
+ return done;
+}
+
+/* File operations of the per-chip debugfs "access" file. */
+static const struct file_operations scom_debug_fops = {
+ .read = scom_debug_read,
+ .write = scom_debug_write,
+ .open = simple_open,
+ .llseek = default_llseek,
+};
+
+/*
+ * Create the debugfs directory for one SCOM controller under @root.
+ *
+ * The directory is named after @chip (8 hex digits) and holds a read-only
+ * "devspec" blob with the device-tree path of @dn and a read/write
+ * "access" file backed by scom_debug_fops.
+ *
+ * Returns 0 on success, -ENOMEM when an allocation fails, or -1 when the
+ * directory cannot be created. On success the entry stays allocated, since
+ * the debugfs files refer to it.
+ */
+static int scom_debug_init_one(struct dentry *root, struct device_node *dn,
+ int chip)
+{
+ struct scom_debug_entry *ent;
+ struct dentry *dir;
+
+ ent = kzalloc(sizeof(*ent), GFP_KERNEL);
+ if (!ent)
+ return -ENOMEM;
+
+ ent->chip = chip;
+ snprintf(ent->name, 16, "%08x", chip);
+ ent->path.data = (void *)kasprintf(GFP_KERNEL, "%pOF", dn);
+ if (!ent->path.data) {
+ kfree(ent);
+ return -ENOMEM;
+ }
+
+ ent->path.size = strlen((char *)ent->path.data);
+
+ dir = debugfs_create_dir(ent->name, root);
+ if (IS_ERR(dir)) {
+ kfree(ent->path.data);
+ kfree(ent);
+ return -1;
+ }
+
+ debugfs_create_blob("devspec", 0400, dir, &ent->path);
+ debugfs_create_file("access", 0600, dir, ent, &scom_debug_fops);
+
+ return 0;
+}
+
+/*
+ * Initcall: create the debugfs "scom" directory and one sub-directory for
+ * every device-tree node carrying a "scom-controller" property.
+ *
+ * Does nothing (returns 0) without OPAL firmware. Returns -1 when the root
+ * directory cannot be created, otherwise the bitwise OR of the per-node
+ * results.
+ */
+static int scom_debug_init(void)
+{
+	struct device_node *dn;
+	struct dentry *root;
+	int rc = 0;
+
+	if (!firmware_has_feature(FW_FEATURE_OPAL))
+		return 0;
+
+	root = debugfs_create_dir("scom", arch_debugfs_dir);
+	if (IS_ERR(root))
+		return -1;
+
+	for_each_node_with_property(dn, "scom-controller") {
+		int chip = of_get_ibm_chip_id(dn);
+
+		WARN_ON(chip == -1);
+		rc |= scom_debug_init_one(root, dn, chip);
+	}
+
+	return rc;
+}
+device_initcall(scom_debug_init);
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
new file mode 100644
index 000000000..cdf3838f0
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -0,0 +1,1251 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * PowerNV OPAL high level interfaces
+ *
+ * Copyright 2011 IBM Corp.
+ */
+
+#define pr_fmt(fmt) "opal: " fmt
+
+#include <linux/printk.h>
+#include <linux/types.h>
+#include <linux/of.h>
+#include <linux/of_fdt.h>
+#include <linux/of_platform.h>
+#include <linux/of_address.h>
+#include <linux/interrupt.h>
+#include <linux/notifier.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/kobject.h>
+#include <linux/delay.h>
+#include <linux/memblock.h>
+#include <linux/kthread.h>
+#include <linux/freezer.h>
+#include <linux/kmsg_dump.h>
+#include <linux/console.h>
+#include <linux/sched/debug.h>
+
+#include <asm/machdep.h>
+#include <asm/opal.h>
+#include <asm/firmware.h>
+#include <asm/mce.h>
+#include <asm/imc-pmu.h>
+#include <asm/bug.h>
+
+#include "powernv.h"
+
+/* Upper bound on messages held for replay before a notifier registers. */
+#define OPAL_MSG_QUEUE_MAX 16
+
+/* One queued OPAL message awaiting replay. */
+struct opal_msg_node {
+ struct list_head list;
+ struct opal_msg msg;
+};
+
+/* msg_list_lock protects msg_list (and is held while msg_list_size changes). */
+static DEFINE_SPINLOCK(msg_list_lock);
+static LIST_HEAD(msg_list);
+
+/* /sys/firmware/opal */
+struct kobject *opal_kobj;
+
+/* Filled in from the device tree by early_init_dt_scan_opal(). */
+struct opal {
+ u64 base;
+ u64 entry;
+ u64 size;
+} opal;
+
+/* One machine-check recoverable address range, as parsed from the device tree. */
+struct mcheck_recoverable_range {
+ u64 start_addr;
+ u64 end_addr;
+ u64 recover_addr;
+};
+
+/* Number of entries currently on msg_list. */
+static int msg_list_size;
+
+/* Array of mc_recoverable_range_len entries, allocated from memblock. */
+static struct mcheck_recoverable_range *mc_recoverable_range;
+static int mc_recoverable_range_len;
+
+struct device_node *opal_node;
+static DEFINE_SPINLOCK(opal_write_lock);
+/* One notifier chain per OPAL message type. */
+static struct atomic_notifier_head opal_msg_notifier_head[OPAL_MSG_TYPE_MAX];
+static uint32_t opal_heartbeat;
+static struct task_struct *kopald_tsk;
+/* Receive buffer of opal_msg_size bytes handed to opal_get_msg(). */
+static struct opal_msg *opal_msg;
+static u32 opal_msg_size __ro_after_init;
+
+/*
+ * Ask OPAL to re-initialise the CPUs for the kernel's endianness and, on
+ * CPUs with CPU_FTR_ARCH_300, for the MMU modes in use, then run the CPU
+ * spec's cpu_restore hook if one is set.
+ */
+void __init opal_configure_cores(void)
+{
+ u64 reinit_flags = 0;
+
+ /* Do the actual re-init, This will clobber all FPRs, VRs, etc...
+ *
+ * It will preserve non volatile GPRs and HSPRG0/1. It will
+ * also restore HIDs and other SPRs to their original value
+ * but it might clobber a bunch.
+ */
+#ifdef __BIG_ENDIAN__
+ reinit_flags |= OPAL_REINIT_CPUS_HILE_BE;
+#else
+ reinit_flags |= OPAL_REINIT_CPUS_HILE_LE;
+#endif
+
+ /*
+ * POWER9 always support running hash:
+ * ie. Host hash supports hash guests
+ * Host radix supports hash/radix guests
+ */
+ if (early_cpu_has_feature(CPU_FTR_ARCH_300)) {
+ reinit_flags |= OPAL_REINIT_CPUS_MMU_HASH;
+ if (early_radix_enabled())
+ reinit_flags |= OPAL_REINIT_CPUS_MMU_RADIX;
+ }
+
+ opal_reinit_cpus(reinit_flags);
+
+ /* Restore some bits */
+ if (cur_cpu_spec->cpu_restore)
+ cur_cpu_spec->cpu_restore();
+}
+
+/*
+ * Flat device-tree scan callback: record the OPAL base address, entry
+ * point and runtime size from the depth-1 "ibm,opal" node and set
+ * FW_FEATURE_OPAL.
+ *
+ * Returns 0 for any other node and 1 once the "ibm,opal" node has been
+ * seen, even if it lacks the expected properties. Panics if the node is
+ * not compatible with "ibm,opal-v3".
+ */
+int __init early_init_dt_scan_opal(unsigned long node,
+				   const char *uname, int depth, void *data)
+{
+	const void *basep, *entryp, *sizep;
+	int basesz, entrysz, runtimesz;
+
+	if (depth != 1 || strcmp(uname, "ibm,opal") != 0)
+		return 0;
+
+	basep = of_get_flat_dt_prop(node, "opal-base-address", &basesz);
+	entryp = of_get_flat_dt_prop(node, "opal-entry-address", &entrysz);
+	sizep = of_get_flat_dt_prop(node, "opal-runtime-size", &runtimesz);
+
+	if (!basep || !entryp || !sizep)
+		return 1;
+
+	/* Property sizes are in bytes; of_read_number() wants 32-bit cells */
+	opal.base = of_read_number(basep, basesz/4);
+	opal.entry = of_read_number(entryp, entrysz/4);
+	opal.size = of_read_number(sizep, runtimesz/4);
+
+	pr_debug("OPAL Base = 0x%llx (basep=%p basesz=%d)\n",
+		 opal.base, basep, basesz);
+	pr_debug("OPAL Entry = 0x%llx (entryp=%p entrysz=%d)\n",
+		 opal.entry, entryp, entrysz);
+	pr_debug("OPAL Size = 0x%llx (sizep=%p runtimesz=%d)\n",
+		 opal.size, sizep, runtimesz);
+
+	if (of_flat_dt_is_compatible(node, "ibm,opal-v3")) {
+		powerpc_firmware_features |= FW_FEATURE_OPAL;
+		pr_debug("OPAL detected !\n");
+	} else {
+		panic("OPAL != V3 detected, no longer supported.\n");
+	}
+
+	return 1;
+}
+
+/*
+ * Flat device-tree scan callback: parse "mcheck-recoverable-ranges" of the
+ * depth-1 "ibm,opal" node into the mc_recoverable_range array.
+ *
+ * Returns 0 for any other node and 1 once the "ibm,opal" node has been
+ * seen, whether or not it carried any ranges. Panics if the array cannot
+ * be allocated.
+ */
+int __init early_init_dt_scan_recoverable_ranges(unsigned long node,
+ const char *uname, int depth, void *data)
+{
+ int i, psize, size;
+ const __be32 *prop;
+
+ if (depth != 1 || strcmp(uname, "ibm,opal") != 0)
+ return 0;
+
+ prop = of_get_flat_dt_prop(node, "mcheck-recoverable-ranges", &psize);
+
+ if (!prop)
+ return 1;
+
+ pr_debug("Found machine check recoverable ranges.\n");
+
+ /*
+ * Calculate number of available entries.
+ *
+ * Each recoverable address range entry is (start address, len,
+ * recovery address), 2 cells each for start and recovery address,
+ * 1 cell for len, totalling 5 cells per entry.
+ */
+ mc_recoverable_range_len = psize / (sizeof(*prop) * 5);
+
+ /* Sanity check */
+ if (!mc_recoverable_range_len)
+ return 1;
+
+ /* Size required to hold all the entries. */
+ size = mc_recoverable_range_len *
+ sizeof(struct mcheck_recoverable_range);
+
+ /*
+ * Allocate a buffer to hold the MC recoverable ranges.
+ */
+ mc_recoverable_range = memblock_alloc(size, __alignof__(u64));
+ if (!mc_recoverable_range)
+ panic("%s: Failed to allocate %u bytes align=0x%lx\n",
+ __func__, size, __alignof__(u64));
+
+ /* Cell layout per entry: [0..1] start, [2] length, [3..4] recovery address */
+ for (i = 0; i < mc_recoverable_range_len; i++) {
+ mc_recoverable_range[i].start_addr =
+ of_read_number(prop + (i * 5) + 0, 2);
+ /* end_addr is stored as start + length */
+ mc_recoverable_range[i].end_addr =
+ mc_recoverable_range[i].start_addr +
+ of_read_number(prop + (i * 5) + 2, 1);
+ mc_recoverable_range[i].recover_addr =
+ of_read_number(prop + (i * 5) + 3, 2);
+
+ pr_debug("Machine check recoverable range: %llx..%llx: %llx\n",
+ mc_recoverable_range[i].start_addr,
+ mc_recoverable_range[i].end_addr,
+ mc_recoverable_range[i].recover_addr);
+ }
+ return 1;
+}
+
+/*
+ * Early initcall: on big-endian kernels, register exception handler glue
+ * with OPAL for firmware that still needs it. On little-endian kernels the
+ * body is compiled out and the function just returns 0.
+ *
+ * Returns -ENODEV (big-endian only) when FW_FEATURE_OPAL is not set,
+ * otherwise 0.
+ */
+static int __init opal_register_exception_handlers(void)
+{
+#ifdef __BIG_ENDIAN__
+ u64 glue;
+
+ if (!(powerpc_firmware_features & FW_FEATURE_OPAL))
+ return -ENODEV;
+
+ /* Hookup some exception handlers except machine check. We use the
+ * fwnmi area at 0x7000 to provide the glue space to OPAL
+ */
+ glue = 0x7000;
+
+ /*
+ * Only ancient OPAL firmware requires this.
+ * Specifically, firmware from FW810.00 (released June 2014)
+ * through FW810.20 (Released October 2014).
+ *
+ * Check if we are running on newer (post Oct 2014) firmware that
+ * exports the OPAL_HANDLE_HMI token. If yes, then don't ask OPAL to
+ * patch the HMI interrupt and we catch it directly in Linux.
+ *
+ * For older firmware (i.e < FW810.20), we fallback to old behavior and
+ * let OPAL patch the HMI vector and handle it inside OPAL firmware.
+ *
+ * For newer firmware we catch/handle the HMI directly in Linux.
+ */
+ if (!opal_check_token(OPAL_HANDLE_HMI)) {
+ pr_info("Old firmware detected, OPAL handles HMIs.\n");
+ opal_register_exception_handler(
+ OPAL_HYPERVISOR_MAINTENANCE_HANDLER,
+ 0, glue);
+ /* Move on to the next glue slot for the softpatch handler */
+ glue += 128;
+ }
+
+ /*
+ * Only applicable to ancient firmware, all modern
+ * (post March 2015/skiboot 5.0) firmware will just return
+ * OPAL_UNSUPPORTED.
+ */
+ opal_register_exception_handler(OPAL_SOFTPATCH_HANDLER, 0, glue);
+#endif
+
+ return 0;
+}
+machine_early_initcall(powernv, opal_register_exception_handlers);
+
+/*
+ * Append a copy of @msg to msg_list so it can be replayed once a notifier
+ * registers for its type. Messages are dropped once OPAL_MSG_QUEUE_MAX
+ * entries are queued or when the atomic allocation fails.
+ */
+static void queue_replay_msg(void *msg)
+{
+	struct opal_msg_node *node;
+
+	if (msg_list_size >= OPAL_MSG_QUEUE_MAX)
+		return;
+
+	node = kzalloc(sizeof(*node), GFP_ATOMIC);
+	if (!node) {
+		pr_warn_once("message queue no memory\n");
+	} else {
+		INIT_LIST_HEAD(&node->list);
+		memcpy(&node->msg, msg, sizeof(struct opal_msg));
+		list_add_tail(&node->list, &msg_list);
+		msg_list_size++;
+	}
+
+	if (msg_list_size >= OPAL_MSG_QUEUE_MAX)
+		pr_warn_once("message queue full\n");
+}
+
+/*
+ * Deliver every queued message of type @msg_type to that type's notifier
+ * chain, in queue order, removing and freeing each one afterwards.
+ * Messages of other types stay queued.
+ *
+ * The caller visible in this file, opal_message_notifier_register(), holds
+ * msg_list_lock around this call.
+ */
+static void dequeue_replay_msg(enum opal_msg_type msg_type)
+{
+ struct opal_msg_node *msg_node, *tmp;
+
+ /* _safe variant: entries are deleted while walking the list */
+ list_for_each_entry_safe(msg_node, tmp, &msg_list, list) {
+ if (be32_to_cpu(msg_node->msg.msg_type) != msg_type)
+ continue;
+
+ atomic_notifier_call_chain(&opal_msg_notifier_head[msg_type],
+ msg_type,
+ &msg_node->msg);
+
+ list_del(&msg_node->list);
+ kfree(msg_node);
+ msg_list_size--;
+ }
+}
+
+/*
+ * Opal message notifier based on message type. Allow subscribers to get
+ * notified for specific message type.
+ *
+ * Returns -EINVAL when @nb is NULL or @msg_type is out of range, otherwise
+ * the result of atomic_notifier_chain_register().
+ */
+int opal_message_notifier_register(enum opal_msg_type msg_type,
+ struct notifier_block *nb)
+{
+ int ret;
+ unsigned long flags;
+
+ if (!nb || msg_type >= OPAL_MSG_TYPE_MAX) {
+ pr_warn("%s: Invalid arguments, msg_type:%d\n",
+ __func__, msg_type);
+ return -EINVAL;
+ }
+
+ spin_lock_irqsave(&msg_list_lock, flags);
+ ret = atomic_notifier_chain_register(
+ &opal_msg_notifier_head[msg_type], nb);
+
+ /*
+ * If the registration succeeded, replay any queued messages that came
+ * in prior to the notifier chain registration. msg_list_lock held here
+ * to ensure they're delivered prior to any subsequent messages.
+ */
+ if (ret == 0)
+ dequeue_replay_msg(msg_type);
+
+ spin_unlock_irqrestore(&msg_list_lock, flags);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(opal_message_notifier_register);
+
+/*
+ * Remove @nb from the notifier chain of @msg_type.
+ *
+ * Returns -EINVAL when @msg_type is not below OPAL_MSG_TYPE_MAX (the same
+ * bound enforced at registration), otherwise the result of
+ * atomic_notifier_chain_unregister().
+ */
+int opal_message_notifier_unregister(enum opal_msg_type msg_type,
+				     struct notifier_block *nb)
+{
+	/* An unchecked type would index past opal_msg_notifier_head[]. */
+	if (msg_type >= OPAL_MSG_TYPE_MAX) {
+		pr_warn("%s: Invalid arguments, msg_type:%d\n",
+			__func__, msg_type);
+		return -EINVAL;
+	}
+
+	return atomic_notifier_chain_unregister(
+			&opal_msg_notifier_head[msg_type], nb);
+}
+EXPORT_SYMBOL_GPL(opal_message_notifier_unregister);
+
+/*
+ * Dispatch @msg to the subscribers of @msg_type, or queue it for replay
+ * when that type's notifier chain is still empty.
+ */
+static void opal_message_do_notify(uint32_t msg_type, void *msg)
+{
+	unsigned long flags;
+	bool have_subscriber;
+
+	spin_lock_irqsave(&msg_list_lock, flags);
+	have_subscriber = opal_msg_notifier_head[msg_type].head != NULL;
+	/* Nobody registered yet for this msg_type: keep the message. */
+	if (!have_subscriber)
+		queue_replay_msg(msg);
+	spin_unlock_irqrestore(&msg_list_lock, flags);
+
+	/* The chain is invoked outside msg_list_lock. */
+	if (have_subscriber)
+		atomic_notifier_call_chain(&opal_msg_notifier_head[msg_type],
+					   msg_type, msg);
+}
+
+/*
+ * Fetch one message from OPAL into the opal_msg buffer and dispatch it by
+ * type. Messages with an out-of-range type are reported once and dropped.
+ */
+static void opal_handle_message(void)
+{
+	u32 msg_type;
+	s64 rc = opal_get_msg(__pa(opal_msg), opal_msg_size);
+
+	/* OPAL_RESOURCE means no message is pending. */
+	if (rc == OPAL_RESOURCE)
+		return;
+
+	/* Any other non-zero code is a failure. */
+	if (rc != 0) {
+		pr_warn("%s: Failed to retrieve opal message, err=%lld\n",
+			__func__, rc);
+		return;
+	}
+
+	msg_type = be32_to_cpu(opal_msg->msg_type);
+
+	if (msg_type < OPAL_MSG_TYPE_MAX)
+		opal_message_do_notify(msg_type, (void *)opal_msg);
+	else
+		pr_warn_once("%s: Unknown message type: %u\n", __func__,
+			     msg_type);
+}
+
+/*
+ * Interrupt handler installed by opal_message_init(): processes one OPAL
+ * message per invocation and always reports the interrupt as handled.
+ */
+static irqreturn_t opal_message_notify(int irq, void *data)
+{
+ opal_handle_message();
+ return IRQ_HANDLED;
+}
+
+/*
+ * Set up OPAL message reception: size and allocate the receive buffer,
+ * initialise one notifier chain per message type and hook the
+ * OPAL_EVENT_MSG_PENDING event interrupt.
+ *
+ * @opal_node: device node providing the optional "opal-msg-size" property.
+ *
+ * Returns 0 on success, -ENODEV when no event irq could be obtained, or
+ * the error returned by request_irq().
+ */
+static int __init opal_message_init(struct device_node *opal_node)
+{
+	int ret, i, irq;
+
+	ret = of_property_read_u32(opal_node, "opal-msg-size", &opal_msg_size);
+	if (ret) {
+		pr_notice("Failed to read opal-msg-size property\n");
+		opal_msg_size = sizeof(struct opal_msg);
+	}
+
+	opal_msg = kmalloc(opal_msg_size, GFP_KERNEL);
+	if (!opal_msg) {
+		opal_msg_size = sizeof(struct opal_msg);
+		/* Try to allocate fixed message size */
+		opal_msg = kmalloc(opal_msg_size, GFP_KERNEL);
+		BUG_ON(opal_msg == NULL);
+	}
+
+	for (i = 0; i < OPAL_MSG_TYPE_MAX; i++)
+		ATOMIC_INIT_NOTIFIER_HEAD(&opal_msg_notifier_head[i]);
+
+	irq = opal_event_request(ilog2(OPAL_EVENT_MSG_PENDING));
+	if (!irq) {
+		pr_err("%s: Can't register OPAL event irq (%d)\n",
+		       __func__, irq);
+		/* irq is 0 here; returning it would signal success. */
+		return -ENODEV;
+	}
+
+	ret = request_irq(irq, opal_message_notify,
+			  IRQ_TYPE_LEVEL_HIGH, "opal-msg", NULL);
+	if (ret) {
+		pr_err("%s: Can't request OPAL event irq (%d)\n",
+		       __func__, ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * Read up to @count bytes from OPAL console @vtermno into @buf.
+ *
+ * Returns the number of bytes read, 0 when no console input event is
+ * pending or the read fails, and -ENODEV when opal.entry is not set.
+ */
+int opal_get_chars(uint32_t vtermno, char *buf, int count)
+{
+	__be64 evt, len;
+	s64 rc;
+
+	if (!opal.entry)
+		return -ENODEV;
+
+	opal_poll_events(&evt);
+	if (!(be64_to_cpu(evt) & OPAL_EVENT_CONSOLE_INPUT))
+		return 0;
+
+	/* Length is exchanged with firmware in big-endian form. */
+	len = cpu_to_be64(count);
+	rc = opal_console_read(vtermno, &len, buf);
+	if (rc != OPAL_SUCCESS)
+		return 0;
+
+	return be64_to_cpu(len);
+}
+
+/*
+ * Write @total_len bytes from @data to OPAL console @vtermno.
+ *
+ * When @atomic is true the whole operation runs under opal_write_lock
+ * (taken with spin_lock_irqsave).
+ *
+ * Returns the number of bytes written, -ENODEV when opal.entry is not set,
+ * -EAGAIN when the console lacks buffer space, is busy or accepted no
+ * data, or the opal_error_code() translation of any other write failure.
+ * If querying the buffer space itself fails, the data is dropped and
+ * @total_len is returned.
+ */
+static int __opal_put_chars(uint32_t vtermno, const char *data, int total_len, bool atomic)
+{
+ unsigned long flags = 0 /* shut up gcc */;
+ int written;
+ __be64 olen;
+ s64 rc;
+
+ if (!opal.entry)
+ return -ENODEV;
+
+ if (atomic)
+ spin_lock_irqsave(&opal_write_lock, flags);
+ rc = opal_console_write_buffer_space(vtermno, &olen);
+ if (rc || be64_to_cpu(olen) < total_len) {
+ /* Closed -> drop characters */
+ if (rc)
+ written = total_len;
+ else
+ written = -EAGAIN;
+ goto out;
+ }
+
+ /* Should not get a partial write here because space is available. */
+ olen = cpu_to_be64(total_len);
+ rc = opal_console_write(vtermno, &olen, data);
+ if (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
+ /* Only the _EVENT flavour asks for an event poll before retrying. */
+ if (rc == OPAL_BUSY_EVENT)
+ opal_poll_events(NULL);
+ written = -EAGAIN;
+ goto out;
+ }
+
+ /* Closed or other error drop */
+ if (rc != OPAL_SUCCESS) {
+ written = opal_error_code(rc);
+ goto out;
+ }
+
+ /* olen was updated by the write call; it is read back as the byte count. */
+ written = be64_to_cpu(olen);
+ if (written < total_len) {
+ if (atomic) {
+ /* Should not happen */
+ pr_warn("atomic console write returned partial "
+ "len=%d written=%d\n", total_len, written);
+ }
+ if (!written)
+ written = -EAGAIN;
+ }
+
+out:
+ if (atomic)
+ spin_unlock_irqrestore(&opal_write_lock, flags);
+
+ return written;
+}
+
+/*
+ * Write to OPAL console @vtermno without taking opal_write_lock; see
+ * __opal_put_chars() for the return values.
+ */
+int opal_put_chars(uint32_t vtermno, const char *data, int total_len)
+{
+ return __opal_put_chars(vtermno, data, total_len, false);
+}
+
+/*
+ * opal_put_chars_atomic will not perform partial-writes. Data will be
+ * atomically written to the terminal or not at all. This is not strictly
+ * true at the moment because console space can race with OPAL's console
+ * writes.
+ *
+ * Implemented by calling __opal_put_chars() with atomic set, which makes
+ * it hold opal_write_lock for the duration of the write.
+ */
+int opal_put_chars_atomic(uint32_t vtermno, const char *data, int total_len)
+{
+ return __opal_put_chars(vtermno, data, total_len, true);
+}
+
+/*
+ * Make one attempt at flushing OPAL console @vtermno and return the raw
+ * OPAL status. OPAL_BUSY_EVENT is folded into OPAL_BUSY after polling
+ * events, so callers only need to handle OPAL_BUSY.
+ */
+static s64 __opal_flush_console(uint32_t vtermno)
+{
+	__be64 evt;
+	s64 rc;
+
+	if (opal_check_token(OPAL_CONSOLE_FLUSH)) {
+		rc = opal_console_flush(vtermno);
+		if (rc == OPAL_BUSY_EVENT) {
+			opal_poll_events(NULL);
+			rc = OPAL_BUSY;
+		}
+		return rc;
+	}
+
+	/*
+	 * If OPAL_CONSOLE_FLUSH is not implemented in the firmware,
+	 * the console can still be flushed by calling the polling
+	 * function while it has OPAL_EVENT_CONSOLE_OUTPUT events.
+	 */
+	WARN_ONCE(1, "opal: OPAL_CONSOLE_FLUSH missing.\n");
+
+	opal_poll_events(&evt);
+	if (be64_to_cpu(evt) & OPAL_EVENT_CONSOLE_OUTPUT)
+		return OPAL_BUSY;
+
+	return OPAL_SUCCESS;
+}
+
+/*
+ * opal_flush_console busy-waits, in 1ms mdelay() steps, until the console
+ * is flushed or an error other than busy/partial is reported. The final
+ * OPAL status is translated by opal_error_code().
+ */
+int opal_flush_console(uint32_t vtermno)
+{
+	s64 rc = __opal_flush_console(vtermno);
+
+	while (rc == OPAL_BUSY || rc == OPAL_PARTIAL) {
+		mdelay(1);
+		rc = __opal_flush_console(vtermno);
+	}
+
+	return opal_error_code(rc);
+}
+
+/*
+ * opal_flush_chars is an hvc interface. With @wait set it sleeps in
+ * OPAL_BUSY_DELAY_MS steps until the console is flushed. Without @wait it
+ * makes a single attempt and returns -EBUSY if the console still has data,
+ * or -EAGAIN if it has data and some of it was flushed.
+ */
+int opal_flush_chars(uint32_t vtermno, bool wait)
+{
+	s64 rc = __opal_flush_console(vtermno);
+
+	/* Only a waiting caller retries on busy/partial. */
+	while (wait && (rc == OPAL_BUSY || rc == OPAL_PARTIAL)) {
+		msleep(OPAL_BUSY_DELAY_MS);
+		rc = __opal_flush_console(vtermno);
+	}
+
+	if (rc == OPAL_PARTIAL)
+		return -EAGAIN;
+
+	return opal_error_code(rc);
+}
+
+/*
+ * Decide whether the machine check described by @evt can be survived.
+ *
+ * Returns 1 when execution may continue (the platform reported the error
+ * as recovered, the interrupted user task was sent SIGBUS, or die_mce()
+ * was called) and 0 when the caller must treat it as unrecoverable.
+ */
+static int opal_recover_mce(struct pt_regs *regs,
+ struct machine_check_event *evt)
+{
+ int recovered = 0;
+
+ if (regs_is_unrecoverable(regs)) {
+ /* If MSR_RI isn't set, we cannot recover */
+ pr_err("Machine check interrupt unrecoverable: MSR(RI=0)\n");
+ recovered = 0;
+ } else if (evt->disposition == MCE_DISPOSITION_RECOVERED) {
+ /* Platform corrected itself */
+ recovered = 1;
+ } else if (evt->severity == MCE_SEV_FATAL) {
+ /* Fatal machine check */
+ pr_err("Machine check interrupt is fatal\n");
+ recovered = 0;
+ }
+
+ if (!recovered && evt->sync_error) {
+ /*
+ * Try to kill processes if we get a synchronous machine check
+ * (e.g., one caused by execution of this instruction). This
+ * will devolve into a panic if we try to kill init or are in
+ * an interrupt etc.
+ *
+ * TODO: Queue up this address for hwpoisoning later.
+ * TODO: This is not quite right for d-side machine
+ * checks ->nip is not necessarily the important
+ * address.
+ */
+ if ((user_mode(regs))) {
+ _exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
+ recovered = 1;
+ } else if (die_will_crash()) {
+ /*
+ * die() would kill the kernel, so better to go via
+ * the platform reboot code that will log the
+ * machine check.
+ */
+ recovered = 0;
+ } else {
+ die_mce("Machine check", regs, SIGBUS);
+ recovered = 1;
+ }
+ }
+
+ return recovered;
+}
+
+/*
+ * Report an unrecoverable hardware platform error and take the system
+ * down. Never returns.
+ *
+ * @regs: register state to dump; may be NULL.
+ * @msg: description of the error. It is logged, passed to firmware with
+ *       the reboot request and used as the panic message.
+ */
+void __noreturn pnv_platform_error_reboot(struct pt_regs *regs, const char *msg)
+{
+	panic_flush_kmsg_start();
+
+	pr_emerg("Hardware platform error: %s\n", msg);
+	if (regs)
+		show_regs(regs);
+	smp_send_stop();
+
+	panic_flush_kmsg_end();
+
+	/*
+	 * Don't bother to shut things down because this will
+	 * xstop the system.
+	 */
+	if (opal_cec_reboot2(OPAL_REBOOT_PLATFORM_ERROR, msg)
+						== OPAL_UNSUPPORTED) {
+		pr_emerg("Reboot type %d not supported for %s\n",
+			 OPAL_REBOOT_PLATFORM_ERROR, msg);
+	}
+
+	/*
+	 * We reached here. There can be four possibilities:
+	 * 1. We are running on a firmware level that do not support
+	 *    opal_cec_reboot2()
+	 * 2. We are running on a firmware level that do not support
+	 *    OPAL_REBOOT_PLATFORM_ERROR reboot type.
+	 * 3. We are running on FSP based system that does not need
+	 *    opal to trigger checkstop explicitly for error analysis.
+	 *    The FSP PRD component would have already got notified
+	 *    about this error through other channels.
+	 * 4. We are running on a newer skiboot that by default does
+	 *    not cause a checkstop, drops us back to the kernel to
+	 *    extract context and state at the time of the error.
+	 */
+
+	/* msg is data, not a format string: keep panic() from parsing it. */
+	panic("%s", msg);
+}
+
+/*
+ * Machine check entry point: fetch the pending MCE event, print it and
+ * attempt recovery.
+ *
+ * Returns 1 when opal_recover_mce() succeeded, 0 when no event could be
+ * fetched or its version is not MCE_V1. In the remaining case control is
+ * handed to pnv_platform_error_reboot(), which is __noreturn.
+ */
+int opal_machine_check(struct pt_regs *regs)
+{
+ struct machine_check_event evt;
+
+ if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
+ return 0;
+
+ /* Print things out */
+ if (evt.version != MCE_V1) {
+ pr_err("Machine Check Exception, Unknown event version %d !\n",
+ evt.version);
+ return 0;
+ }
+ machine_check_print_event_info(&evt, user_mode(regs), false);
+
+ if (opal_recover_mce(regs, &evt))
+ return 1;
+
+ pnv_platform_error_reboot(regs, "Unrecoverable Machine Check exception");
+}
+
+/*
+ * Early hmi handler called in real mode.
+ *
+ * Calls the OPAL HMI handler. OPAL_SUCCESS indicates that an HMI event
+ * was generated and is waiting to be pulled by Linux; in that case the
+ * per-cpu hmi_event_available flag is set and 1 is returned. Any other
+ * status yields 0.
+ */
+int opal_hmi_exception_early(struct pt_regs *regs)
+{
+	if (opal_handle_hmi() != OPAL_SUCCESS)
+		return 0;
+
+	local_paca->hmi_event_available = 1;
+	return 1;
+}
+
+/*
+ * Early HMI handler variant built on opal_handle_hmi2(), which reports a
+ * 64-bit flag mask. Returns 0 when the OPAL call fails and 1 otherwise.
+ */
+int opal_hmi_exception_early2(struct pt_regs *regs)
+{
+	__be64 out_flags;
+	u64 flags;
+
+	if (opal_handle_hmi2(&out_flags) != OPAL_SUCCESS)
+		return 0;
+
+	/*
+	 * The mask tells whether an event was generated and whether the
+	 * timebase is still valid.
+	 */
+	flags = be64_to_cpu(out_flags);
+
+	if (flags & OPAL_HMI_FLAGS_NEW_EVENT)
+		local_paca->hmi_event_available = 1;
+	if (flags & OPAL_HMI_FLAGS_TOD_TB_FAIL)
+		tb_invalid = true;
+
+	return 1;
+}
+
+/*
+ * HMI exception handler called in virtual mode when irqs are next enabled.
+ *
+ * If an HMI event was flagged for this cpu, clear the flag and wake the
+ * poller so the event gets processed. Returns 1 in that case, 0 if there
+ * was nothing to do.
+ */
+int opal_handle_hmi_exception(struct pt_regs *regs)
+{
+	if (local_paca->hmi_event_available) {
+		local_paca->hmi_event_available = 0;
+		opal_wake_poller();
+		return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Look @nip up in mc_recoverable_range[] and return the recover_addr of
+ * the first range with start_addr <= nip < end_addr, or 0 if none matches.
+ */
+static uint64_t find_recovery_address(uint64_t nip)
+{
+	int idx;
+
+	for (idx = 0; idx < mc_recoverable_range_len; idx++) {
+		if (nip < mc_recoverable_range[idx].start_addr)
+			continue;
+		if (nip >= mc_recoverable_range[idx].end_addr)
+			continue;
+
+		return mc_recoverable_range[idx].recover_addr;
+	}
+
+	return 0;
+}
+
+/*
+ * If the machine check hit inside the OPAL image (opal.base/opal.size) at
+ * an address that has a registered recovery address, redirect the return
+ * IP there. Returns true when the return IP was redirected.
+ */
+bool opal_mce_check_early_recovery(struct pt_regs *regs)
+{
+	uint64_t fixup;
+
+	if (!opal.base || !opal.size)
+		return false;
+
+	/* Only addresses within the OPAL image are candidates. */
+	if (regs->nip < opal.base || regs->nip >= (opal.base + opal.size))
+		return false;
+
+	fixup = find_recovery_address(regs->nip);
+	if (!fixup)
+		return false;
+
+	/* Setup regs->nip to rfi into fixup address. */
+	regs_set_return_ip(regs, fixup);
+	return true;
+}
+
+/*
+ * Create the "opal" kobject below firmware_kobj and store it in opal_kobj.
+ * Returns 0 on success, -ENOMEM if the kobject could not be created.
+ */
+static int __init opal_sysfs_init(void)
+{
+	opal_kobj = kobject_create_and_add("opal", firmware_kobj);
+	if (opal_kobj)
+		return 0;
+
+	pr_warn("kobject_create_and_add opal failed\n");
+	return -ENOMEM;
+}
+
+/*
+ * sysfs read callback for exported regions: copies up to @count bytes,
+ * starting at offset @off, from the buffer at bin_attr->private (of
+ * bin_attr->size bytes) into @buf via memory_read_from_buffer().
+ */
+static ssize_t export_attr_read(struct file *fp, struct kobject *kobj,
+ struct bin_attribute *bin_attr, char *buf,
+ loff_t off, size_t count)
+{
+ return memory_read_from_buffer(buf, count, &off, bin_attr->private,
+ bin_attr->size);
+}
+
+/*
+ * Expose one memory region as a read-only (0400) sysfs binary file.
+ *
+ * @parent: kobject the file is created under.
+ * @export_name: name of the sysfs file (duplicated internally).
+ * @np, @prop_name: node and property holding two u64 values, used as the
+ *                  region's address (mapped with __va()) and its size.
+ *
+ * Returns 0 on success or a negative error; nothing stays allocated on
+ * failure.
+ */
+static int opal_add_one_export(struct kobject *parent, const char *export_name,
+			       struct device_node *np, const char *prop_name)
+{
+	struct bin_attribute *attr;
+	const char *name;
+	u64 vals[2];
+	int rc;
+
+	rc = of_property_read_u64_array(np, prop_name, &vals[0], 2);
+	if (rc)
+		return rc;
+
+	attr = kzalloc(sizeof(*attr), GFP_KERNEL);
+	if (!attr)
+		return -ENOMEM;
+
+	name = kstrdup(export_name, GFP_KERNEL);
+	if (!name) {
+		rc = -ENOMEM;
+		goto free_attr;
+	}
+
+	sysfs_bin_attr_init(attr);
+	attr->attr.name = name;
+	attr->attr.mode = 0400;
+	attr->read = export_attr_read;
+	attr->private = __va(vals[0]);
+	attr->size = vals[1];
+
+	rc = sysfs_create_bin_file(parent, attr);
+	if (!rc)
+		return 0;
+
+	kfree(name);
+free_attr:
+	kfree(attr);
+	return rc;
+}
+
+/*
+ * Recursively mirror device-tree node @np under @kobj: every property
+ * except "name" and "phandle" becomes an export file, and every child
+ * node becomes a sub-directory that is populated the same way. Failures
+ * are logged and the walk continues with the next property or child.
+ */
+static void opal_add_exported_attrs(struct device_node *np,
+ struct kobject *kobj)
+{
+ struct device_node *child;
+ struct property *prop;
+
+ for_each_property_of_node(np, prop) {
+ int rc;
+
+ /* These two properties are skipped rather than exported. */
+ if (!strcmp(prop->name, "name") ||
+ !strcmp(prop->name, "phandle"))
+ continue;
+
+ rc = opal_add_one_export(kobj, prop->name, np, prop->name);
+ if (rc) {
+ pr_warn("Unable to add export %pOF/%s, rc = %d!\n",
+ np, prop->name, rc);
+ }
+ }
+
+ for_each_child_of_node(np, child) {
+ struct kobject *child_kobj;
+
+ child_kobj = kobject_create_and_add(child->name, kobj);
+ if (!child_kobj) {
+ pr_err("Unable to create export dir for %pOF\n", child);
+ continue;
+ }
+
+ opal_add_exported_attrs(child, child_kobj);
+ }
+}
+
+/*
+ * opal_export_attrs: creates a sysfs node for each property listed in
+ * the device-tree under /ibm,opal/firmware/exports/
+ * All new sysfs nodes are created under /opal/exports/.
+ * This allows for reserved memory regions (e.g. HDAT) to be read.
+ * The new sysfs nodes are only readable by root.
+ */
+static void opal_export_attrs(void)
+{
+	struct device_node *exports;
+	struct kobject *dir;
+	int rc;
+
+	exports = of_find_node_by_path("/ibm,opal/firmware/exports");
+	if (!exports)
+		return;
+
+	/* Create new 'exports' directory - /sys/firmware/opal/exports */
+	dir = kobject_create_and_add("exports", opal_kobj);
+	if (!dir) {
+		pr_warn("kobject_create_and_add() of exports failed\n");
+		goto put_node;
+	}
+
+	opal_add_exported_attrs(exports, dir);
+
+	/*
+	 * NB: symbol_map existed before the generic export interface so it
+	 * lives under the top level opal_kobj.
+	 */
+	rc = opal_add_one_export(opal_kobj, "symbol_map",
+				 exports->parent, "symbol-map");
+	if (rc)
+		pr_warn("Error %d creating OPAL symbols file\n", rc);
+
+put_node:
+	of_node_put(exports);
+}
+
+/*
+ * Register the kernel log buffer with OPAL as dump region
+ * OPAL_DUMP_REGION_LOG_BUF. Does nothing when firmware lacks the
+ * OPAL_REGISTER_DUMP_REGION token or the log buffer address/length is
+ * not available.
+ */
+static void __init opal_dump_region_init(void)
+{
+ void *addr;
+ uint64_t size;
+ int rc;
+
+ if (!opal_check_token(OPAL_REGISTER_DUMP_REGION))
+ return;
+
+ /* Register kernel log buffer */
+ addr = log_buf_addr_get();
+ if (addr == NULL)
+ return;
+
+ size = log_buf_len_get();
+ if (size == 0)
+ return;
+
+ /* The region is passed to firmware as a physical address (__pa). */
+ rc = opal_register_dump_region(OPAL_DUMP_REGION_LOG_BUF,
+ __pa(addr), size);
+ /* Don't warn if this is just an older OPAL that doesn't
+ * know about that call
+ */
+ if (rc && rc != OPAL_UNSUPPORTED)
+ pr_warn("DUMP: Failed to register kernel log buffer. "
+ "rc = %d\n", rc);
+}
+
+/*
+ * Create a platform device for every device-tree node matching
+ * @compatible. The result of of_platform_device_create() is not checked.
+ */
+static void __init opal_pdev_init(const char *compatible)
+{
+ struct device_node *np;
+
+ for_each_compatible_node(np, NULL, compatible)
+ of_platform_device_create(np, NULL, NULL);
+}
+
+/*
+ * Create a platform device for the first node compatible with
+ * IMC_DTB_COMPAT, if there is one.
+ */
+static void __init opal_imc_init_dev(void)
+{
+ struct device_node *np;
+
+ np = of_find_compatible_node(NULL, NULL, IMC_DTB_COMPAT);
+ if (np)
+ of_platform_device_create(np, NULL, NULL);
+
+ /* NOTE(review): also reached with np == NULL; relies on of_node_put() accepting NULL - confirm. */
+ of_node_put(np);
+}
+
+/*
+ * Body of the "kopald" kernel thread: repeatedly handles OPAL events and
+ * then sleeps for up to the heartbeat interval (opal_heartbeat ms, plus
+ * one jiffy) unless more events are already pending. Runs until
+ * kthread_should_stop() and then returns 0.
+ */
+static int kopald(void *unused)
+{
+ unsigned long timeout = msecs_to_jiffies(opal_heartbeat) + 1;
+
+ set_freezable();
+ do {
+ try_to_freeze();
+
+ opal_handle_events();
+
+ /*
+ * The task state is set before pending events are checked; the
+ * thread only sleeps if that check finds nothing.
+ */
+ set_current_state(TASK_INTERRUPTIBLE);
+ if (opal_have_pending_events())
+ __set_current_state(TASK_RUNNING);
+ else
+ schedule_timeout(timeout);
+
+ } while (!kthread_should_stop());
+
+ return 0;
+}
+
+/*
+ * Wake the kopald thread if one was recorded in kopald_tsk; otherwise do
+ * nothing.
+ */
+void opal_wake_poller(void)
+{
+ if (kopald_tsk)
+ wake_up_process(kopald_tsk);
+}
+
+/*
+ * Start the kopald polling thread when firmware asks for a heartbeat
+ * through the "ibm,heartbeat-ms" property of the OPAL node. Without the
+ * property, or with a value of 0, no thread is started.
+ */
+static void __init opal_init_heartbeat(void)
+{
+	struct task_struct *tsk;
+
+	/* Old firmware, we assume the HVC heartbeat is sufficient */
+	if (of_property_read_u32(opal_node, "ibm,heartbeat-ms",
+				 &opal_heartbeat) != 0)
+		opal_heartbeat = 0;
+
+	if (!opal_heartbeat)
+		return;
+
+	tsk = kthread_run(kopald, NULL, "kopald");
+	if (IS_ERR(tsk)) {
+		/*
+		 * Never publish an ERR_PTR: opal_wake_poller() only checks
+		 * kopald_tsk against NULL before waking it.
+		 */
+		pr_warn("Failed to start kopald thread (%ld)\n", PTR_ERR(tsk));
+		return;
+	}
+
+	kopald_tsk = tsk;
+}
+
+/*
+ * Main OPAL initialisation, run as a powernv subsys initcall. Looks up the
+ * /ibm,opal node, then brings up the OPAL sub-interfaces and creates the
+ * platform devices described in the device tree, in the order below.
+ *
+ * Returns -ENODEV when /ibm,opal is missing and 0 otherwise; failures of
+ * the individual sub-initialisations are not propagated.
+ */
+static int __init opal_init(void)
+{
+ struct device_node *np, *consoles, *leds;
+ int rc;
+
+ opal_node = of_find_node_by_path("/ibm,opal");
+ if (!opal_node) {
+ pr_warn("Device node not found\n");
+ return -ENODEV;
+ }
+
+ /* Register OPAL consoles if any ports */
+ consoles = of_find_node_by_path("/ibm,opal/consoles");
+ if (consoles) {
+ for_each_child_of_node(consoles, np) {
+ if (!of_node_name_eq(np, "serial"))
+ continue;
+ of_platform_device_create(np, NULL, NULL);
+ }
+ of_node_put(consoles);
+ }
+
+ /* Initialise OPAL messaging system */
+ opal_message_init(opal_node);
+
+ /* Initialise OPAL asynchronous completion interface */
+ opal_async_comp_init();
+
+ /* Initialise OPAL sensor interface */
+ opal_sensor_init();
+
+ /* Initialise OPAL hypervisor maintenance interrupt handling */
+ opal_hmi_handler_init();
+
+ /* Create i2c platform devices */
+ opal_pdev_init("ibm,opal-i2c");
+
+ /* Handle non-volatile memory devices */
+ opal_pdev_init("pmem-region");
+
+ /* Setup a heartbeat thread if requested by OPAL */
+ opal_init_heartbeat();
+
+ /* Detect In-Memory Collection counters and create devices */
+ opal_imc_init_dev();
+
+ /* Create leds platform devices */
+ leds = of_find_node_by_path("/ibm,opal/leds");
+ if (leds) {
+ of_platform_device_create(leds, "opal_leds", NULL);
+ of_node_put(leds);
+ }
+
+ /* Initialise OPAL message log interface */
+ opal_msglog_init();
+
+ /* Create "opal" kobject under /sys/firmware */
+ rc = opal_sysfs_init();
+ if (rc == 0) {
+ /* Setup dump region interface */
+ opal_dump_region_init();
+ /* Setup error log interface (rc is stored but not examined afterwards) */
+ rc = opal_elog_init();
+ /* Setup code update interface */
+ opal_flash_update_init();
+ /* Setup platform dump extract interface */
+ opal_platform_dump_init();
+ /* Setup system parameters interface */
+ opal_sys_param_init();
+ /* Setup message log sysfs interface. */
+ opal_msglog_sysfs_init();
+ /* Add all export properties */
+ opal_export_attrs();
+ }
+
+ /* Initialize platform devices: IPMI backend, PRD & flash interface */
+ opal_pdev_init("ibm,opal-ipmi");
+ opal_pdev_init("ibm,opal-flash");
+ opal_pdev_init("ibm,opal-prd");
+
+ /* Initialise platform device: oppanel interface */
+ opal_pdev_init("ibm,opal-oppanel");
+
+ /* Initialise OPAL kmsg dumper for flushing console on panic */
+ opal_kmsg_init();
+
+ /* Initialise OPAL powercap interface */
+ opal_powercap_init();
+
+ /* Initialise OPAL Power-Shifting-Ratio interface */
+ opal_psr_init();
+
+ /* Initialise OPAL sensor groups */
+ opal_sensor_groups_init();
+
+ /* Initialise OPAL Power control interface */
+ opal_power_control_init();
+
+ /* Initialize OPAL secure variables */
+ opal_pdev_init("ibm,secvar-backend");
+
+ return 0;
+}
+machine_subsys_initcall(powernv, opal_init);
+
+/*
+ * Quiesce OPAL before the host goes down: shut down OPAL event handling,
+ * wait for firmware to report that the host-reboot sync completed, then
+ * unregister the kernel log buffer dump region if firmware supports that
+ * call.
+ */
+void opal_shutdown(void)
+{
+ long rc = OPAL_BUSY;
+
+ opal_event_shutdown();
+
+ /*
+ * Then sync with OPAL which ensure anything that can
+ * potentially write to our memory has completed such
+ * as an ongoing dump retrieval
+ */
+ while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
+ rc = opal_sync_host_reboot();
+ /*
+ * NOTE(review): events are polled on OPAL_BUSY and every other
+ * status (including the final one) takes the 10ms delay, whereas
+ * the console helpers in this file poll on OPAL_BUSY_EVENT -
+ * confirm this is intended.
+ */
+ if (rc == OPAL_BUSY)
+ opal_poll_events(NULL);
+ else
+ mdelay(10);
+ }
+
+ /* Unregister memory dump region */
+ if (opal_check_token(OPAL_UNREGISTER_DUMP_REGION))
+ opal_unregister_dump_region(OPAL_DUMP_REGION_LOG_BUF);
+}
+
+/* Export this so that test modules can use it */
+EXPORT_SYMBOL_GPL(opal_invalid_call);
+EXPORT_SYMBOL_GPL(opal_xscom_read);
+EXPORT_SYMBOL_GPL(opal_xscom_write);
+EXPORT_SYMBOL_GPL(opal_ipmi_send);
+EXPORT_SYMBOL_GPL(opal_ipmi_recv);
+EXPORT_SYMBOL_GPL(opal_flash_read);
+EXPORT_SYMBOL_GPL(opal_flash_write);
+EXPORT_SYMBOL_GPL(opal_flash_erase);
+EXPORT_SYMBOL_GPL(opal_prd_msg);
+EXPORT_SYMBOL_GPL(opal_check_token);
+
+/*
+ * Convert a region of vmalloc memory to an opal sg list
+ *
+ * The list is a chain of PAGE_SIZE nodes linked through the physical
+ * address in ->next. Every entry describes at most PAGE_SIZE bytes of the
+ * region, and a new node is started after SG_ENTRIES_PER_NODE entries.
+ * All fields are stored big-endian.
+ *
+ * Returns the first node, to be released with opal_free_sg_list(), or
+ * NULL if an allocation fails (nodes allocated so far are freed).
+ */
+struct opal_sg_list *opal_vmalloc_to_sg_list(void *vmalloc_addr,
+ unsigned long vmalloc_size)
+{
+ struct opal_sg_list *sg, *first = NULL;
+ unsigned long i = 0;
+
+ sg = kzalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!sg)
+ goto nomem;
+
+ first = sg;
+
+ while (vmalloc_size > 0) {
+ /* Physical address of the page backing the current position. */
+ uint64_t data = vmalloc_to_pfn(vmalloc_addr) << PAGE_SHIFT;
+ uint64_t length = min(vmalloc_size, PAGE_SIZE);
+
+ sg->entry[i].data = cpu_to_be64(data);
+ sg->entry[i].length = cpu_to_be64(length);
+ i++;
+
+ if (i >= SG_ENTRIES_PER_NODE) {
+ struct opal_sg_list *next;
+
+ next = kzalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!next)
+ goto nomem;
+
+ /* presumably the extra 16 bytes cover the node's length and next fields - TODO confirm */
+ sg->length = cpu_to_be64(
+ i * sizeof(struct opal_sg_entry) + 16);
+ i = 0;
+ sg->next = cpu_to_be64(__pa(next));
+ sg = next;
+ }
+
+ vmalloc_addr += length;
+ vmalloc_size -= length;
+ }
+
+ sg->length = cpu_to_be64(i * sizeof(struct opal_sg_entry) + 16);
+
+ return first;
+
+nomem:
+ pr_err("%s : Failed to allocate memory\n", __func__);
+ opal_free_sg_list(first);
+ return NULL;
+}
+
+/*
+ * Free every node of an opal sg list. Nodes are chained by the big-endian
+ * physical address in ->next; a value of 0 ends the list. A NULL @sg is
+ * accepted.
+ */
+void opal_free_sg_list(struct opal_sg_list *sg)
+{
+	while (sg) {
+		/* Read the link before the node is released. */
+		uint64_t next_pa = be64_to_cpu(sg->next);
+
+		kfree(sg);
+		sg = next_pa ? __va(next_pa) : NULL;
+	}
+}
+
+/*
+ * Translate an OPAL return code into a Linux errno value (0 for
+ * OPAL_SUCCESS). Codes without a dedicated mapping are logged and
+ * reported as -EIO.
+ */
+int opal_error_code(int rc)
+{
+	switch (rc) {
+	case OPAL_SUCCESS:
+		return 0;
+	case OPAL_PARAMETER:
+		return -EINVAL;
+	case OPAL_ASYNC_COMPLETION:
+		return -EINPROGRESS;
+	case OPAL_BUSY:
+	case OPAL_BUSY_EVENT:
+		return -EBUSY;
+	case OPAL_NO_MEM:
+		return -ENOMEM;
+	case OPAL_PERMISSION:
+		return -EPERM;
+	case OPAL_UNSUPPORTED:
+	case OPAL_HARDWARE:
+	case OPAL_INTERNAL_ERROR:
+		return -EIO;
+	case OPAL_TIMEOUT:
+		return -ETIMEDOUT;
+	default:
+		pr_err("%s: unexpected OPAL error %d\n", __func__, rc);
+		return -EIO;
+	}
+}
+
+/*
+ * Ask OPAL to set the nest MMU PTCR to @ptcr, passing -1UL as the first
+ * argument of the call. Does nothing without FW_FEATURE_OPAL; an
+ * OPAL_UNSUPPORTED reply is ignored silently.
+ */
+void powernv_set_nmmu_ptcr(unsigned long ptcr)
+{
+	int rc;
+
+	if (!firmware_has_feature(FW_FEATURE_OPAL))
+		return;
+
+	rc = opal_nmmu_set_ptcr(-1UL, ptcr);
+	if (rc == OPAL_SUCCESS || rc == OPAL_UNSUPPORTED)
+		return;
+
+	pr_warn("%s: Unable to set nest mmu ptcr\n", __func__);
+}
+
+EXPORT_SYMBOL_GPL(opal_poll_events);
+EXPORT_SYMBOL_GPL(opal_rtc_read);
+EXPORT_SYMBOL_GPL(opal_rtc_write);
+EXPORT_SYMBOL_GPL(opal_tpo_read);
+EXPORT_SYMBOL_GPL(opal_tpo_write);
+EXPORT_SYMBOL_GPL(opal_i2c_request);
+/* Export these symbols for PowerNV LED class driver */
+EXPORT_SYMBOL_GPL(opal_leds_get_ind);
+EXPORT_SYMBOL_GPL(opal_leds_set_ind);
+/* Export this symbol for PowerNV Operator Panel class driver */
+EXPORT_SYMBOL_GPL(opal_write_oppanel_async);
+/* Export this for KVM */
+EXPORT_SYMBOL_GPL(opal_int_set_mfrr);
+EXPORT_SYMBOL_GPL(opal_int_eoi);
+EXPORT_SYMBOL_GPL(opal_error_code);
+/* Export the below symbol for NX compression */
+EXPORT_SYMBOL(opal_nx_coproc_init);
diff --git a/arch/powerpc/platforms/powernv/pci-cxl.c b/arch/powerpc/platforms/powernv/pci-cxl.c
new file mode 100644
index 000000000..7e419de71
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/pci-cxl.c
@@ -0,0 +1,153 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2014-2016 IBM Corp.
+ */
+
+#include <linux/module.h>
+#include <misc/cxl-base.h>
+#include <asm/pnv-pci.h>
+#include <asm/opal.h>
+
+#include "pci.h"
+
+/*
+ * Switch the PHB that @dev sits behind into CXL mode @mode for the
+ * device's PE.
+ *
+ * Returns -ENODEV when the device has no PE; otherwise the unconverted
+ * return code of opal_pci_set_phb_cxl_mode() (0 on success).
+ */
+int pnv_phb_to_cxl_mode(struct pci_dev *dev, uint64_t mode)
+{
+ struct pci_controller *hose = pci_bus_to_host(dev->bus);
+ struct pnv_phb *phb = hose->private_data;
+ struct pnv_ioda_pe *pe;
+ int rc;
+
+ pe = pnv_ioda_get_pe(dev);
+ if (!pe)
+ return -ENODEV;
+
+ pe_info(pe, "Switching PHB to CXL\n");
+
+ rc = opal_pci_set_phb_cxl_mode(phb->opal_id, mode, pe->pe_number);
+ if (rc == OPAL_UNSUPPORTED)
+ dev_err(&dev->dev, "Required cxl mode not supported by firmware - update skiboot\n");
+ else if (rc)
+ dev_err(&dev->dev, "opal_pci_set_phb_cxl_mode failed: %i\n", rc);
+
+ /* Note: this is an OPAL status, not an errno. */
+ return rc;
+}
+EXPORT_SYMBOL(pnv_phb_to_cxl_mode);
+
+/*
+ * Allocate @num MSI hwirqs from the MSI bitmap of the PHB that @dev sits
+ * behind.
+ *
+ * Returns the absolute hardware IRQ number (msi_base + bitmap offset), or
+ * -ENOSPC when the bitmap allocation fails.
+ */
+int pnv_cxl_alloc_hwirqs(struct pci_dev *dev, int num)
+{
+	struct pci_controller *hose = pci_bus_to_host(dev->bus);
+	struct pnv_phb *phb = hose->private_data;
+	int offset;
+
+	offset = msi_bitmap_alloc_hwirqs(&phb->msi_bmp, num);
+	if (offset >= 0)
+		return phb->msi_base + offset;
+
+	dev_warn(&dev->dev, "Failed to find a free MSI\n");
+	return -ENOSPC;
+}
+EXPORT_SYMBOL(pnv_cxl_alloc_hwirqs);
+
+/*
+ * Return @num hwirqs starting at absolute number @hwirq to the MSI bitmap
+ * of the PHB that @dev sits behind.
+ */
+void pnv_cxl_release_hwirqs(struct pci_dev *dev, int hwirq, int num)
+{
+ struct pci_controller *hose = pci_bus_to_host(dev->bus);
+ struct pnv_phb *phb = hose->private_data;
+
+ /* The bitmap is indexed relative to msi_base. */
+ msi_bitmap_free_hwirqs(&phb->msi_bmp, hwirq - phb->msi_base, num);
+}
+EXPORT_SYMBOL(pnv_cxl_release_hwirqs);
+
+/*
+ * Release every non-empty range recorded in @irqs back to the MSI bitmap
+ * of the PHB that @dev sits behind. Index 0 is never touched.
+ */
+void pnv_cxl_release_hwirq_ranges(struct cxl_irq_ranges *irqs,
+				  struct pci_dev *dev)
+{
+	struct pci_controller *hose = pci_bus_to_host(dev->bus);
+	struct pnv_phb *phb = hose->private_data;
+	int idx, bit;
+
+	for (idx = 1; idx < CXL_IRQ_RANGES; idx++) {
+		if (irqs->range[idx]) {
+			pr_devel("cxl release irq range 0x%x: offset: 0x%lx limit: %ld\n",
+				 idx, irqs->offset[idx],
+				 irqs->range[idx]);
+			/* Bitmap offsets are relative to msi_base. */
+			bit = irqs->offset[idx] - phb->msi_base;
+			msi_bitmap_free_hwirqs(&phb->msi_bmp, bit,
+					       irqs->range[idx]);
+		}
+	}
+}
+EXPORT_SYMBOL(pnv_cxl_release_hwirq_ranges);
+
+/*
+ * Allocate @num hwirqs for @dev, spread over at most CXL_IRQ_RANGES - 1
+ * contiguous ranges recorded in @irqs (which is zeroed first).
+ *
+ * Returns 0 on success. If the request cannot be satisfied, everything
+ * allocated so far is released again and -ENOSPC is returned.
+ */
+int pnv_cxl_alloc_hwirq_ranges(struct cxl_irq_ranges *irqs,
+ struct pci_dev *dev, int num)
+{
+ struct pci_controller *hose = pci_bus_to_host(dev->bus);
+ struct pnv_phb *phb = hose->private_data;
+ int i, hwirq, try;
+
+ memset(irqs, 0, sizeof(struct cxl_irq_ranges));
+
+ /* 0 is reserved for the multiplexed PSL DSI interrupt */
+ for (i = 1; i < CXL_IRQ_RANGES && num; i++) {
+ /* Ask for all that is left, halving the request until one fits. */
+ try = num;
+ while (try) {
+ hwirq = msi_bitmap_alloc_hwirqs(&phb->msi_bmp, try);
+ if (hwirq >= 0)
+ break;
+ try /= 2;
+ }
+ if (!try)
+ goto fail;
+
+ irqs->offset[i] = phb->msi_base + hwirq;
+ irqs->range[i] = try;
+ pr_devel("cxl alloc irq range 0x%x: offset: 0x%lx limit: %li\n",
+ i, irqs->offset[i], irqs->range[i]);
+ num -= try;
+ }
+ /* Ran out of range slots before covering the whole request. */
+ if (num)
+ goto fail;
+
+ return 0;
+fail:
+ pnv_cxl_release_hwirq_ranges(irqs, dev);
+ return -ENOSPC;
+}
+EXPORT_SYMBOL(pnv_cxl_alloc_hwirq_ranges);
+
+/*
+ * Return the irq_count field of the MSI bitmap belonging to the PHB that
+ * @dev sits behind.
+ */
+int pnv_cxl_get_irq_count(struct pci_dev *dev)
+{
+ struct pci_controller *hose = pci_bus_to_host(dev->bus);
+ struct pnv_phb *phb = hose->private_data;
+
+ return phb->msi_bmp.irq_count;
+}
+EXPORT_SYMBOL(pnv_cxl_get_irq_count);
+
+/*
+ * Bind the MSI identified by absolute number @hwirq to the PE of @dev and
+ * install the PHB's MSI irq chip on @virq.
+ *
+ * Returns 0 on success, -ENODEV when the device has no PE and -EIO when
+ * the OPAL call fails.
+ */
+int pnv_cxl_ioda_msi_setup(struct pci_dev *dev, unsigned int hwirq,
+			   unsigned int virq)
+{
+	struct pci_controller *hose = pci_bus_to_host(dev->bus);
+	struct pnv_phb *phb = hose->private_data;
+	struct pnv_ioda_pe *pe = pnv_ioda_get_pe(dev);
+	unsigned int xive_num;
+	int rc;
+
+	if (!pe)
+		return -ENODEV;
+
+	/* The XIVE number is the hwirq relative to the PHB's msi_base. */
+	xive_num = hwirq - phb->msi_base;
+
+	/* Assign XIVE to PE */
+	rc = opal_pci_set_xive_pe(phb->opal_id, pe->pe_number, xive_num);
+	if (rc) {
+		pe_warn(pe, "%s: OPAL error %d setting msi_base 0x%x "
+			"hwirq 0x%x XIVE 0x%x PE\n",
+			pci_name(dev), rc, phb->msi_base, hwirq, xive_num);
+		return -EIO;
+	}
+
+	pnv_set_msi_irq_chip(phb, virq);
+	return 0;
+}
+EXPORT_SYMBOL(pnv_cxl_ioda_msi_setup);
diff --git a/arch/powerpc/platforms/powernv/pci-ioda-tce.c b/arch/powerpc/platforms/powernv/pci-ioda-tce.c
new file mode 100644
index 000000000..e96324502
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/pci-ioda-tce.c
@@ -0,0 +1,430 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * TCE helpers for IODA PCI/PCIe on PowerNV platforms
+ *
+ * Copyright 2018 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/iommu.h>
+
+#include <asm/iommu.h>
+#include <asm/tce.h>
+#include "pci.h"
+
+/*
+ * Build a bitmask of the TCE page sizes supported by @phb.
+ *
+ * Each element of the "ibm,supported-tce-sizes" device-tree property is
+ * treated as a page shift and contributes bit (1 << shift) to the mask.
+ * When the property is missing or empty, 4K and 64K are assumed, plus 16M
+ * and 256M on CPUs that have CPU_FTR_ARCH_207S but not CPU_FTR_ARCH_300.
+ */
+unsigned long pnv_ioda_parse_tce_sizes(struct pnv_phb *phb)
+{
+	struct pci_controller *hose = phb->hose;
+	struct device_node *dn = hose->dn;
+	unsigned long mask = 0;
+	int i, rc, count;
+	u32 val;
+
+	count = of_property_count_u32_elems(dn, "ibm,supported-tce-sizes");
+	if (count <= 0) {
+		mask = SZ_4K | SZ_64K;
+		/* Add 16M and 256M for POWER8 by default */
+		if (cpu_has_feature(CPU_FTR_ARCH_207S) &&
+		    !cpu_has_feature(CPU_FTR_ARCH_300))
+			mask |= SZ_16M | SZ_256M;
+		return mask;
+	}
+
+	for (i = 0; i < count; i++) {
+		rc = of_property_read_u32_index(dn, "ibm,supported-tce-sizes",
+						i, &val);
+		if (rc)
+			continue;
+
+		/*
+		 * The value comes from firmware; shifting by the width of
+		 * the type or more is undefined, so drop bogus entries.
+		 */
+		if (val >= BITS_PER_LONG)
+			continue;
+
+		mask |= 1UL << val;
+	}
+
+	return mask;
+}
+
+/*
+ * Fill in the generic iommu_table fields for a PCI TCE table.
+ *
+ * @tbl:        table descriptor to initialise
+ * @tce_mem:    address stored as the table base
+ * @tce_size:   table size in bytes; stored as a count of 8-byte entries
+ * @dma_offset: bus offset; stored in units of pages
+ * @page_shift: page shift stored in the table
+ */
+void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
+		void *tce_mem, u64 tce_size,
+		u64 dma_offset, unsigned int page_shift)
+{
+	/* Fixed attributes */
+	tbl->it_type = TCE_PCI;
+	tbl->it_blocksize = 16;
+	tbl->it_index = 0;
+	tbl->it_busno = 0;
+
+	/* Geometry derived from the arguments */
+	tbl->it_base = (unsigned long)tce_mem;
+	tbl->it_size = tce_size >> 3;
+	tbl->it_page_shift = page_shift;
+	tbl->it_offset = dma_offset >> tbl->it_page_shift;
+}
+
+/*
+ * Allocate and zero one TCE table level of (1 << @shift) bytes on node @nid.
+ *
+ * Returns the kernel address of the level, or NULL (after logging an error)
+ * when the page allocation fails.
+ */
+static __be64 *pnv_alloc_tce_level(int nid, unsigned int shift)
+{
+	struct page *pg;
+	__be64 *level;
+
+	pg = alloc_pages_node(nid, GFP_ATOMIC | __GFP_NOWARN,
+			      shift - PAGE_SHIFT);
+	if (pg) {
+		level = page_address(pg);
+		memset(level, 0, 1UL << shift);
+		return level;
+	}
+
+	pr_err("Failed to allocate a TCE memory, level shift=%d\n",
+			shift);
+	return NULL;
+}
+
+static void pnv_pci_ioda2_table_do_free_pages(__be64 *addr,
+ unsigned long size, unsigned int levels);
+
+/*
+ * Walk the (possibly multi-level) TCE table and return a pointer to the
+ * entry for @idx.
+ *
+ * @tbl:   table to walk
+ * @user:  walk tbl->it_userspace instead of tbl->it_base
+ * @idx:   entry index relative to the start of the table
+ * @alloc: allocate missing intermediate levels on the way down
+ *
+ * Returns NULL when a level is missing and @alloc is false, or when
+ * allocating a missing level fails.
+ */
+static __be64 *pnv_tce(struct iommu_table *tbl, bool user, long idx, bool alloc)
+{
+	__be64 *tmp = user ? tbl->it_userspace : (__be64 *) tbl->it_base;
+	int level = tbl->it_indirect_levels;
+	const long shift = ilog2(tbl->it_level_size);
+	unsigned long mask = (tbl->it_level_size - 1) << (level * shift);
+
+	while (level) {
+		int n = (idx & mask) >> (level * shift);
+		unsigned long oldtce, tce = be64_to_cpu(READ_ONCE(tmp[n]));
+
+		if (!tce) {
+			__be64 *tmp2;
+
+			if (!alloc)
+				return NULL;
+
+			tmp2 = pnv_alloc_tce_level(tbl->it_nid,
+					ilog2(tbl->it_level_size) + 3);
+			if (!tmp2)
+				return NULL;
+
+			tce = __pa(tmp2) | TCE_PCI_READ | TCE_PCI_WRITE;
+			oldtce = be64_to_cpu(cmpxchg(&tmp[n], 0,
+					cpu_to_be64(tce)));
+			if (oldtce) {
+				/*
+				 * Somebody else installed a level first.
+				 * Free ours, passing the number of entries
+				 * (not the shift) so the page order matches
+				 * the allocation above. The level was never
+				 * published and is still all zeroes, so
+				 * there is nothing below it to walk.
+				 */
+				pnv_pci_ioda2_table_do_free_pages(tmp2,
+					tbl->it_level_size, 0);
+				tce = oldtce;
+			}
+		}
+
+		/* Descend: strip the permission bits to get the next level */
+		tmp = __va(tce & ~(TCE_PCI_READ | TCE_PCI_WRITE));
+		idx &= ~mask;
+		mask >>= shift;
+		--level;
+	}
+
+	return tmp + idx;
+}
+
+/*
+ * Write @npages consecutive TCEs starting at @index, mapping the memory
+ * that starts at kernel address @uaddr with the permissions implied by
+ * @direction. Write permission always implies read permission.
+ *
+ * Returns 0 on success or -ENOMEM when a missing table level cannot be
+ * allocated. Entries written before the failure are left in place.
+ * NOTE(review): callers are presumably expected to clear the range on
+ * error - confirm against the it_ops users.
+ */
+int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
+		unsigned long uaddr, enum dma_data_direction direction,
+		unsigned long attrs)
+{
+	u64 proto_tce = iommu_direction_to_tce_perm(direction);
+	u64 rpn = __pa(uaddr) >> tbl->it_page_shift;
+	long i;
+
+	if (proto_tce & TCE_PCI_WRITE)
+		proto_tce |= TCE_PCI_READ;
+
+	for (i = 0; i < npages; i++) {
+		unsigned long newtce = proto_tce |
+			((rpn + i) << tbl->it_page_shift);
+		unsigned long idx = index - tbl->it_offset + i;
+		__be64 *ptce = pnv_tce(tbl, false, idx, true);
+
+		/* pnv_tce() returns NULL when a level allocation fails */
+		if (!ptce)
+			return -ENOMEM;
+
+		*ptce = cpu_to_be64(newtce);
+	}
+
+	return 0;
+}
+
+#ifdef CONFIG_IOMMU_API
+/*
+ * Atomically replace the TCE at @index and hand the previous contents back.
+ *
+ * On entry *@hpa and *@direction describe the new mapping; on return they
+ * describe the old one. With DMA_NONE no table level is allocated: if the
+ * entry does not exist, *@hpa is set to 0 and 0 is returned.
+ *
+ * Returns 0 on success, -ENOMEM when a needed level cannot be allocated.
+ */
+int pnv_tce_xchg(struct iommu_table *tbl, long index,
+		unsigned long *hpa, enum dma_data_direction *direction)
+{
+	const bool unmapping = (*direction == DMA_NONE);
+	u64 proto_tce = iommu_direction_to_tce_perm(*direction);
+	unsigned long newtce = *hpa | proto_tce, oldtce;
+	unsigned long idx = index - tbl->it_offset;
+	__be64 *ptce;
+
+	BUG_ON(*hpa & ~IOMMU_PAGE_MASK(tbl));
+
+	/* Only a real mapping is allowed to allocate missing levels */
+	ptce = pnv_tce(tbl, false, idx, !unmapping);
+	if (!ptce) {
+		if (!unmapping)
+			return -ENOMEM;
+
+		*hpa = 0;
+		return 0;
+	}
+
+	if (newtce & TCE_PCI_WRITE)
+		newtce |= TCE_PCI_READ;
+
+	oldtce = be64_to_cpu(xchg(ptce, cpu_to_be64(newtce)));
+	*direction = iommu_tce_direction(oldtce);
+	*hpa = oldtce & ~(TCE_PCI_READ | TCE_PCI_WRITE);
+
+	return 0;
+}
+
+/*
+ * Return the slot for @index in the userspace view of @tbl, optionally
+ * allocating missing levels. Warns once and returns NULL when the table
+ * has no userspace view.
+ */
+__be64 *pnv_tce_useraddrptr(struct iommu_table *tbl, long index, bool alloc)
+{
+	const long idx = index - tbl->it_offset;
+
+	if (WARN_ON_ONCE(!tbl->it_userspace))
+		return NULL;
+
+	return pnv_tce(tbl, true, idx, alloc);
+}
+#endif
+
+/*
+ * Clear @npages TCEs starting at @index.
+ *
+ * Entries whose table level was never allocated are skipped a whole level
+ * at a time. The skip is computed from the position inside the table, not
+ * from the loop counter, so a range that does not start on a level
+ * boundary cannot jump over entries of the next, populated level.
+ */
+void pnv_tce_free(struct iommu_table *tbl, long index, long npages)
+{
+	const unsigned long first = index - tbl->it_offset;
+	long i;
+
+	for (i = 0; i < npages; i++) {
+		unsigned long idx = first + i;
+		__be64 *ptce = pnv_tce(tbl, false, idx, false);
+
+		if (ptce) {
+			*ptce = cpu_to_be64(0);
+			continue;
+		}
+
+		/*
+		 * Skip the rest of the level: advance to the last entry of
+		 * the level containing idx, the loop increment then moves
+		 * on to the first entry of the next one.
+		 */
+		i += (tbl->it_level_size - 1) -
+			(idx & (tbl->it_level_size - 1));
+	}
+}
+
+/*
+ * Read the TCE at @index in CPU byte order; 0 when the level holding it
+ * has not been allocated.
+ */
+unsigned long pnv_tce_get(struct iommu_table *tbl, long index)
+{
+	const long idx = index - tbl->it_offset;
+	__be64 *ptce = pnv_tce(tbl, false, idx, false);
+
+	return ptce ? be64_to_cpu(*ptce) : 0;
+}
+
+/*
+ * Free one level of a TCE table and, when @levels is non-zero, every lower
+ * level reachable from it.
+ *
+ * @addr:   kernel virtual address of the level; the TCE_PCI_READ and
+ *          TCE_PCI_WRITE bits are masked off before it is used
+ * @size:   number of 8-byte entries in a level
+ * @levels: number of levels below this one that still have to be walked
+ */
+static void pnv_pci_ioda2_table_do_free_pages(__be64 *addr,
+ unsigned long size, unsigned int levels)
+{
+ /* Recursive calls pass __va() of a raw entry, permission bits included */
+ const unsigned long addr_ul = (unsigned long) addr &
+ ~(TCE_PCI_READ | TCE_PCI_WRITE);
+
+ if (levels) {
+ long i;
+ u64 *tmp = (u64 *) addr_ul;
+
+ for (i = 0; i < size; ++i) {
+ unsigned long hpa = be64_to_cpu(tmp[i]);
+
+ /* No permission bit set: the slot does not point to a lower level */
+ if (!(hpa & (TCE_PCI_READ | TCE_PCI_WRITE)))
+ continue;
+
+ pnv_pci_ioda2_table_do_free_pages(__va(hpa), size,
+ levels - 1);
+ }
+ }
+
+ /* Entries are 8 bytes each, so the level occupies size << 3 bytes */
+ free_pages(addr_ul, get_order(size << 3));
+}
+
+/*
+ * Release every page backing @tbl: the hardware table and, when present,
+ * its userspace view. Does nothing for a table whose it_size is zero.
+ */
+void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl)
+{
+	unsigned long entries;
+
+	if (!tbl->it_size)
+		return;
+
+	/* Multi-level tables are freed one level-sized chunk at a time */
+	if (tbl->it_indirect_levels)
+		entries = tbl->it_level_size;
+	else
+		entries = tbl->it_size;
+
+	pnv_pci_ioda2_table_do_free_pages((__be64 *)tbl->it_base, entries,
+			tbl->it_indirect_levels);
+
+	if (!tbl->it_userspace)
+		return;
+
+	pnv_pci_ioda2_table_do_free_pages(tbl->it_userspace, entries,
+			tbl->it_indirect_levels);
+}
+
+/*
+ * Allocate one TCE table level of (1 << @shift) bytes and, while @levels
+ * is greater than one, recursively allocate and link the levels below it.
+ *
+ * @nid:             NUMA node to allocate on
+ * @shift:           log2 of the size in bytes of every level
+ * @levels:          number of levels to allocate, this one included
+ * @limit:           stop populating once *@current_offset reaches this
+ * @current_offset:  running total of bytes allocated for last-level tables
+ * @total_allocated: running total of bytes allocated for all levels
+ *
+ * Returns the address of this level, or NULL when it cannot be allocated;
+ * in that case nothing is accounted. A failure in a lower level only stops
+ * the population early and is visible to the caller through
+ * *@current_offset.
+ */
+static __be64 *pnv_pci_ioda2_table_do_alloc_pages(int nid, unsigned int shift,
+		unsigned int levels, unsigned long limit,
+		unsigned long *current_offset, unsigned long *total_allocated)
+{
+	__be64 *addr, *tmp;
+	unsigned long allocated = 1UL << shift;
+	unsigned int entries = 1UL << (shift - 3);
+	long i;
+
+	addr = pnv_alloc_tce_level(nid, shift);
+	/* Do not account for, or write through, a failed allocation */
+	if (!addr)
+		return NULL;
+
+	*total_allocated += allocated;
+
+	--levels;
+	if (!levels) {
+		*current_offset += allocated;
+		return addr;
+	}
+
+	for (i = 0; i < entries; ++i) {
+		tmp = pnv_pci_ioda2_table_do_alloc_pages(nid, shift,
+				levels, limit, current_offset, total_allocated);
+		if (!tmp)
+			break;
+
+		addr[i] = cpu_to_be64(__pa(tmp) |
+				TCE_PCI_READ | TCE_PCI_WRITE);
+
+		if (*current_offset >= limit)
+			break;
+	}
+
+	return addr;
+}
+
+/*
+ * Allocate the backing pages of a TCE table and initialise @tbl for it.
+ *
+ * @nid:                  NUMA node to allocate on
+ * @bus_offset:           bus address at which the DMA window starts
+ * @page_shift:           log2 of the IOMMU page size
+ * @window_size:          size of the DMA window, must be a power of two
+ * @levels:               number of table levels, 1..POWERNV_IOMMU_MAX_LEVELS
+ * @alloc_userspace_copy: also allocate the userspace view of the table
+ * @tbl:                  table descriptor to fill in
+ *
+ * Only the first level is allocated here (the allocator is always called
+ * with a level count of 1); tbl->it_indirect_levels records levels - 1.
+ *
+ * Returns 0 on success, -EINVAL for unsupported parameters and -ENOMEM when
+ * an allocation fails.
+ */
+long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
+ __u32 page_shift, __u64 window_size, __u32 levels,
+ bool alloc_userspace_copy, struct iommu_table *tbl)
+{
+ void *addr, *uas = NULL;
+ unsigned long offset = 0, level_shift, total_allocated = 0;
+ unsigned long total_allocated_uas = 0;
+ const unsigned int window_shift = ilog2(window_size);
+ unsigned int entries_shift = window_shift - page_shift;
+ /* 8 bytes per entry; a table never occupies less than one page */
+ unsigned int table_shift = max_t(unsigned int, entries_shift + 3,
+ PAGE_SHIFT);
+ const unsigned long tce_table_size = 1UL << table_shift;
+
+ if (!levels || (levels > POWERNV_IOMMU_MAX_LEVELS))
+ return -EINVAL;
+
+ if (!is_power_of_2(window_size))
+ return -EINVAL;
+
+ /* Adjust direct table size from window_size and levels */
+ /* Spread the index bits evenly over the levels, rounding up */
+ entries_shift = (entries_shift + levels - 1) / levels;
+ level_shift = entries_shift + 3;
+ level_shift = max_t(unsigned int, level_shift, PAGE_SHIFT);
+
+ /* Reject geometries whose total address bits reach 55 or more */
+ if ((level_shift - 3) * levels + page_shift >= 55)
+ return -EINVAL;
+
+ /* Allocate TCE table */
+ addr = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift,
+ 1, tce_table_size, &offset, &total_allocated);
+
+ /* addr==NULL means that the first level allocation failed */
+ if (!addr)
+ return -ENOMEM;
+
+ /*
+ * First level was allocated but some lower level failed as
+ * we did not allocate as much as we wanted,
+ * release partially allocated table.
+ */
+ if (levels == 1 && offset < tce_table_size)
+ goto free_tces_exit;
+
+ /* Allocate userspace view of the TCE table */
+ if (alloc_userspace_copy) {
+ offset = 0;
+ uas = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift,
+ 1, tce_table_size, &offset,
+ &total_allocated_uas);
+ if (!uas)
+ goto free_tces_exit;
+ /* The userspace view must end up exactly as large as the table */
+ if (levels == 1 && (offset < tce_table_size ||
+ total_allocated_uas != total_allocated))
+ goto free_uas_exit;
+ }
+
+ /* Setup linux iommu table */
+ pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, bus_offset,
+ page_shift);
+ tbl->it_level_size = 1ULL << (level_shift - 3);
+ tbl->it_indirect_levels = levels - 1;
+ tbl->it_userspace = uas;
+ tbl->it_nid = nid;
+
+ pr_debug("Created TCE table: ws=%08llx ts=%lx @%08llx base=%lx uas=%p levels=%d/%d\n",
+ window_size, tce_table_size, bus_offset, tbl->it_base,
+ tbl->it_userspace, 1, levels);
+
+ return 0;
+
+ /* Error unwinding: free what was allocated, newest first */
+free_uas_exit:
+ pnv_pci_ioda2_table_do_free_pages(uas,
+ 1ULL << (level_shift - 3), levels - 1);
+free_tces_exit:
+ pnv_pci_ioda2_table_do_free_pages(addr,
+ 1ULL << (level_shift - 3), levels - 1);
+
+ return -ENOMEM;
+}
+
+/*
+ * Undo pnv_pci_link_table_and_group(): drop the link from @tbl's list of
+ * attached groups and clear @tbl from @table_group->tables[].
+ *
+ * Does nothing when either argument is NULL. Warns when the link or the
+ * tables[] slot cannot be found.
+ */
+void pnv_pci_unlink_table_and_group(struct iommu_table *tbl,
+ struct iommu_table_group *table_group)
+{
+ long i;
+ bool found;
+ struct iommu_table_group_link *tgl;
+
+ if (!tbl || !table_group)
+ return;
+
+ /* Remove link to a group from table's list of attached groups */
+ found = false;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(tgl, &tbl->it_group_list, next) {
+ if (tgl->table_group == table_group) {
+ /* The link itself is freed via kfree_rcu() */
+ list_del_rcu(&tgl->next);
+ kfree_rcu(tgl, rcu);
+ found = true;
+ break;
+ }
+ }
+ rcu_read_unlock();
+
+ /* Without a link, tables[] is left untouched */
+ if (WARN_ON(!found))
+ return;
+
+ /* Clean a pointer to iommu_table in iommu_table_group::tables[] */
+ found = false;
+ for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
+ if (table_group->tables[i] == tbl) {
+ /* Drop the reference taken when the table was linked */
+ iommu_tce_table_put(tbl);
+ table_group->tables[i] = NULL;
+ found = true;
+ break;
+ }
+ }
+ WARN_ON(!found);
+}
+
+/*
+ * Attach @tbl to @table_group: add a link to the table's list of groups
+ * and store a referenced pointer to the table in slot @num of
+ * @table_group->tables[]. The link is allocated on NUMA node @node.
+ *
+ * Returns 0 on success, -EINVAL (with a warning) when @tbl or @table_group
+ * is NULL, -ENOMEM when the link cannot be allocated.
+ */
+long pnv_pci_link_table_and_group(int node, int num,
+		struct iommu_table *tbl,
+		struct iommu_table_group *table_group)
+{
+	struct iommu_table_group_link *link;
+
+	if (WARN_ON(!tbl || !table_group))
+		return -EINVAL;
+
+	link = kzalloc_node(sizeof(*link), GFP_KERNEL, node);
+	if (!link)
+		return -ENOMEM;
+
+	/* Table -> group direction */
+	link->table_group = table_group;
+	list_add_rcu(&link->next, &tbl->it_group_list);
+
+	/* Group -> table direction; takes a reference on the table */
+	table_group->tables[num] = iommu_tce_table_get(tbl);
+
+	return 0;
+}
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
new file mode 100644
index 000000000..28fac4770
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -0,0 +1,2827 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Support PCI/PCIe on PowerNV platforms
+ *
+ * Copyright 2011 Benjamin Herrenschmidt, IBM Corp.
+ */
+
+#undef DEBUG
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/crash_dump.h>
+#include <linux/delay.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/memblock.h>
+#include <linux/irq.h>
+#include <linux/io.h>
+#include <linux/msi.h>
+#include <linux/iommu.h>
+#include <linux/rculist.h>
+#include <linux/sizes.h>
+#include <linux/debugfs.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+
+#include <asm/sections.h>
+#include <asm/io.h>
+#include <asm/pci-bridge.h>
+#include <asm/machdep.h>
+#include <asm/msi_bitmap.h>
+#include <asm/ppc-pci.h>
+#include <asm/opal.h>
+#include <asm/iommu.h>
+#include <asm/tce.h>
+#include <asm/xics.h>
+#include <asm/firmware.h>
+#include <asm/pnv-pci.h>
+#include <asm/mmzone.h>
+#include <asm/xive.h>
+
+#include <misc/cxl-base.h>
+
+#include "powernv.h"
+#include "pci.h"
+#include "../../../../drivers/pci/pci.h"
+
+/* This array is indexed with enum pnv_phb_type */
+static const char * const pnv_phb_names[] = { "IODA2", "NPU_OCAPI" };
+
+static void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable);
+static void pnv_pci_configure_bus(struct pci_bus *bus);
+
+/*
+ * printk() wrapper that prefixes the message with an identification of
+ * @pe: the device name for device PEs, "domain:bus" for bus PEs and
+ * "domain:bus:slot.func" for VF PEs.
+ *
+ * @pe:    PE the message is about
+ * @level: printk level string placed in front of the message
+ * @fmt:   printf-style format followed by its arguments
+ *
+ * The prefix starts out empty so that a PE with none of the DEV/BUS/VF
+ * flags set (for example a slave-only PE) does not print uninitialised
+ * stack contents.
+ */
+void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level,
+			    const char *fmt, ...)
+{
+	struct va_format vaf;
+	va_list args;
+	char pfix[32] = "";
+
+	va_start(args, fmt);
+
+	vaf.fmt = fmt;
+	vaf.va = &args;
+
+	/* Every writer is bounded by the size of pfix */
+	if (pe->flags & PNV_IODA_PE_DEV)
+		strscpy(pfix, dev_name(&pe->pdev->dev), sizeof(pfix));
+	else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL))
+		snprintf(pfix, sizeof(pfix), "%04x:%02x ",
+			 pci_domain_nr(pe->pbus), pe->pbus->number);
+#ifdef CONFIG_PCI_IOV
+	else if (pe->flags & PNV_IODA_PE_VF)
+		snprintf(pfix, sizeof(pfix), "%04x:%02x:%2x.%d",
+			 pci_domain_nr(pe->parent_dev->bus),
+			 (pe->rid & 0xff00) >> 8,
+			 PCI_SLOT(pe->rid), PCI_FUNC(pe->rid));
+#endif /* CONFIG_PCI_IOV*/
+
+	printk("%spci %s: [PE# %.2x] %pV",
+	       level, pfix, pe->pe_number, &vaf);
+
+	va_end(args);
+}
+
+static bool pnv_iommu_bypass_disabled __read_mostly;
+static bool pci_reset_phbs __read_mostly;
+
+/*
+ * Parse the comma separated "iommu=" early parameter. The only option
+ * handled here is "nobypass", which sets pnv_iommu_bypass_disabled.
+ *
+ * Returns -EINVAL when no value was supplied, 0 otherwise.
+ */
+static int __init iommu_setup(char *str)
+{
+	char *opt;
+
+	if (!str)
+		return -EINVAL;
+
+	for (opt = str; *opt; ) {
+		if (strncmp(opt, "nobypass", 8) == 0) {
+			pnv_iommu_bypass_disabled = true;
+			pr_info("PowerNV: IOMMU bypass window disabled.\n");
+			break;
+		}
+
+		/* Step over this option and the comma that follows it */
+		opt += strcspn(opt, ",");
+		if (*opt == ',')
+			opt++;
+	}
+
+	return 0;
+}
+early_param("iommu", iommu_setup);
+
+/*
+ * Early parameter handler: the mere presence of the option sets
+ * pci_reset_phbs; the value in @str is ignored. Always returns 0.
+ */
+static int __init pci_reset_phbs_setup(char *str)
+{
+ pci_reset_phbs = true;
+ return 0;
+}
+
+early_param("ppc_pci_reset_phbs", pci_reset_phbs_setup);
+
+/*
+ * Initialise slot @pe_no of the PHB's PE array and clear any frozen state
+ * left on that PE. Returns a pointer to the slot.
+ */
+static struct pnv_ioda_pe *pnv_ioda_init_pe(struct pnv_phb *phb, int pe_no)
+{
+	struct pnv_ioda_pe *pe = &phb->ioda.pe_array[pe_no];
+	s64 rc;
+
+	pe->phb = phb;
+	pe->pe_number = pe_no;
+	pe->dma_setup_done = false;
+
+	/*
+	 * The PE may have been left frozen by the last PCI remove path, so
+	 * unfreeze it here. Doing so on a PE that is not frozen is harmless.
+	 */
+	rc = opal_pci_eeh_freeze_clear(phb->opal_id, pe_no,
+				       OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
+	switch (rc) {
+	case OPAL_SUCCESS:
+	case OPAL_UNSUPPORTED:
+		break;
+	default:
+		pr_warn("%s: Error %lld unfreezing PHB#%x-PE#%x\n",
+			__func__, rc, phb->hose->global_number, pe_no);
+		break;
+	}
+
+	return pe;
+}
+
+/*
+ * Mark PE @pe_no as allocated in the PHB's PE bitmap and initialise it.
+ * An out-of-range number is only warned about; a PE that was already
+ * reserved is initialised again.
+ */
+static void pnv_ioda_reserve_pe(struct pnv_phb *phb, int pe_no)
+{
+	bool already_reserved;
+
+	if (pe_no < 0 || pe_no >= phb->ioda.total_pe_num) {
+		pr_warn("%s: Invalid PE %x on PHB#%x\n",
+			__func__, pe_no, phb->hose->global_number);
+		return;
+	}
+
+	mutex_lock(&phb->ioda.pe_alloc_mutex);
+	already_reserved = test_and_set_bit(pe_no, phb->ioda.pe_alloc);
+	if (already_reserved)
+		pr_debug("%s: PE %x was reserved on PHB#%x\n",
+			 __func__, pe_no, phb->hose->global_number);
+	mutex_unlock(&phb->ioda.pe_alloc_mutex);
+
+	pnv_ioda_init_pe(phb, pe_no);
+}
+
+/*
+ * Allocate @count consecutive PEs on @phb, searching from the highest PE
+ * number downwards.
+ *
+ * Returns a pointer to the lowest-numbered PE of the run, or NULL when
+ * @count is not positive or no free run of that length exists.
+ */
+struct pnv_ioda_pe *pnv_ioda_alloc_pe(struct pnv_phb *phb, int count)
+{
+	struct pnv_ioda_pe *ret = NULL;
+	int run = 0, pe, i;
+
+	/*
+	 * A non-positive count could otherwise "match" with pe == -1 after
+	 * the scan and hand back a pointer in front of the PE array.
+	 */
+	if (count <= 0)
+		return NULL;
+
+	mutex_lock(&phb->ioda.pe_alloc_mutex);
+
+	/* scan backwards for a run of @count cleared bits */
+	for (pe = phb->ioda.total_pe_num - 1; pe >= 0; pe--) {
+		if (test_bit(pe, phb->ioda.pe_alloc)) {
+			run = 0;
+			continue;
+		}
+
+		run++;
+		if (run == count)
+			break;
+	}
+	if (run != count)
+		goto out;
+
+	/* pe is now the lowest PE number of the free run */
+	for (i = pe; i < pe + count; i++) {
+		set_bit(i, phb->ioda.pe_alloc);
+		pnv_ioda_init_pe(phb, i);
+	}
+	ret = &phb->ioda.pe_array[pe];
+
+out:
+	mutex_unlock(&phb->ioda.pe_alloc_mutex);
+	return ret;
+}
+
+/*
+ * Return @pe to the allocator: wipe the PE structure and clear its bit in
+ * the PHB's PE allocation bitmap. Warns if the PE still has a device
+ * attached.
+ */
+void pnv_ioda_free_pe(struct pnv_ioda_pe *pe)
+{
+ /* Saved up front: the memset() below clears these fields in *pe */
+ struct pnv_phb *phb = pe->phb;
+ unsigned int pe_num = pe->pe_number;
+
+ WARN_ON(pe->pdev);
+ memset(pe, 0, sizeof(struct pnv_ioda_pe));
+
+ mutex_lock(&phb->ioda.pe_alloc_mutex);
+ clear_bit(pe_num, phb->ioda.pe_alloc);
+ mutex_unlock(&phb->ioda.pe_alloc_mutex);
+}
+
+/* The default M64 BAR is shared by all PEs */
+/*
+ * Configure and enable the default M64 BAR of @phb, then shrink the M64
+ * resource so that the two segments belonging to the reserved and root bus
+ * PEs are excluded.
+ *
+ * Returns 0 on success. On an OPAL failure the BAR is disabled again and
+ * -EIO is returned.
+ */
+static int pnv_ioda2_init_m64(struct pnv_phb *phb)
+{
+ /* Names the step that failed in the warning below */
+ const char *desc;
+ struct resource *r;
+ s64 rc;
+
+ /* Configure the default M64 BAR */
+ rc = opal_pci_set_phb_mem_window(phb->opal_id,
+ OPAL_M64_WINDOW_TYPE,
+ phb->ioda.m64_bar_idx,
+ phb->ioda.m64_base,
+ 0, /* unused */
+ phb->ioda.m64_size);
+ if (rc != OPAL_SUCCESS) {
+ desc = "configuring";
+ goto fail;
+ }
+
+ /* Enable the default M64 BAR */
+ rc = opal_pci_phb_mmio_enable(phb->opal_id,
+ OPAL_M64_WINDOW_TYPE,
+ phb->ioda.m64_bar_idx,
+ OPAL_ENABLE_M64_SPLIT);
+ if (rc != OPAL_SUCCESS) {
+ desc = "enabling";
+ goto fail;
+ }
+
+ /*
+ * Exclude the segments for reserved and root bus PE, which
+ * are first or last two PEs.
+ */
+ r = &phb->hose->mem_resources[1];
+ if (phb->ioda.reserved_pe_idx == 0)
+ r->start += (2 * phb->ioda.m64_segsize);
+ else if (phb->ioda.reserved_pe_idx == (phb->ioda.total_pe_num - 1))
+ r->end -= (2 * phb->ioda.m64_segsize);
+ else
+ pr_warn(" Cannot strip M64 segment for reserved PE#%x\n",
+ phb->ioda.reserved_pe_idx);
+
+ return 0;
+
+fail:
+ pr_warn(" Failure %lld %s M64 BAR#%d\n",
+ rc, desc, phb->ioda.m64_bar_idx);
+ /* Leave the BAR disabled; the result of this call is not checked */
+ opal_pci_phb_mmio_enable(phb->opal_id,
+ OPAL_M64_WINDOW_TYPE,
+ phb->ioda.m64_bar_idx,
+ OPAL_DISABLE_M64);
+ return -EIO;
+}
+
+/*
+ * For every M64 resource of @pdev, find the M64 segments it covers.
+ *
+ * With a non-NULL @pe_bitmap the segment numbers are only recorded there;
+ * with a NULL @pe_bitmap the PE with the same number as each segment is
+ * reserved on the PHB.
+ */
+static void pnv_ioda_reserve_dev_m64_pe(struct pci_dev *pdev,
+ unsigned long *pe_bitmap)
+{
+ struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus);
+ struct resource *r;
+ resource_size_t base, sgsz, start, end;
+ int segno, i;
+
+ base = phb->ioda.m64_base;
+ sgsz = phb->ioda.m64_segsize;
+ for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
+ r = &pdev->resource[i];
+ /* Skip resources that are unassigned or not in the M64 window */
+ if (!r->parent || !pnv_pci_is_m64(phb, r))
+ continue;
+
+ /* Widen the range to whole segments, relative to the M64 base */
+ start = ALIGN_DOWN(r->start - base, sgsz);
+ end = ALIGN(r->end - base, sgsz);
+ for (segno = start / sgsz; segno < end / sgsz; segno++) {
+ if (pe_bitmap)
+ set_bit(segno, pe_bitmap);
+ else
+ pnv_ioda_reserve_pe(phb, segno);
+ }
+ }
+}
+
+/*
+ * Apply pnv_ioda_reserve_dev_m64_pe() to every device on @bus and, when
+ * @all is set, to every device on the buses below it as well.
+ */
+static void pnv_ioda_reserve_m64_pe(struct pci_bus *bus,
+				    unsigned long *pe_bitmap,
+				    bool all)
+{
+	struct pci_dev *child;
+
+	list_for_each_entry(child, &bus->devices, bus_list) {
+		pnv_ioda_reserve_dev_m64_pe(child, pe_bitmap);
+
+		/* Only descend through bridges when asked to */
+		if (!all)
+			continue;
+		if (!child->subordinate)
+			continue;
+
+		pnv_ioda_reserve_m64_pe(child->subordinate, pe_bitmap, all);
+	}
+}
+
+/*
+ * Pick the PE(s) for @bus based on the M64 segments its devices occupy.
+ *
+ * Every occupied segment maps to the PE with the same number. The first
+ * such PE becomes the master, the others are chained to it as slaves so
+ * that they form one compound PE. With @all set, devices on subordinate
+ * buses are taken into account too.
+ *
+ * Returns the master PE, or NULL for the root bus, when no M64 segment is
+ * in use, or when the temporary bitmap cannot be allocated.
+ */
+static struct pnv_ioda_pe *pnv_ioda_pick_m64_pe(struct pci_bus *bus, bool all)
+{
+	struct pnv_phb *phb = pci_bus_to_pnvhb(bus);
+	struct pnv_ioda_pe *master_pe, *pe;
+	unsigned long *pe_alloc;
+	int i;
+
+	/* Root bus shouldn't use M64 */
+	if (pci_is_root_bus(bus))
+		return NULL;
+
+	/*
+	 * Allocate bitmap. bitmap_zalloc() sizes it from the number of
+	 * bits, so a PE count that is not a multiple of 8 (or of the word
+	 * size) is never under-allocated.
+	 */
+	pe_alloc = bitmap_zalloc(phb->ioda.total_pe_num, GFP_KERNEL);
+	if (!pe_alloc) {
+		pr_warn("%s: Out of memory !\n",
+			__func__);
+		return NULL;
+	}
+
+	/* Figure out reserved PE numbers by the PE */
+	pnv_ioda_reserve_m64_pe(bus, pe_alloc, all);
+
+	/*
+	 * The current bus might not own any M64 window itself, with all of
+	 * it contributed by its child buses. In that case there is no M64
+	 * dependent PE# to pick.
+	 */
+	if (bitmap_empty(pe_alloc, phb->ioda.total_pe_num)) {
+		bitmap_free(pe_alloc);
+		return NULL;
+	}
+
+	/*
+	 * Figure out the master PE and put all slave PEs to master
+	 * PE's list to form compound PE.
+	 */
+	master_pe = NULL;
+	for_each_set_bit(i, pe_alloc, phb->ioda.total_pe_num) {
+		pe = &phb->ioda.pe_array[i];
+
+		phb->ioda.m64_segmap[pe->pe_number] = pe->pe_number;
+		if (!master_pe) {
+			pe->flags |= PNV_IODA_PE_MASTER;
+			INIT_LIST_HEAD(&pe->slaves);
+			master_pe = pe;
+		} else {
+			pe->flags |= PNV_IODA_PE_SLAVE;
+			pe->master = master_pe;
+			list_add_tail(&pe->list, &master_pe->slaves);
+		}
+	}
+
+	bitmap_free(pe_alloc);
+	return master_pe;
+}
+
+/*
+ * Read the M64 window description of @phb from the device tree and set up
+ * the matching resource, the M64 geometry in phb->ioda, the bitmap of free
+ * M64 BARs and the init_m64 callback.
+ *
+ * Returns early, leaving M64 unconfigured, when the PHB type or firmware
+ * does not support it, when "ibm,opal-m64-window" is missing or when the
+ * range of available BARs is empty.
+ */
+static void __init pnv_ioda_parse_m64_window(struct pnv_phb *phb)
+{
+ struct pci_controller *hose = phb->hose;
+ struct device_node *dn = hose->dn;
+ struct resource *res;
+ /* m64_range[0]: first available BAR, m64_range[1]: number of BARs */
+ u32 m64_range[2], i;
+ const __be32 *r;
+ u64 pci_addr;
+
+ if (phb->type != PNV_PHB_IODA2) {
+ pr_info(" Not support M64 window\n");
+ return;
+ }
+
+ if (!firmware_has_feature(FW_FEATURE_OPAL)) {
+ pr_info(" Firmware too old to support M64 window\n");
+ return;
+ }
+
+ r = of_get_property(dn, "ibm,opal-m64-window", NULL);
+ if (!r) {
+ pr_info(" No <ibm,opal-m64-window> on %pOF\n",
+ dn);
+ return;
+ }
+
+ /*
+ * Find the available M64 BAR range and pickup the last one for
+ * covering the whole 64-bits space. We support only one range.
+ */
+ if (of_property_read_u32_array(dn, "ibm,opal-available-m64-ranges",
+ m64_range, 2)) {
+ /* In absence of the property, assume 0..15 */
+ m64_range[0] = 0;
+ m64_range[1] = 16;
+ }
+ /* We only support 64 bits in our allocator */
+ if (m64_range[1] > 63) {
+ pr_warn("%s: Limiting M64 range to 63 (from %d) on PHB#%x\n",
+ __func__, m64_range[1], phb->hose->global_number);
+ m64_range[1] = 63;
+ }
+ /* Empty range, no m64 */
+ if (m64_range[1] <= m64_range[0]) {
+ pr_warn("%s: M64 empty, disabling M64 usage on PHB#%x\n",
+ __func__, phb->hose->global_number);
+ return;
+ }
+
+ /* Configure M64 information */
+ /* Property cells used: r[0..1] PCI address, r[2..] CPU address, r[4..5] size */
+ res = &hose->mem_resources[1];
+ res->name = dn->full_name;
+ res->start = of_translate_address(dn, r + 2);
+ res->end = res->start + of_read_number(r + 4, 2) - 1;
+ res->flags = (IORESOURCE_MEM | IORESOURCE_MEM_64 | IORESOURCE_PREFETCH);
+ pci_addr = of_read_number(r, 2);
+ hose->mem_offset[1] = res->start - pci_addr;
+
+ /* The window is split into one equally sized segment per PE */
+ phb->ioda.m64_size = resource_size(res);
+ phb->ioda.m64_segsize = phb->ioda.m64_size / phb->ioda.total_pe_num;
+ phb->ioda.m64_base = pci_addr;
+
+ /* This lines up nicely with the display from processing OF ranges */
+ pr_info(" MEM 0x%016llx..0x%016llx -> 0x%016llx (M64 #%d..%d)\n",
+ res->start, res->end, pci_addr, m64_range[0],
+ m64_range[0] + m64_range[1] - 1);
+
+ /* Mark all M64 used up by default */
+ phb->ioda.m64_bar_alloc = (unsigned long)-1;
+
+ /* Use last M64 BAR to cover M64 window */
+ m64_range[1]--;
+ phb->ioda.m64_bar_idx = m64_range[0] + m64_range[1];
+
+ pr_info(" Using M64 #%d as default window\n", phb->ioda.m64_bar_idx);
+
+ /* Mark remaining ones free */
+ /*
+ * NOTE(review): the upper bound is the decremented count, not
+ * m64_range[0] + count, so with a non-zero first BAR fewer BARs are
+ * marked free than the range contains - confirm this is intended.
+ */
+ for (i = m64_range[0]; i < m64_range[1]; i++)
+ clear_bit(i, &phb->ioda.m64_bar_alloc);
+
+ /*
+ * Setup init functions for M64 based on IODA version, IODA3 uses
+ * the IODA2 code.
+ */
+ phb->init_m64 = pnv_ioda2_init_m64;
+}
+
+/*
+ * Freeze PE @pe_no on @phb. For a compound PE the master is frozen first
+ * and then every slave, whichever member was named by @pe_no.
+ *
+ * Failures are logged. A failure on the master stops the operation; a
+ * failure on a slave does not prevent the remaining slaves from being
+ * frozen.
+ */
+static void pnv_ioda_freeze_pe(struct pnv_phb *phb, int pe_no)
+{
+ struct pnv_ioda_pe *pe = &phb->ioda.pe_array[pe_no];
+ struct pnv_ioda_pe *slave;
+ s64 rc;
+
+ /* Fetch master PE */
+ if (pe->flags & PNV_IODA_PE_SLAVE) {
+ pe = pe->master;
+ /* A slave without a valid master cannot be handled; bail out */
+ if (WARN_ON(!pe || !(pe->flags & PNV_IODA_PE_MASTER)))
+ return;
+
+ pe_no = pe->pe_number;
+ }
+
+ /* Freeze master PE */
+ rc = opal_pci_eeh_freeze_set(phb->opal_id,
+ pe_no,
+ OPAL_EEH_ACTION_SET_FREEZE_ALL);
+ if (rc != OPAL_SUCCESS) {
+ pr_warn("%s: Failure %lld freezing PHB#%x-PE#%x\n",
+ __func__, rc, phb->hose->global_number, pe_no);
+ return;
+ }
+
+ /* Freeze slave PEs */
+ /* A PE without the master flag has no slave list to walk */
+ if (!(pe->flags & PNV_IODA_PE_MASTER))
+ return;
+
+ list_for_each_entry(slave, &pe->slaves, list) {
+ rc = opal_pci_eeh_freeze_set(phb->opal_id,
+ slave->pe_number,
+ OPAL_EEH_ACTION_SET_FREEZE_ALL);
+ if (rc != OPAL_SUCCESS)
+ pr_warn("%s: Failure %lld freezing PHB#%x-PE#%x\n",
+ __func__, rc, phb->hose->global_number,
+ slave->pe_number);
+ }
+}
+
+/*
+ * Clear the frozen state selected by @opt on PE @pe_no of @phb. For a
+ * compound PE the master is handled first and then every slave, whichever
+ * member was named by @pe_no.
+ *
+ * Returns 0 on success and -EIO when a slave PE has no valid master or
+ * when any OPAL call fails; the first OPAL failure stops the operation.
+ */
+static int pnv_ioda_unfreeze_pe(struct pnv_phb *phb, int pe_no, int opt)
+{
+	struct pnv_ioda_pe *pe, *slave;
+	s64 rc;
+
+	/* Find master PE */
+	pe = &phb->ioda.pe_array[pe_no];
+	if (pe->flags & PNV_IODA_PE_SLAVE) {
+		pe = pe->master;
+		/*
+		 * Bail out like pnv_ioda_freeze_pe() does rather than
+		 * dereferencing a missing or bogus master.
+		 */
+		if (WARN_ON(!pe || !(pe->flags & PNV_IODA_PE_MASTER)))
+			return -EIO;
+
+		pe_no = pe->pe_number;
+	}
+
+	/* Clear frozen state for master PE */
+	rc = opal_pci_eeh_freeze_clear(phb->opal_id, pe_no, opt);
+	if (rc != OPAL_SUCCESS) {
+		pr_warn("%s: Failure %lld clear %d on PHB#%x-PE#%x\n",
+			__func__, rc, opt, phb->hose->global_number, pe_no);
+		return -EIO;
+	}
+
+	/* A PE without the master flag has no slave list to walk */
+	if (!(pe->flags & PNV_IODA_PE_MASTER))
+		return 0;
+
+	/* Clear frozen state for slave PEs */
+	list_for_each_entry(slave, &pe->slaves, list) {
+		rc = opal_pci_eeh_freeze_clear(phb->opal_id,
+					     slave->pe_number,
+					     opt);
+		if (rc != OPAL_SUCCESS) {
+			pr_warn("%s: Failure %lld clear %d on PHB#%x-PE#%x\n",
+				__func__, rc, opt, phb->hose->global_number,
+				slave->pe_number);
+			return -EIO;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Query the EEH freeze state of PE @pe_no on @phb. For a compound PE the
+ * numerically highest state among the master and all its slaves is
+ * reported, whichever member was named by @pe_no.
+ *
+ * Returns the state, OPAL_EEH_STOPPED_PERM_UNAVAIL when @pe_no is out of
+ * range or a slave PE has no valid master, and
+ * OPAL_EEH_STOPPED_TEMP_UNAVAIL when an OPAL query fails.
+ */
+static int pnv_ioda_get_pe_state(struct pnv_phb *phb, int pe_no)
+{
+	struct pnv_ioda_pe *slave, *pe;
+	u8 fstate = 0, state;
+	__be16 pcierr = 0;
+	s64 rc;
+
+	/* Sanity check on PE number */
+	if (pe_no < 0 || pe_no >= phb->ioda.total_pe_num)
+		return OPAL_EEH_STOPPED_PERM_UNAVAIL;
+
+	/*
+	 * Fetch the master PE and the PE instance might be
+	 * not initialized yet.
+	 */
+	pe = &phb->ioda.pe_array[pe_no];
+	if (pe->flags & PNV_IODA_PE_SLAVE) {
+		pe = pe->master;
+		/*
+		 * Bail out like pnv_ioda_freeze_pe() does rather than
+		 * dereferencing a missing or bogus master.
+		 */
+		if (WARN_ON(!pe || !(pe->flags & PNV_IODA_PE_MASTER)))
+			return OPAL_EEH_STOPPED_PERM_UNAVAIL;
+
+		pe_no = pe->pe_number;
+	}
+
+	/* Check the master PE */
+	rc = opal_pci_eeh_freeze_status(phb->opal_id, pe_no,
+					&state, &pcierr, NULL);
+	if (rc != OPAL_SUCCESS) {
+		pr_warn("%s: Failure %lld getting "
+			"PHB#%x-PE#%x state\n",
+			__func__, rc,
+			phb->hose->global_number, pe_no);
+		return OPAL_EEH_STOPPED_TEMP_UNAVAIL;
+	}
+
+	/* Check the slave PE */
+	if (!(pe->flags & PNV_IODA_PE_MASTER))
+		return state;
+
+	list_for_each_entry(slave, &pe->slaves, list) {
+		rc = opal_pci_eeh_freeze_status(phb->opal_id,
+						slave->pe_number,
+						&fstate,
+						&pcierr,
+						NULL);
+		if (rc != OPAL_SUCCESS) {
+			pr_warn("%s: Failure %lld getting "
+				"PHB#%x-PE#%x state\n",
+				__func__, rc,
+				phb->hose->global_number, slave->pe_number);
+			return OPAL_EEH_STOPPED_TEMP_UNAVAIL;
+		}
+
+		/*
+		 * Override the result based on the ascending
+		 * priority.
+		 */
+		if (fstate > state)
+			state = fstate;
+	}
+
+	return state;
+}
+
+/*
+ * Look up the PE that @bdfn maps to in the PHB's reverse map; NULL when
+ * the map holds IODA_INVALID_PE for it.
+ */
+struct pnv_ioda_pe *pnv_pci_bdfn_to_pe(struct pnv_phb *phb, u16 bdfn)
+{
+	const int pe_no = phb->ioda.pe_rmap[bdfn];
+
+	return pe_no != IODA_INVALID_PE ? &phb->ioda.pe_array[pe_no] : NULL;
+}
+
+/*
+ * Return the PE recorded in @dev's pci_dn; NULL when the device has no
+ * pci_dn or no PE has been assigned to it.
+ */
+struct pnv_ioda_pe *pnv_ioda_get_pe(struct pci_dev *dev)
+{
+	struct pnv_phb *phb = pci_bus_to_pnvhb(dev->bus);
+	struct pci_dn *pdn = pci_get_pdn(dev);
+
+	if (!pdn || pdn->pe_number == IODA_INVALID_PE)
+		return NULL;
+
+	return &phb->ioda.pe_array[pdn->pe_number];
+}
+
+/*
+ * Add @child to, or remove it from, the PELT-V of @parent. When @child is
+ * the master of a compound PE, the same is done for each of its slaves.
+ *
+ * Returns 0 on success, -ENXIO on the first OPAL failure.
+ */
+static int pnv_ioda_set_one_peltv(struct pnv_phb *phb,
+ struct pnv_ioda_pe *parent,
+ struct pnv_ioda_pe *child,
+ bool is_add)
+{
+ /* Only used in the warning messages */
+ const char *desc = is_add ? "adding" : "removing";
+ uint8_t op = is_add ? OPAL_ADD_PE_TO_DOMAIN :
+ OPAL_REMOVE_PE_FROM_DOMAIN;
+ struct pnv_ioda_pe *slave;
+ long rc;
+
+ /* Parent PE affects child PE */
+ rc = opal_pci_set_peltv(phb->opal_id, parent->pe_number,
+ child->pe_number, op);
+ if (rc != OPAL_SUCCESS) {
+ pe_warn(child, "OPAL error %ld %s to parent PELTV\n",
+ rc, desc);
+ return -ENXIO;
+ }
+
+ /* A PE without the master flag has no slave list to walk */
+ if (!(child->flags & PNV_IODA_PE_MASTER))
+ return 0;
+
+ /* Compound case: parent PE affects slave PEs */
+ list_for_each_entry(slave, &child->slaves, list) {
+ rc = opal_pci_set_peltv(phb->opal_id, parent->pe_number,
+ slave->pe_number, op);
+ if (rc != OPAL_SUCCESS) {
+ pe_warn(slave, "OPAL error %ld %s to parent PELTV\n",
+ rc, desc);
+ return -ENXIO;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Add @pe to, or remove it from, every PELT-V it belongs in: its own, those
+ * of its slaves when it is a compound PE master, and those of the PEs of
+ * all upstream bridges. When adding, the frozen state of @pe and of its
+ * slaves is cleared first.
+ *
+ * Returns 0 on success or the first error reported by
+ * pnv_ioda_set_one_peltv().
+ */
+static int pnv_ioda_set_peltv(struct pnv_phb *phb,
+ struct pnv_ioda_pe *pe,
+ bool is_add)
+{
+ struct pnv_ioda_pe *slave;
+ struct pci_dev *pdev = NULL;
+ int ret;
+
+ /*
+ * Clear PE frozen state. If it's master PE, we need
+ * clear slave PE frozen state as well.
+ */
+ if (is_add) {
+ /* Results of the freeze-clear calls are not checked */
+ opal_pci_eeh_freeze_clear(phb->opal_id, pe->pe_number,
+ OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
+ if (pe->flags & PNV_IODA_PE_MASTER) {
+ list_for_each_entry(slave, &pe->slaves, list)
+ opal_pci_eeh_freeze_clear(phb->opal_id,
+ slave->pe_number,
+ OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
+ }
+ }
+
+ /*
+ * Associate PE in PELT. We need add the PE into the
+ * corresponding PELT-V as well. Otherwise, the error
+ * originated from the PE might contribute to other
+ * PEs.
+ */
+ ret = pnv_ioda_set_one_peltv(phb, pe, pe, is_add);
+ if (ret)
+ return ret;
+
+ /* For compound PEs, any one affects all of them */
+ if (pe->flags & PNV_IODA_PE_MASTER) {
+ list_for_each_entry(slave, &pe->slaves, list) {
+ ret = pnv_ioda_set_one_peltv(phb, slave, pe, is_add);
+ if (ret)
+ return ret;
+ }
+ }
+
+ /* Pick the first upstream device according to the kind of PE */
+ if (pe->flags & (PNV_IODA_PE_BUS_ALL | PNV_IODA_PE_BUS))
+ pdev = pe->pbus->self;
+ else if (pe->flags & PNV_IODA_PE_DEV)
+ pdev = pe->pdev->bus->self;
+#ifdef CONFIG_PCI_IOV
+ else if (pe->flags & PNV_IODA_PE_VF)
+ pdev = pe->parent_dev;
+#endif /* CONFIG_PCI_IOV */
+ /* Walk up the bridge chain; devices without a PE are skipped */
+ while (pdev) {
+ struct pci_dn *pdn = pci_get_pdn(pdev);
+ struct pnv_ioda_pe *parent;
+
+ if (pdn && pdn->pe_number != IODA_INVALID_PE) {
+ parent = &phb->ioda.pe_array[pdn->pe_number];
+ ret = pnv_ioda_set_one_peltv(phb, parent, pe, is_add);
+ if (ret)
+ return ret;
+ }
+
+ pdev = pdev->bus->self;
+ }
+
+ return 0;
+}
+
+/*
+ * Remove @pe from the PELT-V of every upstream PE, starting at @parent and
+ * walking up the bridge chain, clear its frozen state and finally remove
+ * it from its own PELT-V.
+ *
+ * This is best effort: OPAL failures are logged and the teardown carries
+ * on with the remaining steps.
+ */
+static void pnv_ioda_unset_peltv(struct pnv_phb *phb,
+				 struct pnv_ioda_pe *pe,
+				 struct pci_dev *parent)
+{
+	int64_t rc;
+
+	while (parent) {
+		struct pci_dn *pdn = pci_get_pdn(parent);
+
+		if (pdn && pdn->pe_number != IODA_INVALID_PE) {
+			rc = opal_pci_set_peltv(phb->opal_id, pdn->pe_number,
+						pe->pe_number,
+						OPAL_REMOVE_PE_FROM_DOMAIN);
+			/* Nothing can be undone here, but do not hide it */
+			if (rc)
+				pe_warn(pe, "OPAL error %lld remove from PE#%x PELTV\n",
+					rc, pdn->pe_number);
+		}
+		parent = parent->bus->self;
+	}
+
+	opal_pci_eeh_freeze_clear(phb->opal_id, pe->pe_number,
+				  OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
+
+	/* Disassociate PE in PELT */
+	rc = opal_pci_set_peltv(phb->opal_id, pe->pe_number,
+				pe->pe_number, OPAL_REMOVE_PE_FROM_DOMAIN);
+	if (rc)
+		pe_warn(pe, "OPAL error %lld remove self from PELTV\n", rc);
+}
+
+/*
+ * Tear down the hardware configuration of @pe: clear its entries in the
+ * reverse RID map, remove it from the PELT-Vs (not on NPU OCAPI PHBs),
+ * unmap it in the PELT and drop its bus and device pointers.
+ *
+ * Always returns 0; an OPAL failure while unmapping is only logged.
+ */
+int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
+{
+ struct pci_dev *parent;
+ uint8_t bcomp, dcomp, fcomp;
+ int64_t rc;
+ long rid_end, rid;
+
+ /* Currently, we just deconfigure VF PEs. The bus PE is always there. */
+ if (pe->pbus) {
+ int count;
+
+ dcomp = OPAL_IGNORE_RID_DEVICE_NUMBER;
+ fcomp = OPAL_IGNORE_RID_FUNCTION_NUMBER;
+ parent = pe->pbus->self;
+ if (pe->flags & PNV_IODA_PE_BUS_ALL)
+ count = resource_size(&pe->pbus->busn_res);
+ else
+ count = 1;
+
+ /* Translate the number of buses into a bus-number compare mode */
+ switch(count) {
+ case 1: bcomp = OpalPciBusAll; break;
+ case 2: bcomp = OpalPciBus7Bits; break;
+ case 4: bcomp = OpalPciBus6Bits; break;
+ case 8: bcomp = OpalPciBus5Bits; break;
+ case 16: bcomp = OpalPciBus4Bits; break;
+ case 32: bcomp = OpalPciBus3Bits; break;
+ default:
+ dev_err(&pe->pbus->dev, "Number of subordinate buses %d unsupported\n",
+ count);
+ /* Do an exact match only */
+ bcomp = OpalPciBusAll;
+ }
+ /* One RID per devfn: 256 RIDs for every bus covered */
+ rid_end = pe->rid + (count << 8);
+ } else {
+#ifdef CONFIG_PCI_IOV
+ if (pe->flags & PNV_IODA_PE_VF)
+ parent = pe->parent_dev;
+ else
+#endif
+ parent = pe->pdev->bus->self;
+ bcomp = OpalPciBusAll;
+ dcomp = OPAL_COMPARE_RID_DEVICE_NUMBER;
+ fcomp = OPAL_COMPARE_RID_FUNCTION_NUMBER;
+ /* A device PE covers exactly one RID */
+ rid_end = pe->rid + 1;
+ }
+
+ /* Clear the reverse map */
+ for (rid = pe->rid; rid < rid_end; rid++)
+ phb->ioda.pe_rmap[rid] = IODA_INVALID_PE;
+
+ /*
+ * Release from all parents PELT-V. NPUs don't have a PELTV
+ * table
+ */
+ if (phb->type != PNV_PHB_NPU_OCAPI)
+ pnv_ioda_unset_peltv(phb, pe, parent);
+
+ rc = opal_pci_set_pe(phb->opal_id, pe->pe_number, pe->rid,
+ bcomp, dcomp, fcomp, OPAL_UNMAP_PE);
+ if (rc)
+ pe_err(pe, "OPAL error %lld trying to setup PELT table\n", rc);
+
+ pe->pbus = NULL;
+ pe->pdev = NULL;
+#ifdef CONFIG_PCI_IOV
+ pe->parent_dev = NULL;
+#endif
+
+ return 0;
+}
+
+/*
+ * Program @pe into the hardware: map its RID range in the PELT, add it to
+ * the PELT-Vs (not on NPU OCAPI PHBs) and fill in the reverse RID map.
+ *
+ * Returns 0 on success, -ENXIO when the PELT mapping fails.
+ */
+int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
+{
+ uint8_t bcomp, dcomp, fcomp;
+ long rc, rid_end, rid;
+
+ /* Bus validation ? */
+ if (pe->pbus) {
+ int count;
+
+ dcomp = OPAL_IGNORE_RID_DEVICE_NUMBER;
+ fcomp = OPAL_IGNORE_RID_FUNCTION_NUMBER;
+ if (pe->flags & PNV_IODA_PE_BUS_ALL)
+ count = resource_size(&pe->pbus->busn_res);
+ else
+ count = 1;
+
+ /* Translate the number of buses into a bus-number compare mode */
+ switch(count) {
+ case 1: bcomp = OpalPciBusAll; break;
+ case 2: bcomp = OpalPciBus7Bits; break;
+ case 4: bcomp = OpalPciBus6Bits; break;
+ case 8: bcomp = OpalPciBus5Bits; break;
+ case 16: bcomp = OpalPciBus4Bits; break;
+ case 32: bcomp = OpalPciBus3Bits; break;
+ default:
+ dev_err(&pe->pbus->dev, "Number of subordinate buses %d unsupported\n",
+ count);
+ /* Do an exact match only */
+ bcomp = OpalPciBusAll;
+ }
+ /* One RID per devfn: 256 RIDs for every bus covered */
+ rid_end = pe->rid + (count << 8);
+ } else {
+ bcomp = OpalPciBusAll;
+ dcomp = OPAL_COMPARE_RID_DEVICE_NUMBER;
+ fcomp = OPAL_COMPARE_RID_FUNCTION_NUMBER;
+ /* A device PE covers exactly one RID */
+ rid_end = pe->rid + 1;
+ }
+
+ /*
+ * Associate PE in PELT. We need add the PE into the
+ * corresponding PELT-V as well. Otherwise, the error
+ * originated from the PE might contribute to other
+ * PEs.
+ */
+ rc = opal_pci_set_pe(phb->opal_id, pe->pe_number, pe->rid,
+ bcomp, dcomp, fcomp, OPAL_MAP_PE);
+ if (rc) {
+ pe_err(pe, "OPAL error %ld trying to setup PELT table\n", rc);
+ return -ENXIO;
+ }
+
+ /*
+ * Configure PELTV. NPUs don't have a PELTV table so skip
+ * configuration on them.
+ */
+ /* The result of pnv_ioda_set_peltv() is not checked here */
+ if (phb->type != PNV_PHB_NPU_OCAPI)
+ pnv_ioda_set_peltv(phb, pe, true);
+
+ /* Setup reverse map */
+ for (rid = pe->rid; rid < rid_end; rid++)
+ phb->ioda.pe_rmap[rid] = pe->pe_number;
+
+ pe->mve_number = 0;
+
+ return 0;
+}
+
+/*
+ * Allocate and configure a PE that covers the single device @dev.
+ *
+ * Returns the new PE after adding it to phb->ioda.pe_list, or NULL when
+ * @dev has no pci_dn, already has a PE number assigned, no PE number can
+ * be allocated, or pnv_ioda_configure_pe() fails.
+ */
+static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev)
+{
+ struct pnv_phb *phb = pci_bus_to_pnvhb(dev->bus);
+ struct pci_dn *pdn = pci_get_pdn(dev);
+ struct pnv_ioda_pe *pe;
+
+ if (!pdn) {
+ pr_err("%s: Device tree node not associated properly\n",
+ pci_name(dev));
+ return NULL;
+ }
+ /* The device is already attached to a PE: nothing to set up */
+ if (pdn->pe_number != IODA_INVALID_PE)
+ return NULL;
+
+ pe = pnv_ioda_alloc_pe(phb, 1);
+ if (!pe) {
+ pr_warn("%s: Not enough PE# available, disabling device\n",
+ pci_name(dev));
+ return NULL;
+ }
+
+ /* NOTE: We don't get a reference for the pointer in the PE
+ * data structure, both the device and PE structures should be
+ * destroyed at the same time.
+ *
+ * At some point we want to remove the PDN completely anyways
+ */
+ pdn->pe_number = pe->pe_number;
+ pe->flags = PNV_IODA_PE_DEV;
+ pe->pdev = dev;
+ pe->pbus = NULL;
+ pe->mve_number = -1;
+ /* RID is bus number in the high byte, devfn in the low byte */
+ pe->rid = dev->bus->number << 8 | pdn->devfn;
+ pe->device_count++;
+
+ pe_info(pe, "Associated device to PE\n");
+
+ if (pnv_ioda_configure_pe(phb, pe)) {
+ /* XXX What do we do here ? */
+ pnv_ioda_free_pe(pe);
+ pdn->pe_number = IODA_INVALID_PE;
+ pe->pdev = NULL;
+ return NULL;
+ }
+
+ /* Put PE to the list */
+ mutex_lock(&phb->ioda.pe_list_mutex);
+ list_add_tail(&pe->list, &phb->ioda.pe_list);
+ mutex_unlock(&phb->ioda.pe_list_mutex);
+ return pe;
+}
+
+/*
+ * There are 2 types of PCI bus sensitive PEs: one that is comprised of a
+ * single PCI bus, and another that contains the primary PCI bus and its
+ * subordinate PCI devices and buses. The second type of PE is normally
+ * originated by a PCIe-to-PCI bridge or PLX switch downstream ports.
+ *
+ * @bus: bus the PE is created for
+ * @all: true to also cover the subordinate buses (PNV_IODA_PE_BUS_ALL),
+ *       false to cover @bus only (PNV_IODA_PE_BUS)
+ *
+ * Returns the configured PE after adding it to phb->ioda.pe_list, or NULL
+ * when the bus is already mapped to a PE, no PE number is available, or
+ * pnv_ioda_configure_pe() fails.
+ */
+static struct pnv_ioda_pe *pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all)
+{
+	struct pnv_phb *phb = pci_bus_to_pnvhb(bus);
+	struct pnv_ioda_pe *pe = NULL;
+	unsigned int pe_num;
+
+	/*
+	 * In the partial hotplug case the PE instance might still be alive.
+	 * That is not expected on this path: warn and refuse to set up a
+	 * second PE for the same bus.
+	 */
+	pe_num = phb->ioda.pe_rmap[bus->number << 8];
+	if (WARN_ON(pe_num != IODA_INVALID_PE))
+		return NULL;
+
+	/* PE number for root bus should have been reserved */
+	if (pci_is_root_bus(bus))
+		pe = &phb->ioda.pe_array[phb->ioda.root_pe_idx];
+
+	/* Check if PE is determined by M64 */
+	if (!pe)
+		pe = pnv_ioda_pick_m64_pe(bus, all);
+
+	/* The PE number isn't pinned by M64 */
+	if (!pe)
+		pe = pnv_ioda_alloc_pe(phb, 1);
+
+	if (!pe) {
+		pr_warn("%s: Not enough PE# available for PCI bus %04x:%02x\n",
+			__func__, pci_domain_nr(bus), bus->number);
+		return NULL;
+	}
+
+	pe->flags |= (all ? PNV_IODA_PE_BUS_ALL : PNV_IODA_PE_BUS);
+	pe->pbus = bus;
+	pe->pdev = NULL;
+	pe->mve_number = -1;
+	pe->rid = bus->busn_res.start << 8;
+
+	/* busn_res.start/end are resource_size_t, which %pa (not %pad) prints */
+	if (all)
+		pe_info(pe, "Secondary bus %pa..%pa associated with PE#%x\n",
+			&bus->busn_res.start, &bus->busn_res.end,
+			pe->pe_number);
+	else
+		pe_info(pe, "Secondary bus %pa associated with PE#%x\n",
+			&bus->busn_res.start, pe->pe_number);
+
+	if (pnv_ioda_configure_pe(phb, pe)) {
+		/* XXX What do we do here ? */
+		pnv_ioda_free_pe(pe);
+		pe->pbus = NULL;
+		return NULL;
+	}
+
+	/* Put PE to the list, under the same lock the device PE path takes */
+	mutex_lock(&phb->ioda.pe_list_mutex);
+	list_add_tail(&pe->list, &phb->ioda.pe_list);
+	mutex_unlock(&phb->ioda.pe_list_mutex);
+
+	return pe;
+}
+
+/*
+ * Per-device DMA setup: look up the PE covering @pdev (configuring the
+ * bus PE through pnv_pci_configure_bus() when none exists yet and the
+ * device is not a VF), set up the PE's DMA the first time a non-bridge
+ * device is seen, then point the device at the PE's first TCE table and
+ * bypass base.
+ */
+static void pnv_pci_ioda_dma_dev_setup(struct pci_dev *pdev)
+{
+ struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus);
+ struct pci_dn *pdn = pci_get_pdn(pdev);
+ struct pnv_ioda_pe *pe;
+
+ /* Check if the BDFN for this device is associated with a PE yet */
+ pe = pnv_pci_bdfn_to_pe(phb, pci_dev_id(pdev));
+ if (!pe) {
+ /* VF PEs should be pre-configured in pnv_pci_sriov_enable() */
+ if (WARN_ON(pdev->is_virtfn))
+ return;
+
+ pnv_pci_configure_bus(pdev->bus);
+ pe = pnv_pci_bdfn_to_pe(phb, pci_dev_id(pdev));
+ pci_info(pdev, "Configured PE#%x\n", pe ? pe->pe_number : 0xfffff);
+
+
+ /*
+ * If we can't setup the IODA PE something has gone horribly
+ * wrong and we can't enable DMA for the device.
+ */
+ if (WARN_ON(!pe))
+ return;
+ } else {
+ pci_info(pdev, "Added to existing PE#%x\n", pe->pe_number);
+ }
+
+ /*
+ * We assume that bridges *probably* don't need to do any DMA so we can
+ * skip allocating a TCE table, etc unless we get a non-bridge device.
+ */
+ if (!pe->dma_setup_done && !pci_is_bridge(pdev)) {
+ switch (phb->type) {
+ case PNV_PHB_IODA2:
+ pnv_pci_ioda2_setup_dma_pe(phb, pe);
+ break;
+ default:
+ pr_warn("%s: No DMA for PHB#%x (type %d)\n",
+ __func__, phb->hose->global_number, phb->type);
+ }
+ }
+
+ /* pdn may be NULL; only record the PE number when it exists */
+ if (pdn)
+ pdn->pe_number = pe->pe_number;
+ pe->device_count++;
+
+ WARN_ON(get_dma_ops(&pdev->dev) != &dma_iommu_ops);
+ pdev->dev.archdata.dma_offset = pe->tce_bypass_base;
+ set_iommu_table_base(&pdev->dev, pe->table_group.tables[0]);
+
+ /* PEs with a DMA weight of zero won't have a group */
+ if (pe->table_group.group)
+ iommu_add_device(&pe->table_group, &pdev->dev);
+}
+
+/*
+ * Reconfigure TVE#0 to be usable as 64-bit DMA space.
+ *
+ * The first 4GB of virtual memory for a PE is reserved for 32-bit accesses.
+ * Devices can only access more than that if bit 59 of the PCI address is set
+ * by hardware, which indicates TVE#1 should be used instead of TVE#0.
+ * Many PCI devices are not capable of addressing that many bits, and as a
+ * result are limited to the 4GB of virtual memory made available to 32-bit
+ * devices in TVE#0.
+ *
+ * In order to work around this, reconfigure TVE#0 to be suitable for 64-bit
+ * devices by configuring the virtual memory past the first 4GB inaccessible
+ * by 64-bit DMAs. This should only be used by devices that want more than
+ * 4GB, and only on PEs that have no 32-bit devices.
+ *
+ * Currently this will only work on PHB3 (POWER8).
+ *
+ * Returns 0 on success, or -EIO if the TCE table cannot be allocated or the
+ * OPAL call fails. The table pages are released again on failure.
+ */
+static int pnv_pci_ioda_dma_64bit_bypass(struct pnv_ioda_pe *pe)
+{
+	u64 window_size, table_size, tce_count, addr;
+	struct page *table_pages;
+	u64 tce_order = 28; /* 256MB TCEs */
+	__be64 *tces;
+	s64 rc;
+
+	/*
+	 * Window size needs to be a power of two, but needs to account for
+	 * shifting memory by the 4GB offset required to skip 32bit space.
+	 */
+	window_size = roundup_pow_of_two(memory_hotplug_max() + (1ULL << 32));
+	tce_count = window_size >> tce_order;
+	table_size = tce_count << 3;	/* 8 bytes per TCE */
+
+	if (table_size < PAGE_SIZE)
+		table_size = PAGE_SIZE;
+
+	table_pages = alloc_pages_node(pe->phb->hose->node, GFP_KERNEL,
+				       get_order(table_size));
+	if (!table_pages)
+		goto err;
+
+	tces = page_address(table_pages);
+	if (!tces)
+		goto err_free;
+
+	memset(tces, 0, table_size);
+
+	/* Map all of memory 1:1, shifted up by 4GB; entries below stay zero */
+	for (addr = 0; addr < memory_hotplug_max(); addr += (1ULL << tce_order)) {
+		tces[(addr + (1ULL << 32)) >> tce_order] =
+			cpu_to_be64(addr | TCE_PCI_READ | TCE_PCI_WRITE);
+	}
+
+	rc = opal_pci_map_pe_dma_window(pe->phb->opal_id,
+					pe->pe_number,
+					/* reconfigure window 0 */
+					(pe->pe_number << 1) + 0,
+					1,
+					__pa(tces),
+					table_size,
+					1 << tce_order);
+	if (rc == OPAL_SUCCESS) {
+		pe_info(pe, "Using 64-bit DMA iommu bypass (through TVE#0)\n");
+		return 0;
+	}
+
+err_free:
+	/*
+	 * The window was not installed, so the table is presumably not
+	 * referenced by the PHB: give the pages back instead of leaking them.
+	 */
+	__free_pages(table_pages, get_order(table_size));
+err:
+	pe_err(pe, "Error configuring 64-bit DMA bypass\n");
+	return -EIO;
+}
+
+/*
+ * Tell whether @pdev can do DMA without going through the 32-bit TCE
+ * window, given the addressing capability described by @dma_mask.
+ *
+ * Two ways lead to "true": the PE's bypass window is enabled and the mask
+ * reaches its top, or (PHB3 only) TVE#0 gets reconfigured for 64-bit DMA,
+ * in which case the device's dma_offset is set to 4GB.
+ */
+static bool pnv_pci_ioda_iommu_bypass_supported(struct pci_dev *pdev,
+		u64 dma_mask)
+{
+	struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus);
+	struct pci_dn *pdn = pci_get_pdn(pdev);
+	struct pnv_ioda_pe *pe;
+
+	if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE))
+		return false;
+
+	pe = &phb->ioda.pe_array[pdn->pe_number];
+
+	/* Regular bypass through the PE's bypass window */
+	if (pe->tce_bypass_enabled) {
+		u64 top = pe->tce_bypass_base + memblock_end_of_DRAM() - 1;
+
+		if (dma_mask >= top)
+			return true;
+	}
+
+	/*
+	 * If the device can't set the TCE bypass bit but still wants
+	 * to access 4GB or more, on PHB3 we can reconfigure TVE#0 to
+	 * bypass the 32-bit region and be usable for 64-bit DMAs.
+	 * The device needs to be able to address all of this space.
+	 */
+	if (!(dma_mask >> 32))
+		return false;
+	if (dma_mask <= (memory_hotplug_max() + (1ULL << 32)))
+		return false;
+	/* pe->pdev should be set if it's a single device, pe->pbus if not */
+	if (pe->device_count != 1 && pe->pbus)
+		return false;
+	if (phb->model != PNV_PHB_MODEL_PHB3)
+		return false;
+
+	/* Configure the bypass mode */
+	if (pnv_pci_ioda_dma_64bit_bypass(pe))
+		return false;
+
+	/* 4GB offset bypasses 32-bit space */
+	pdev->dev.archdata.dma_offset = (1ULL << 32);
+	return true;
+}
+
+/* Return the address of the TCE invalidate register, at offset 0x210 in phb->regs */
+static inline __be64 __iomem *pnv_ioda_get_inval_reg(struct pnv_phb *phb)
+{
+ return phb->regs + 0x210;
+}
+
+#ifdef CONFIG_IOMMU_API
+/*
+ * Common for IODA1 and IODA2.
+ *
+ * Forwards to pnv_tce_xchg() and returns its result; no TCE invalidation
+ * is issued from here.
+ */
+static int pnv_ioda_tce_xchg_no_kill(struct iommu_table *tbl, long index,
+ unsigned long *hpa, enum dma_data_direction *direction)
+{
+ return pnv_tce_xchg(tbl, index, hpa, direction);
+}
+#endif
+
+/* Command bits OR-ed into the value written to the PHB3 TCE invalidate register */
+#define PHB3_TCE_KILL_INVAL_ALL PPC_BIT(0)
+#define PHB3_TCE_KILL_INVAL_PE PPC_BIT(1)
+#define PHB3_TCE_KILL_INVAL_ONE PPC_BIT(2)
+
+/* Invalidate every TCE belonging to @pe with a single PHB3 register write */
+static inline void pnv_pci_phb3_tce_invalidate_pe(struct pnv_ioda_pe *pe)
+{
+	__be64 __iomem *kill_reg = pnv_ioda_get_inval_reg(pe->phb);
+	unsigned long cmd;
+
+	/* 01xb - invalidate TCEs that match the specified PE# */
+	cmd = PHB3_TCE_KILL_INVAL_PE | (pe->pe_number & 0xFF);
+
+	mb(); /* Ensure above stores are visible */
+	__raw_writeq_be(cmd, kill_reg);
+}
+
+/*
+ * Invalidate @npages TCEs of @pe starting at entry @index, one register
+ * write per IOMMU page of size (1 << @shift).
+ */
+static void pnv_pci_phb3_tce_invalidate(struct pnv_ioda_pe *pe,
+		unsigned shift, unsigned long index,
+		unsigned long npages)
+{
+	__be64 __iomem *kill_reg = pnv_ioda_get_inval_reg(pe->phb);
+	unsigned long cur, last, step;
+
+	/* We'll invalidate DMA address in PE scope */
+	cur = PHB3_TCE_KILL_INVAL_ONE;
+	cur |= (pe->pe_number & 0xFF);
+	last = cur;
+
+	/* Add the first and last DMA address, and work out the stride */
+	cur |= (index << shift);
+	last |= ((index + npages - 1) << shift);
+	step = (0x1ull << shift);
+	mb();
+
+	for (; cur <= last; cur += step)
+		__raw_writeq_be(cur, kill_reg);
+}
+
+/*
+ * Invalidate all TCEs of @pe: directly through the invalidate register on
+ * a PHB3 whose registers are available, through OPAL otherwise.
+ */
+static inline void pnv_pci_ioda2_tce_invalidate_pe(struct pnv_ioda_pe *pe)
+{
+	struct pnv_phb *phb = pe->phb;
+
+	if (phb->model == PNV_PHB_MODEL_PHB3 && phb->regs) {
+		pnv_pci_phb3_tce_invalidate_pe(pe);
+		return;
+	}
+
+	opal_pci_tce_kill(phb->opal_id, OPAL_PCI_TCE_KILL_PE,
+			  pe->pe_number, 0, 0, 0);
+}
+
+/*
+ * Invalidate @npages TCEs starting at @index for every PE whose table
+ * group is linked to @tbl. A PHB3 with phb->regs set is handled through
+ * the invalidate register, anything else through opal_pci_tce_kill().
+ */
+static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl,
+ unsigned long index, unsigned long npages)
+{
+ struct iommu_table_group_link *tgl;
+
+ list_for_each_entry_lockless(tgl, &tbl->it_group_list, next) {
+ /* The table group is embedded in the PE structure */
+ struct pnv_ioda_pe *pe = container_of(tgl->table_group,
+ struct pnv_ioda_pe, table_group);
+ struct pnv_phb *phb = pe->phb;
+ unsigned int shift = tbl->it_page_shift;
+
+ if (phb->model == PNV_PHB_MODEL_PHB3 && phb->regs)
+ pnv_pci_phb3_tce_invalidate(pe, shift,
+ index, npages);
+ else
+ opal_pci_tce_kill(phb->opal_id,
+ OPAL_PCI_TCE_KILL_PAGES,
+ pe->pe_number, 1u << shift,
+ index << shift, npages);
+ }
+}
+
+/*
+ * iommu_table_ops::set - write @npages TCEs starting at @index and, when
+ * that succeeded, invalidate the same range. Returns pnv_tce_build()'s
+ * result.
+ */
+static int pnv_ioda2_tce_build(struct iommu_table *tbl, long index,
+		long npages, unsigned long uaddr,
+		enum dma_data_direction direction,
+		unsigned long attrs)
+{
+	int rc;
+
+	rc = pnv_tce_build(tbl, index, npages, uaddr, direction, attrs);
+	if (rc)
+		return rc;
+
+	pnv_pci_ioda2_tce_invalidate(tbl, index, npages);
+	return 0;
+}
+
+/* iommu_table_ops::clear - free @npages TCEs from @index, then invalidate that range */
+static void pnv_ioda2_tce_free(struct iommu_table *tbl, long index,
+ long npages)
+{
+ pnv_tce_free(tbl, index, npages);
+
+ pnv_pci_ioda2_tce_invalidate(tbl, index, npages);
+}
+
+/*
+ * TCE table operations installed on tables created by
+ * pnv_pci_ioda2_create_table(). The exchange/kill/useraddrptr hooks only
+ * exist when CONFIG_IOMMU_API is enabled.
+ */
+static struct iommu_table_ops pnv_ioda2_iommu_ops = {
+ .set = pnv_ioda2_tce_build,
+#ifdef CONFIG_IOMMU_API
+ .xchg_no_kill = pnv_ioda_tce_xchg_no_kill,
+ .tce_kill = pnv_pci_ioda2_tce_invalidate,
+ .useraddrptr = pnv_tce_useraddrptr,
+#endif
+ .clear = pnv_ioda2_tce_free,
+ .get = pnv_tce_get,
+ .free = pnv_pci_ioda2_table_free_pages,
+};
+
+/*
+ * Program DMA window @num of the PE owning @table_group to use @tbl.
+ * On success the table is linked to the group and the PE's TCEs are
+ * invalidated.
+ *
+ * Returns 0 on success or the OPAL error code.
+ */
+static long pnv_pci_ioda2_set_window(struct iommu_table_group *table_group,
+ int num, struct iommu_table *tbl)
+{
+ struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe,
+ table_group);
+ struct pnv_phb *phb = pe->phb;
+ int64_t rc;
+ /* For a multilevel table only the size of one level is handed to OPAL */
+ const unsigned long size = tbl->it_indirect_levels ?
+ tbl->it_level_size : tbl->it_size;
+ const __u64 start_addr = tbl->it_offset << tbl->it_page_shift;
+ const __u64 win_size = tbl->it_size << tbl->it_page_shift;
+
+ pe_info(pe, "Setting up window#%d %llx..%llx pg=%lx\n",
+ num, start_addr, start_addr + win_size - 1,
+ IOMMU_PAGE_SIZE(tbl));
+
+ /*
+ * Map TCE table through TVT. The TVE index is the PE number
+ * shifted by 1 bit for 32-bits DMA space.
+ */
+ rc = opal_pci_map_pe_dma_window(phb->opal_id,
+ pe->pe_number,
+ (pe->pe_number << 1) + num,
+ tbl->it_indirect_levels + 1,
+ __pa(tbl->it_base),
+ size << 3,
+ IOMMU_PAGE_SIZE(tbl));
+ if (rc) {
+ pe_err(pe, "Failed to configure TCE table, err %lld\n", rc);
+ return rc;
+ }
+
+ pnv_pci_link_table_and_group(phb->hose->node, num,
+ tbl, &pe->table_group);
+ pnv_pci_ioda2_tce_invalidate_pe(pe);
+
+ return 0;
+}
+
+/*
+ * Enable or disable the 64-bit bypass window (window #1) of @pe.
+ * pe->tce_bypass_enabled is only updated when the OPAL call succeeds.
+ */
+static void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable)
+{
+	const uint16_t window_id = (pe->pe_number << 1) + 1;
+	phys_addr_t window_len = 0;
+	int64_t rc;
+
+	pe_info(pe, "%sabling 64-bit DMA bypass\n", enable ? "En" : "Dis");
+
+	/* Enabling covers DRAM rounded up to a power of two; disabling passes 0 */
+	if (enable) {
+		window_len = memblock_end_of_DRAM();
+		window_len = roundup_pow_of_two(window_len);
+	}
+
+	rc = opal_pci_map_pe_dma_window_real(pe->phb->opal_id,
+					     pe->pe_number,
+					     window_id,
+					     pe->tce_bypass_base,
+					     window_len);
+	if (rc) {
+		pe_err(pe, "OPAL error %lld configuring bypass window\n", rc);
+		return;
+	}
+
+	pe->tce_bypass_enabled = enable;
+}
+
+/*
+ * Allocate an iommu_table and its TCE pages for window @num of the PE
+ * owning @table_group. Window 0 starts at table_group->tce32_start, any
+ * other window at the PE's tce_bypass_base. On success the table is
+ * handed back through @ptbl.
+ *
+ * Returns 0, -ENOMEM if the table structure cannot be allocated, or the
+ * error from pnv_pci_ioda2_table_alloc_pages().
+ */
+static long pnv_pci_ioda2_create_table(struct iommu_table_group *table_group,
+ int num, __u32 page_shift, __u64 window_size, __u32 levels,
+ bool alloc_userspace_copy, struct iommu_table **ptbl)
+{
+ struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe,
+ table_group);
+ int nid = pe->phb->hose->node;
+ __u64 bus_offset = num ? pe->tce_bypass_base : table_group->tce32_start;
+ long ret;
+ struct iommu_table *tbl;
+
+ tbl = pnv_pci_table_alloc(nid);
+ if (!tbl)
+ return -ENOMEM;
+
+ tbl->it_ops = &pnv_ioda2_iommu_ops;
+
+ ret = pnv_pci_ioda2_table_alloc_pages(nid,
+ bus_offset, page_shift, window_size,
+ levels, alloc_userspace_copy, tbl);
+ if (ret) {
+ /* Drop the reference on the table allocated above */
+ iommu_tce_table_put(tbl);
+ return ret;
+ }
+
+ *ptbl = tbl;
+
+ return 0;
+}
+
+/*
+ * Create and install the default DMA configuration of @pe: a TCE table
+ * for window 0 and, unless pnv_iommu_bypass_disabled is set, the 64-bit
+ * bypass window.
+ *
+ * Returns 0, or the error from pnv_pci_ioda2_create_table(). A failure to
+ * initialise or map the table is logged but still returns 0; in that case
+ * no default table is installed and pe->pdev (if any) gets a NULL table
+ * base.
+ */
+static long pnv_pci_ioda2_setup_default_config(struct pnv_ioda_pe *pe)
+{
+	struct iommu_table *tbl = NULL;
+	long rc;
+	unsigned long res_start, res_end;
+
+	/*
+	 * crashkernel= specifies the kdump kernel's maximum memory at
+	 * some offset and there is no guarantee the result is a power
+	 * of 2, which will cause errors later.
+	 */
+	const u64 max_memory = __rounddown_pow_of_two(memory_hotplug_max());
+
+	/*
+	 * In memory constrained environments, e.g. kdump kernel, the
+	 * DMA window can be larger than available memory, which will
+	 * cause errors later.
+	 */
+	const u64 maxblock = 1UL << (PAGE_SHIFT + MAX_ORDER);
+
+	/*
+	 * We create the default window as big as we can. The constraint is
+	 * the max order of allocation possible. The TCE table is likely to
+	 * end up being multilevel and with on-demand allocation in place,
+	 * the initial use is not going to be huge as the default window aims
+	 * to support crippled devices (i.e. not fully 64bit DMAble) only.
+	 */
+	/* iommu_table::it_map uses 1 bit per IOMMU page, hence 8 */
+	const u64 window_size = min((maxblock * 8) << PAGE_SHIFT, max_memory);
+	/* Each TCE level cannot exceed maxblock so go multilevel if needed */
+	unsigned long tces_order = ilog2(window_size >> PAGE_SHIFT);
+	unsigned long tcelevel_order = ilog2(maxblock >> 3);
+	unsigned int levels = tces_order / tcelevel_order;
+
+	if (tces_order % tcelevel_order)
+		levels += 1;
+	/*
+	 * We try to stick to default levels (which is >1 at the moment) in
+	 * order to save memory by relying on on-demand TCE level allocation.
+	 */
+	levels = max_t(unsigned int, levels, POWERNV_IOMMU_DEFAULT_LEVELS);
+
+	rc = pnv_pci_ioda2_create_table(&pe->table_group, 0, PAGE_SHIFT,
+					window_size, levels, false, &tbl);
+	if (rc) {
+		pe_err(pe, "Failed to create 32-bit TCE table, err %ld\n",
+		       rc);
+		return rc;
+	}
+
+	/* We use top part of 32bit space for MMIO so exclude it from DMA */
+	res_start = 0;
+	res_end = 0;
+	if (window_size > pe->phb->ioda.m32_pci_base) {
+		res_start = pe->phb->ioda.m32_pci_base >> tbl->it_page_shift;
+		res_end = min(window_size, SZ_4G) >> tbl->it_page_shift;
+	}
+
+	tbl->it_index = (pe->phb->hose->global_number << 16) | pe->pe_number;
+	if (iommu_init_table(tbl, pe->phb->hose->node, res_start, res_end))
+		rc = pnv_pci_ioda2_set_window(&pe->table_group, 0, tbl);
+	else
+		rc = -ENOMEM;
+	if (rc) {
+		pe_err(pe, "Failed to configure 32-bit TCE table, err %ld\n", rc);
+		iommu_tce_table_put(tbl);
+		tbl = NULL; /* This clears iommu_table_base below */
+	}
+	if (!pnv_iommu_bypass_disabled)
+		pnv_pci_ioda2_set_bypass(pe, true);
+
+	/*
+	 * Set table base for the case of IOMMU DMA use. Usually this is done
+	 * from dma_dev_setup() which is not called when a device is returned
+	 * from VFIO so do it here.
+	 */
+	if (pe->pdev)
+		set_iommu_table_base(&pe->pdev->dev, tbl);
+
+	return 0;
+}
+
+/*
+ * Remove DMA window @num of the PE owning @table_group: clear the window
+ * through OPAL, invalidate the PE's TCEs if that worked, and unlink the
+ * table from the group either way.
+ *
+ * Returns the OPAL return code (0 on success).
+ */
+static long pnv_pci_ioda2_unset_window(struct iommu_table_group *table_group,
+ int num)
+{
+ struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe,
+ table_group);
+ struct pnv_phb *phb = pe->phb;
+ long ret;
+
+ pe_info(pe, "Removing DMA window #%d\n", num);
+
+ ret = opal_pci_map_pe_dma_window(phb->opal_id, pe->pe_number,
+ (pe->pe_number << 1) + num,
+ 0/* levels */, 0/* table address */,
+ 0/* table size */, 0/* page size */);
+ if (ret)
+ pe_warn(pe, "Unmapping failed, ret = %ld\n", ret);
+ else
+ pnv_pci_ioda2_tce_invalidate_pe(pe);
+
+ pnv_pci_unlink_table_and_group(table_group->tables[num], table_group);
+
+ return ret;
+}
+
+#ifdef CONFIG_IOMMU_API
+/*
+ * Compute how many bytes of TCE tables a DMA window needs.
+ *
+ * @page_shift:  log2 of the IOMMU page size
+ * @window_size: window size in bytes, must be a power of two
+ * @levels:      number of TCE table levels, 1..POWERNV_IOMMU_MAX_LEVELS
+ *
+ * Returns the size of the hardware table plus an equally sized userspace
+ * copy, or 0 when the arguments are invalid (including a window smaller
+ * than one IOMMU page or a table too large to express).
+ */
+unsigned long pnv_pci_ioda2_get_table_size(__u32 page_shift,
+		__u64 window_size, __u32 levels)
+{
+	unsigned long bytes = 0;
+	unsigned window_shift, entries_shift, table_shift;
+	unsigned long tce_table_size;
+	unsigned long direct_table_size;
+
+	/* Validate first: the shifts below are only defined for sane input */
+	if (!levels || (levels > POWERNV_IOMMU_MAX_LEVELS) ||
+	    !is_power_of_2(window_size))
+		return 0;
+
+	window_shift = ilog2(window_size);
+	if (window_shift < page_shift)
+		return 0;
+
+	entries_shift = window_shift - page_shift;
+	table_shift = entries_shift + 3;	/* 8 bytes per TCE */
+	if (table_shift >= BITS_PER_LONG)
+		return 0;
+
+	tce_table_size = max(0x1000UL, 1UL << table_shift);
+
+	/* Calculate a direct table size from window_size and levels */
+	entries_shift = (entries_shift + levels - 1) / levels;
+	table_shift = entries_shift + 3;
+	table_shift = max_t(unsigned, table_shift, PAGE_SHIFT);
+	direct_table_size = 1UL << table_shift;
+
+	/* Sum up every level, each one indexing the level below it */
+	for ( ; levels; --levels) {
+		bytes += ALIGN(tce_table_size, direct_table_size);
+
+		tce_table_size /= direct_table_size;
+		tce_table_size <<= 3;
+		tce_table_size = max_t(unsigned long,
+				tce_table_size, direct_table_size);
+	}
+
+	return bytes + bytes; /* one for HW table, one for userspace copy */
+}
+
+/*
+ * iommu_table_group_ops::create_table - like pnv_pci_ioda2_create_table()
+ * but always with a userspace copy, and with it_allocated_size recorded
+ * on the new table.
+ */
+static long pnv_pci_ioda2_create_table_userspace(
+		struct iommu_table_group *table_group,
+		int num, __u32 page_shift, __u64 window_size, __u32 levels,
+		struct iommu_table **ptbl)
+{
+	long rc;
+
+	rc = pnv_pci_ioda2_create_table(table_group, num, page_shift,
+					window_size, levels, true, ptbl);
+	if (rc)
+		return rc;
+
+	(*ptbl)->it_allocated_size =
+		pnv_pci_ioda2_get_table_size(page_shift, window_size, levels);
+	return 0;
+}
+
+/*
+ * Point every device on @bus at the first TCE table and bypass base of
+ * @pe, descending into subordinate buses when the PE covers them
+ * (PNV_IODA_PE_BUS_ALL).
+ */
+static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, struct pci_bus *bus)
+{
+	struct pci_dev *pdev;
+
+	list_for_each_entry(pdev, &bus->devices, bus_list) {
+		set_iommu_table_base(&pdev->dev, pe->table_group.tables[0]);
+		pdev->dev.archdata.dma_offset = pe->tce_bypass_base;
+
+		if (!(pe->flags & PNV_IODA_PE_BUS_ALL))
+			continue;
+		if (!pdev->subordinate)
+			continue;
+
+		pnv_ioda_setup_bus_dma(pe, pdev->subordinate);
+	}
+}
+
+/*
+ * iommu_table_group_ops::take_ownership - tear down the default DMA
+ * configuration of the PE: disable bypass, remove window 0, refresh the
+ * table base of the PE's device(s) and drop the default table.
+ *
+ * Always returns 0; it is a no-op when window 0 has no table.
+ */
+static long pnv_ioda2_take_ownership(struct iommu_table_group *table_group)
+{
+ struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe,
+ table_group);
+ /* Store @tbl as pnv_pci_ioda2_unset_window() resets it */
+ struct iommu_table *tbl = pe->table_group.tables[0];
+
+ /*
+ * iommu_ops transfers the ownership per a device and we model
+ * the group ownership with the first device in the group.
+ */
+ if (!tbl)
+ return 0;
+
+ pnv_pci_ioda2_set_bypass(pe, false);
+ pnv_pci_ioda2_unset_window(&pe->table_group, 0);
+ if (pe->pbus)
+ pnv_ioda_setup_bus_dma(pe, pe->pbus);
+ else if (pe->pdev)
+ set_iommu_table_base(&pe->pdev->dev, NULL);
+ iommu_tce_table_put(tbl);
+
+ return 0;
+}
+
+/*
+ * iommu_table_group_ops::release_ownership - restore the default DMA
+ * configuration of the PE, unless window 0 already has a table.
+ */
+static void pnv_ioda2_release_ownership(struct iommu_table_group *table_group)
+{
+ struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe,
+ table_group);
+
+ /* See the comment about iommu_ops above */
+ if (pe->table_group.tables[0])
+ return;
+ pnv_pci_ioda2_setup_default_config(pe);
+ /* For bus PEs, propagate the new table base to every device */
+ if (pe->pbus)
+ pnv_ioda_setup_bus_dma(pe, pe->pbus);
+}
+
+/* Table group operations installed on a PE by pnv_pci_ioda2_setup_dma_pe() */
+static struct iommu_table_group_ops pnv_pci_ioda2_ops = {
+ .get_table_size = pnv_pci_ioda2_get_table_size,
+ .create_table = pnv_pci_ioda2_create_table_userspace,
+ .set_window = pnv_pci_ioda2_set_window,
+ .unset_window = pnv_pci_ioda2_unset_window,
+ .take_ownership = pnv_ioda2_take_ownership,
+ .release_ownership = pnv_ioda2_release_ownership,
+};
+#endif
+
+/*
+ * Set up DMA for @pe on an IODA2 PHB: fill in the table group limits,
+ * install the default configuration and, with CONFIG_IOMMU_API, register
+ * the IOMMU group. pe->dma_setup_done is only set when the default
+ * configuration could be created.
+ */
+void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
+ struct pnv_ioda_pe *pe)
+{
+ int64_t rc;
+
+ /* TVE #1 is selected by PCI address bit 59 */
+ pe->tce_bypass_base = 1ull << 59;
+
+ /* The PE will reserve all possible 32-bits space */
+ pe_info(pe, "Setting up 32-bit TCE table at 0..%08x\n",
+ phb->ioda.m32_pci_base);
+
+ /* Setup linux iommu table */
+ pe->table_group.tce32_start = 0;
+ pe->table_group.tce32_size = phb->ioda.m32_pci_base;
+ pe->table_group.max_dynamic_windows_supported =
+ IOMMU_TABLE_GROUP_MAX_TABLES;
+ pe->table_group.max_levels = POWERNV_IOMMU_MAX_LEVELS;
+ pe->table_group.pgsizes = pnv_ioda_parse_tce_sizes(phb);
+
+ rc = pnv_pci_ioda2_setup_default_config(pe);
+ if (rc)
+ return;
+
+#ifdef CONFIG_IOMMU_API
+ pe->table_group.ops = &pnv_pci_ioda2_ops;
+ iommu_register_group(&pe->table_group, phb->hose->global_number,
+ pe->pe_number);
+#endif
+ pe->dma_setup_done = true;
+}
+
+/*
+ * Called from KVM in real mode to EOI passthru interrupts. The ICP
+ * EOI is handled directly in KVM in kvmppc_deliver_irq_passthru().
+ *
+ * The IRQ data is mapped in the PCI-MSI domain and the EOI OPAL call
+ * needs an HW IRQ number mapped in the XICS IRQ domain. The HW IRQ
+ * numbers of the in-the-middle MSI domain are vector numbers and it's
+ * good enough for OPAL. Use that.
+ */
+int64_t pnv_opal_pci_msi_eoi(struct irq_data *d)
+{
+ /* Both the PHB and the HW IRQ number are taken from the parent irq_data */
+ struct pci_controller *hose = irq_data_get_irq_chip_data(d->parent_data);
+ struct pnv_phb *phb = hose->private_data;
+
+ return opal_pci_msi_eoi(phb->opal_id, d->parent_data->hwirq);
+}
+
+/*
+ * The IRQ data is mapped in the XICS domain, with OPAL HW IRQ numbers.
+ *
+ * EOI handler installed by pnv_set_msi_irq_chip(): issue the OPAL MSI EOI
+ * for the interrupt (warning once if it fails), then the native ICP EOI.
+ */
+static void pnv_ioda2_msi_eoi(struct irq_data *d)
+{
+ int64_t rc;
+ unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
+ struct pci_controller *hose = irq_data_get_irq_chip_data(d);
+ struct pnv_phb *phb = hose->private_data;
+
+ rc = opal_pci_msi_eoi(phb->opal_id, hw_irq);
+ WARN_ON_ONCE(rc);
+
+ icp_native_eoi(d);
+}
+
+/*
+ * P8/CXL only.
+ *
+ * Switch @virq over to the PHB's private copy of its irq_chip, whose EOI
+ * handler also issues the OPAL MSI EOI. Does nothing on non-PHB3 models.
+ */
+void pnv_set_msi_irq_chip(struct pnv_phb *phb, unsigned int virq)
+{
+	/* The MSI EOI OPAL call is only needed on PHB3 */
+	if (phb->model != PNV_PHB_MODEL_PHB3)
+		return;
+
+	if (!phb->ioda.irq_chip_init) {
+		/*
+		 * First time we setup an MSI IRQ, we need to setup the
+		 * corresponding IRQ chip to route correctly.
+		 */
+		struct irq_data *irqd = irq_get_irq_data(virq);
+		struct irq_chip *orig_chip = irq_data_get_irq_chip(irqd);
+
+		phb->ioda.irq_chip_init = 1;
+		phb->ioda.irq_chip = *orig_chip;
+		phb->ioda.irq_chip.irq_eoi = pnv_ioda2_msi_eoi;
+	}
+
+	irq_set_chip(virq, &phb->ioda.irq_chip);
+	irq_set_chip_data(virq, phb->hose);
+}
+
+static struct irq_chip pnv_pci_msi_irq_chip;
+
+/*
+ * Returns true iff chip is something that we could call
+ * pnv_opal_pci_msi_eoi for, i.e. iff @chip is pnv_pci_msi_irq_chip.
+ */
+bool is_pnv_opal_msi(struct irq_chip *chip)
+{
+ return chip == &pnv_pci_msi_irq_chip;
+}
+EXPORT_SYMBOL_GPL(is_pnv_opal_msi);
+
+/*
+ * Bind MSI vector @xive_num to the PE of @dev and fill @msg with the
+ * address/data pair OPAL reports for it.
+ *
+ * @is_64 selects a 64-bit MSI address; it is forced to 32-bit when
+ * dev->no_64bit_msi is set.
+ *
+ * Returns 0 on success, -ENXIO if the device has no PE or the PE has no
+ * MVE, -EIO if an OPAL call fails.
+ */
+static int __pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev,
+ unsigned int xive_num,
+ unsigned int is_64, struct msi_msg *msg)
+{
+ struct pnv_ioda_pe *pe = pnv_ioda_get_pe(dev);
+ __be32 data;
+ int rc;
+
+ dev_dbg(&dev->dev, "%s: setup %s-bit MSI for vector #%d\n", __func__,
+ is_64 ? "64" : "32", xive_num);
+
+ /* No PE assigned ? bail out ... no MSI for you ! */
+ if (pe == NULL)
+ return -ENXIO;
+
+ /* Check if we have an MVE */
+ if (pe->mve_number < 0)
+ return -ENXIO;
+
+ /* Force 32-bit MSI on some broken devices */
+ if (dev->no_64bit_msi)
+ is_64 = 0;
+
+ /* Assign XIVE to PE */
+ rc = opal_pci_set_xive_pe(phb->opal_id, pe->pe_number, xive_num);
+ if (rc) {
+ pr_warn("%s: OPAL error %d setting XIVE %d PE\n",
+ pci_name(dev), rc, xive_num);
+ return -EIO;
+ }
+
+ if (is_64) {
+ __be64 addr64;
+
+ rc = opal_get_msi_64(phb->opal_id, pe->mve_number, xive_num, 1,
+ &addr64, &data);
+ if (rc) {
+ pr_warn("%s: OPAL error %d getting 64-bit MSI data\n",
+ pci_name(dev), rc);
+ return -EIO;
+ }
+ /* OPAL hands the address back big-endian; split it in two halves */
+ msg->address_hi = be64_to_cpu(addr64) >> 32;
+ msg->address_lo = be64_to_cpu(addr64) & 0xfffffffful;
+ } else {
+ __be32 addr32;
+
+ rc = opal_get_msi_32(phb->opal_id, pe->mve_number, xive_num, 1,
+ &addr32, &data);
+ if (rc) {
+ pr_warn("%s: OPAL error %d getting 32-bit MSI data\n",
+ pci_name(dev), rc);
+ return -EIO;
+ }
+ msg->address_hi = 0;
+ msg->address_lo = be32_to_cpu(addr32);
+ }
+ msg->data = be32_to_cpu(data);
+
+ return 0;
+}
+
+/*
+ * The msi_free() op is called before irq_domain_free_irqs_top() when
+ * the handler data is still available. Use that to clear the XIVE
+ * controller.
+ */
+static void pnv_msi_ops_msi_free(struct irq_domain *domain,
+ struct msi_domain_info *info,
+ unsigned int irq)
+{
+ /* Nothing to release unless the XIVE interrupt controller is in use */
+ if (xive_enabled())
+ xive_irq_free_data(irq);
+}
+
+/* MSI domain ops referenced by pnv_msi_domain_info; only msi_free is overridden */
+static struct msi_domain_ops pnv_pci_msi_domain_ops = {
+ .msi_free = pnv_msi_ops_msi_free,
+};
+
+/* Forward irq_shutdown to the parent irq_data's chip, when it implements it */
+static void pnv_msi_shutdown(struct irq_data *d)
+{
+	struct irq_data *parent = d->parent_data;
+
+	if (!parent->chip->irq_shutdown)
+		return;
+
+	parent->chip->irq_shutdown(parent);
+}
+
+/* Mask the MSI at the PCI device first, then in the parent interrupt chip */
+static void pnv_msi_mask(struct irq_data *d)
+{
+ pci_msi_mask_irq(d);
+ irq_chip_mask_parent(d);
+}
+
+/* Unmask the MSI at the PCI device first, then in the parent interrupt chip */
+static void pnv_msi_unmask(struct irq_data *d)
+{
+ pci_msi_unmask_irq(d);
+ irq_chip_unmask_parent(d);
+}
+
+/*
+ * irq_chip used by pnv_msi_domain_info for the PCI-MSI domain; EOI is
+ * passed straight to the parent chip.
+ */
+static struct irq_chip pnv_pci_msi_irq_chip = {
+ .name = "PNV-PCI-MSI",
+ .irq_shutdown = pnv_msi_shutdown,
+ .irq_mask = pnv_msi_mask,
+ .irq_unmask = pnv_msi_unmask,
+ .irq_eoi = irq_chip_eoi_parent,
+};
+
+/* Passed to pci_msi_create_irq_domain() in pnv_msi_allocate_domains() */
+static struct msi_domain_info pnv_msi_domain_info = {
+ .flags = (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
+ MSI_FLAG_MULTI_PCI_MSI | MSI_FLAG_PCI_MSIX),
+ .ops = &pnv_pci_msi_domain_ops,
+ .chip = &pnv_pci_msi_irq_chip,
+};
+
+/*
+ * irq_compose_msi_msg callback: fill @msg for the vector d->hwirq of the
+ * device owning the MSI descriptor. A failure is only logged.
+ */
+static void pnv_msi_compose_msg(struct irq_data *d, struct msi_msg *msg)
+{
+ struct msi_desc *entry = irq_data_get_msi_desc(d);
+ struct pci_dev *pdev = msi_desc_to_pci_dev(entry);
+ struct pci_controller *hose = irq_data_get_irq_chip_data(d);
+ struct pnv_phb *phb = hose->private_data;
+ int rc;
+
+ rc = __pnv_pci_ioda_msi_setup(phb, pdev, d->hwirq,
+ entry->pci.msi_attrib.is_64, msg);
+ if (rc)
+ dev_err(&pdev->dev, "Failed to setup %s-bit MSI #%ld : %d\n",
+ entry->pci.msi_attrib.is_64 ? "64" : "32", d->hwirq, rc);
+}
+
+/*
+ * The IRQ data is mapped in the MSI domain in which HW IRQ numbers
+ * correspond to vector numbers.
+ */
+static void pnv_msi_eoi(struct irq_data *d)
+{
+ struct pci_controller *hose = irq_data_get_irq_chip_data(d);
+ struct pnv_phb *phb = hose->private_data;
+
+ if (phb->model == PNV_PHB_MODEL_PHB3) {
+ /*
+ * The EOI OPAL call takes an OPAL HW IRQ number but
+ * since it is translated into a vector number in
+ * OPAL, use that directly.
+ */
+ WARN_ON_ONCE(opal_pci_msi_eoi(phb->opal_id, d->hwirq));
+ }
+
+ /* Always finish with the parent chip's EOI, whatever the PHB model */
+ irq_chip_eoi_parent(d);
+}
+
+/*
+ * irq_chip of the "PNV-MSI" domain, set on each interrupt by
+ * pnv_irq_domain_alloc(). Mask, unmask and affinity are delegated to the
+ * parent chip.
+ */
+static struct irq_chip pnv_msi_irq_chip = {
+ .name = "PNV-MSI",
+ .irq_shutdown = pnv_msi_shutdown,
+ .irq_mask = irq_chip_mask_parent,
+ .irq_unmask = irq_chip_unmask_parent,
+ .irq_eoi = pnv_msi_eoi,
+ .irq_set_affinity = irq_chip_set_affinity_parent,
+ .irq_compose_msi_msg = pnv_msi_compose_msg,
+};
+
+/*
+ * Allocate @virq in the parent domain as rising-edge interrupt @hwirq.
+ * Returns the result of irq_domain_alloc_irqs_parent().
+ */
+static int pnv_irq_parent_domain_alloc(struct irq_domain *domain,
+		unsigned int virq, int hwirq)
+{
+	struct irq_fwspec spec;
+
+	spec.fwnode = domain->parent->fwnode;
+	spec.param_count = 2;
+	spec.param[0] = hwirq;
+	spec.param[1] = IRQ_TYPE_EDGE_RISING;
+
+	return irq_domain_alloc_irqs_parent(domain, virq, 1, &spec);
+}
+
+/*
+ * irq_domain_ops::alloc - reserve @nr_irqs consecutive MSI vectors in the
+ * PHB's bitmap and allocate one parent interrupt per vector, starting at
+ * @virq.
+ *
+ * Returns 0 on success, -ENOSPC if the bitmap has no room, or the error
+ * from the parent domain. On failure every parent interrupt allocated so
+ * far and the whole bitmap range are released.
+ */
+static int pnv_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
+		unsigned int nr_irqs, void *arg)
+{
+	struct pci_controller *hose = domain->host_data;
+	struct pnv_phb *phb = hose->private_data;
+	msi_alloc_info_t *info = arg;
+	struct pci_dev *pdev = msi_desc_to_pci_dev(info->desc);
+	int hwirq;
+	int i, ret;
+
+	hwirq = msi_bitmap_alloc_hwirqs(&phb->msi_bmp, nr_irqs);
+	if (hwirq < 0) {
+		dev_warn(&pdev->dev, "failed to find a free MSI\n");
+		return -ENOSPC;
+	}
+
+	dev_dbg(&pdev->dev, "%s bridge %pOF %d/%x #%d\n", __func__,
+		hose->dn, virq, hwirq, nr_irqs);
+
+	for (i = 0; i < nr_irqs; i++) {
+		/* The parent domain uses absolute HW IRQ numbers */
+		ret = pnv_irq_parent_domain_alloc(domain, virq + i,
+						  phb->msi_base + hwirq + i);
+		if (ret)
+			goto out;
+
+		irq_domain_set_hwirq_and_chip(domain, virq + i, hwirq + i,
+					      &pnv_msi_irq_chip, hose);
+	}
+
+	return 0;
+
+out:
+	/*
+	 * Interrupts [0, i) were allocated in the parent before slot @i
+	 * failed, so exactly @i of them must be freed. Passing i - 1 would
+	 * leak one and underflow the unsigned count when i is 0.
+	 */
+	irq_domain_free_irqs_parent(domain, virq, i);
+	msi_bitmap_free_hwirqs(&phb->msi_bmp, hwirq, nr_irqs);
+	return ret;
+}
+
+/*
+ * irq_domain_ops::free - give the @nr_irqs MSI vectors starting at the
+ * hwirq of @virq back to the PHB's MSI bitmap.
+ */
+static void pnv_irq_domain_free(struct irq_domain *domain, unsigned int virq,
+ unsigned int nr_irqs)
+{
+ struct irq_data *d = irq_domain_get_irq_data(domain, virq);
+ struct pci_controller *hose = irq_data_get_irq_chip_data(d);
+ struct pnv_phb *phb = hose->private_data;
+
+ pr_debug("%s bridge %pOF %d/%lx #%d\n", __func__, hose->dn,
+ virq, d->hwirq, nr_irqs);
+
+ msi_bitmap_free_hwirqs(&phb->msi_bmp, d->hwirq, nr_irqs);
+ /* XIVE domain is cleared through ->msi_free() */
+}
+
+/* Ops of the per-PHB hierarchical IRQ domain created in pnv_msi_allocate_domains() */
+static const struct irq_domain_ops pnv_irq_domain_ops = {
+ .alloc = pnv_irq_domain_alloc,
+ .free = pnv_irq_domain_free,
+};
+
+/*
+ * Create the two IRQ domains used for MSIs on @hose: a hierarchical
+ * "PNV-MSI" domain of @count interrupts on top of the default host, and
+ * the PCI-MSI domain on top of that.
+ *
+ * Returns 0 on success or -ENOMEM, in which case everything created here
+ * has been torn down again.
+ */
+static int __init pnv_msi_allocate_domains(struct pci_controller *hose, unsigned int count)
+{
+	struct pnv_phb *phb = hose->private_data;
+	struct irq_domain *parent = irq_get_default_host();
+
+	hose->fwnode = irq_domain_alloc_named_id_fwnode("PNV-MSI", phb->opal_id);
+	if (!hose->fwnode)
+		return -ENOMEM;
+
+	hose->dev_domain = irq_domain_create_hierarchy(parent, 0, count,
+						       hose->fwnode,
+						       &pnv_irq_domain_ops, hose);
+	if (!hose->dev_domain) {
+		pr_err("PCI: failed to create IRQ domain bridge %pOF (domain %d)\n",
+		       hose->dn, hose->global_number);
+		irq_domain_free_fwnode(hose->fwnode);
+		return -ENOMEM;
+	}
+
+	hose->msi_domain = pci_msi_create_irq_domain(of_node_to_fwnode(hose->dn),
+						     &pnv_msi_domain_info,
+						     hose->dev_domain);
+	if (!hose->msi_domain) {
+		pr_err("PCI: failed to create MSI IRQ domain bridge %pOF (domain %d)\n",
+		       hose->dn, hose->global_number);
+		/*
+		 * Unwind in reverse order of creation: the domain was created
+		 * with hose->fwnode, so remove it before freeing the fwnode.
+		 */
+		irq_domain_remove(hose->dev_domain);
+		irq_domain_free_fwnode(hose->fwnode);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/*
+ * Set up MSI support for @phb: read the MSI range from the device tree,
+ * allocate the MSI bitmap and create the IRQ domains. Silently does
+ * nothing when the PHB node carries no MSI range property.
+ */
+static void __init pnv_pci_init_ioda_msis(struct pnv_phb *phb)
+{
+ unsigned int count;
+ const __be32 *prop = of_get_property(phb->hose->dn,
+ "ibm,opal-msi-ranges", NULL);
+ if (!prop) {
+ /* BML Fallback */
+ prop = of_get_property(phb->hose->dn, "msi-ranges", NULL);
+ }
+ if (!prop)
+ return;
+
+ /* The property is read as two cells: base HW IRQ, then MSI count */
+ phb->msi_base = be32_to_cpup(prop);
+ count = be32_to_cpup(prop + 1);
+ if (msi_bitmap_alloc(&phb->msi_bmp, count, phb->hose->dn)) {
+ pr_err("PCI %d: Failed to allocate MSI bitmap !\n",
+ phb->hose->global_number);
+ return;
+ }
+
+ pr_info(" Allocated bitmap for %d MSIs (base IRQ 0x%x)\n",
+ count, phb->msi_base);
+
+ /* NOTE(review): the return value is not checked here */
+ pnv_msi_allocate_domains(phb->hose, count);
+}
+
+/*
+ * Assign the I/O or M32 segments covered by @res to @pe.
+ *
+ * Resources that are NULL, have no flags, are unset or have an inverted
+ * range are ignored, as are memory resources for which pnv_pci_is_m64()
+ * is true. For every segment overlapped by the resource the segment map of
+ * the PHB is updated and the mapping is pushed to firmware through
+ * opal_pci_map_pe_mmio_window(); the walk stops at the first OPAL error.
+ */
+static void pnv_ioda_setup_pe_res(struct pnv_ioda_pe *pe,
+				  struct resource *res)
+{
+	struct pnv_phb *phb = pe->phb;
+	struct pci_bus_region region;
+	int index;
+	int64_t rc;
+
+	if (!res || !res->flags || res->start > res->end ||
+	    res->flags & IORESOURCE_UNSET)
+		return;
+
+	if (res->flags & IORESOURCE_IO) {
+		/* Translate to an offset inside the PHB I/O space */
+		region.start = res->start - phb->ioda.io_pci_base;
+		region.end = res->end - phb->ioda.io_pci_base;
+		index = region.start / phb->ioda.io_segsize;
+
+		while (index < phb->ioda.total_pe_num &&
+		       region.start <= region.end) {
+			phb->ioda.io_segmap[index] = pe->pe_number;
+			rc = opal_pci_map_pe_mmio_window(phb->opal_id,
+				pe->pe_number, OPAL_IO_WINDOW_TYPE, 0, index);
+			if (rc != OPAL_SUCCESS) {
+				pr_err("%s: Error %lld mapping IO segment#%d to PE#%x\n",
+				       __func__, rc, index, pe->pe_number);
+				break;
+			}
+
+			region.start += phb->ioda.io_segsize;
+			index++;
+		}
+	} else if ((res->flags & IORESOURCE_MEM) &&
+		   !pnv_pci_is_m64(phb, res)) {
+		/* Translate to an offset inside the PHB M32 window */
+		region.start = res->start -
+			       phb->hose->mem_offset[0] -
+			       phb->ioda.m32_pci_base;
+		region.end = res->end -
+			     phb->hose->mem_offset[0] -
+			     phb->ioda.m32_pci_base;
+		index = region.start / phb->ioda.m32_segsize;
+
+		while (index < phb->ioda.total_pe_num &&
+		       region.start <= region.end) {
+			phb->ioda.m32_segmap[index] = pe->pe_number;
+			rc = opal_pci_map_pe_mmio_window(phb->opal_id,
+				pe->pe_number, OPAL_M32_WINDOW_TYPE, 0, index);
+			if (rc != OPAL_SUCCESS) {
+				/* Terminate the line like the IO variant does */
+				pr_err("%s: Error %lld mapping M32 segment#%d to PE#%x\n",
+				       __func__, rc, index, pe->pe_number);
+				break;
+			}
+
+			region.start += phb->ioda.m32_segsize;
+			index++;
+		}
+	}
+}
+
+/*
+ * This function is meant to be called per PE, walking from the top of the
+ * hierarchy to the bottom, so that the I/O or MMIO segments assigned to a
+ * parent PE can be overridden by its child PEs if necessary.
+ */
+static void pnv_ioda_setup_pe_seg(struct pnv_ioda_pe *pe)
+{
+	struct pci_dev *dev;
+	int n;
+
+	/*
+	 * NOTE: Only PCI bus based PEs are handled for now. PCI device
+	 * based PEs, for example SRIOV sensitive VFs, should be figured
+	 * out later.
+	 */
+	BUG_ON(!(pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)));
+
+	list_for_each_entry(dev, &pe->pbus->devices, bus_list) {
+		/* Device BARs up to and including the ROM resource */
+		for (n = 0; n <= PCI_ROM_RESOURCE; n++)
+			pnv_ioda_setup_pe_res(pe, &dev->resource[n]);
+
+		/*
+		 * When the PE contains all subordinate PCI buses, the
+		 * windows of the child bridges are mapped to the PE too.
+		 */
+		if ((pe->flags & PNV_IODA_PE_BUS_ALL) && pci_is_bridge(dev)) {
+			for (n = 0; n < PCI_BRIDGE_RESOURCE_NUM; n++)
+				pnv_ioda_setup_pe_res(pe,
+					&dev->resource[PCI_BRIDGE_RESOURCES + n]);
+		}
+	}
+}
+
+#ifdef CONFIG_DEBUG_FS
+/*
+ * debugfs write handler: fetch the PHB diagnostic data from firmware into
+ * phb->diag_data and dump it to the kernel log. The written value @val is
+ * not used. Returns 0 on success, -EIO when the OPAL call fails.
+ */
+static int pnv_pci_diag_data_set(void *data, u64 val)
+{
+	struct pnv_phb *phb = data;
+	s64 rc = opal_pci_get_phb_diag_data2(phb->opal_id, phb->diag_data,
+					     phb->diag_data_size);
+
+	if (rc != OPAL_SUCCESS)
+		return -EIO;
+
+	pnv_pci_dump_phb_diag_data(phb->hose, phb->diag_data);
+	return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(pnv_pci_diag_data_fops, NULL, pnv_pci_diag_data_set,
+			 "%llu\n");
+
+/*
+ * debugfs write handler: log one line (RID, device count and flags) for
+ * every PE whose bit is set in the PHB allocation bitmap. The written value
+ * @val is not used. Always returns 0.
+ */
+static int pnv_pci_ioda_pe_dump(void *data, u64 val)
+{
+	struct pnv_phb *phb = data;
+	struct pnv_ioda_pe *pe;
+	int idx;
+
+	for (idx = 0; idx < phb->ioda.total_pe_num; idx++) {
+		/* Skip PE numbers that are not allocated */
+		if (!test_bit(idx, phb->ioda.pe_alloc))
+			continue;
+
+		pe = &phb->ioda.pe_array[idx];
+		pe_warn(pe, "rid: %04x dev count: %2d flags: %s%s%s%s%s%s\n",
+			pe->rid, pe->device_count,
+			(pe->flags & PNV_IODA_PE_DEV) ? "dev " : "",
+			(pe->flags & PNV_IODA_PE_BUS) ? "bus " : "",
+			(pe->flags & PNV_IODA_PE_BUS_ALL) ? "all " : "",
+			(pe->flags & PNV_IODA_PE_MASTER) ? "master " : "",
+			(pe->flags & PNV_IODA_PE_SLAVE) ? "slave " : "",
+			(pe->flags & PNV_IODA_PE_VF) ? "vf " : "");
+	}
+
+	return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(pnv_pci_ioda_pe_dump_fops, NULL,
+			 pnv_pci_ioda_pe_dump, "%llu\n");
+
+#endif /* CONFIG_DEBUG_FS */
+
+/*
+ * Create one "PCIxxxx" debugfs directory per PCI controller, holding the
+ * write-only "dump_diag_regs" and "dump_ioda_pe_state" files. Compiles to
+ * an empty function when CONFIG_DEBUG_FS is not set.
+ */
+static void pnv_pci_ioda_create_dbgfs(void)
+{
+#ifdef CONFIG_DEBUG_FS
+	struct pci_controller *ctrl, *next;
+	char dirname[16];
+
+	list_for_each_entry_safe(ctrl, next, &hose_list, list_node) {
+		struct pnv_phb *phb = ctrl->private_data;
+
+		sprintf(dirname, "PCI%04x", ctrl->global_number);
+		phb->dbgfs = debugfs_create_dir(dirname, arch_debugfs_dir);
+
+		debugfs_create_file_unsafe("dump_diag_regs", 0200, phb->dbgfs,
+					   phb, &pnv_pci_diag_data_fops);
+		debugfs_create_file_unsafe("dump_ioda_pe_state", 0200, phb->dbgfs,
+					   phb, &pnv_pci_ioda_pe_dump_fops);
+	}
+#endif /* CONFIG_DEBUG_FS */
+}
+
+/*
+ * Enable the bridge leading to @bus (if there is one) and make it a bus
+ * master, then recurse into every child bus. Buses without devices are
+ * skipped together with everything below them.
+ */
+static void pnv_pci_enable_bridge(struct pci_bus *bus)
+{
+	struct pci_dev *bridge = bus->self;
+	struct pci_bus *sub;
+	int err;
+
+	/* Nothing to do for an empty bus */
+	if (list_empty(&bus->devices))
+		return;
+
+	/*
+	 * Enabling the bridge here works around races in the generic code
+	 * when the enabling is done during parallel probing. This can be
+	 * removed once those races have been fixed.
+	 */
+	if (bridge) {
+		err = pci_enable_device(bridge);
+		if (err)
+			pci_err(bridge, "Error enabling bridge (%d)\n", err);
+		pci_set_master(bridge);
+	}
+
+	/* Same treatment for the child buses */
+	list_for_each_entry(sub, &bus->children, node)
+		pnv_pci_enable_bridge(sub);
+}
+
+/* Enable the bridges below the root bus of every PCI controller. */
+static void pnv_pci_enable_bridges(void)
+{
+	struct pci_controller *ctrl;
+
+	list_for_each_entry(ctrl, &hose_list, list_node) {
+		pnv_pci_enable_bridge(ctrl->bus);
+	}
+}
+
+/*
+ * Final fixup step: create the debugfs entries, enable the bridges and,
+ * with CONFIG_EEH, run the EEH post-initialisation.
+ */
+static void pnv_pci_ioda_fixup(void)
+{
+	/* Debugfs directories and files for each PHB */
+	pnv_pci_ioda_create_dbgfs();
+
+	/* Bridges of all controllers */
+	pnv_pci_enable_bridges();
+
+#ifdef CONFIG_EEH
+	pnv_eeh_post_init();
+#endif
+}
+
+/*
+ * Returns the alignment for I/O or memory windows of P2P bridges, which
+ * depends on how PEs are segmented. For PE sensitive P2P bridges the I/O,
+ * M32 or M64 segment size is returned.
+ *
+ * The current PCI bus might be put into one PE which was created against
+ * the parent PCI bridge. In that case (two or more PCIe-to-PCI bridges on
+ * the way up) the alignment is not enlarged, which saves some resources,
+ * and 1 is returned.
+ */
+static resource_size_t pnv_pci_window_alignment(struct pci_bus *bus,
+						 unsigned long type)
+{
+	struct pnv_phb *phb = pci_bus_to_pnvhb(bus);
+	struct pci_dev *br;
+	int pci_bridge_cnt = 0;
+
+	/* Walk upwards counting the PCIe-to-PCI bridges */
+	for (br = bus->self; br; br = br->bus->self) {
+		if (pci_pcie_type(br) != PCI_EXP_TYPE_PCI_BRIDGE)
+			continue;
+
+		if (++pci_bridge_cnt >= 2)
+			return 1;
+	}
+
+	/*
+	 * We fall back to M32 if M64 isn't supported. We enforce the M64
+	 * alignment for any 64-bit resource, PCIe doesn't care and
+	 * bridges only do 64-bit prefetchable anyway.
+	 */
+	if (phb->ioda.m64_segsize && pnv_pci_is_m64_flags(type))
+		return phb->ioda.m64_segsize;
+
+	if (!(type & IORESOURCE_MEM))
+		return phb->ioda.io_segsize;
+
+	return phb->ioda.m32_segsize;
+}
+
+/*
+ * We are updating root port or the upstream port of the
+ * bridge behind the root port with PHB's windows in order
+ * to accommodate the changes on required resources during
+ * PCI (slot) hotplug, which is connected to either root
+ * port or the downstream ports of PCIe switch behind the
+ * root port.
+ *
+ * @bus:  bus whose upstream bridge windows are fixed up
+ * @type: IORESOURCE_* mask selecting which window types to update
+ *
+ * Bridge windows that do not match @type are left untouched.
+ */
+static void pnv_pci_fixup_bridge_resources(struct pci_bus *bus,
+					   unsigned long type)
+{
+	struct pci_controller *hose = pci_bus_to_host(bus);
+	struct pnv_phb *phb = hose->private_data;
+	struct pci_dev *bridge = bus->self;
+	struct resource *r, *w;
+	bool msi_region;
+	int i;
+
+	/* Check if we need apply fixup to the bridge's windows */
+	if (!pci_is_root_bus(bridge->bus) &&
+	    !pci_is_root_bus(bridge->bus->self->bus))
+		return;
+
+	/* Fixup the resources */
+	for (i = 0; i < PCI_BRIDGE_RESOURCE_NUM; i++) {
+		r = &bridge->resource[PCI_BRIDGE_RESOURCES + i];
+		if (!r->flags || !r->parent)
+			continue;
+
+		/* Both are per-window state: start afresh on each pass */
+		w = NULL;
+		msi_region = false;
+
+		if (r->flags & type & IORESOURCE_IO)
+			w = &hose->io_resource;
+		else if (pnv_pci_is_m64(phb, r) &&
+			 (type & IORESOURCE_PREFETCH) &&
+			 phb->ioda.m64_segsize)
+			w = &hose->mem_resources[1];
+		else if (r->flags & type & IORESOURCE_MEM) {
+			w = &hose->mem_resources[0];
+			msi_region = true;
+		}
+
+		/* Window not selected by @type: nothing to copy from */
+		if (!w)
+			continue;
+
+		r->start = w->start;
+		r->end = w->end;
+
+		/* The 64KB 32-bits MSI region shouldn't be included in
+		 * the 32-bits bridge window. Otherwise, we can see strange
+		 * issues. One of them is EEH error observed on Garrison.
+		 *
+		 * Exclude top 1MB region which is the minimal alignment of
+		 * 32-bits bridge window.
+		 */
+		if (msi_region) {
+			r->end += 0x10000;
+			r->end -= 0x100000;
+		}
+	}
+}
+
+/*
+ * Configure the PE of @bus: reserve the PEs matching the M64 resources in
+ * use, set up (or pick up) the bus PE and assign its segments.
+ */
+static void pnv_pci_configure_bus(struct pci_bus *bus)
+{
+	struct pci_dev *bridge = bus->self;
+	struct pnv_ioda_pe *pe;
+	bool all = false;
+
+	/* Behind a PCIe-to-PCI bridge the PE covers all subordinate buses */
+	if (bridge && pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
+		all = true;
+
+	dev_info(&bus->dev, "Configuring PE for bus\n");
+
+	/* Don't assign PE to PCI bus, which doesn't have subordinate devices */
+	if (WARN_ON(list_empty(&bus->devices)))
+		return;
+
+	/* Reserve PEs according to used M64 resources */
+	pnv_ioda_reserve_m64_pe(bus, NULL, all);
+
+	/*
+	 * Assign PE. We might run here because of partial hotplug.
+	 * In that case we just pick up the existing PE and should
+	 * not allocate resources again.
+	 */
+	pe = pnv_ioda_setup_bus_PE(bus, all);
+	if (pe)
+		pnv_ioda_setup_pe_seg(pe);
+}
+
+/* Default resource alignment on this platform: one page. */
+static resource_size_t pnv_pci_default_alignment(void)
+{
+	const resource_size_t align = PAGE_SIZE;
+
+	return align;
+}
+
+/*
+ * Prevent enabling devices for which we couldn't properly assign a PE.
+ * Returns true when @dev has a pci_dn with a valid PE number.
+ */
+static bool pnv_pci_enable_device_hook(struct pci_dev *dev)
+{
+	struct pci_dn *pdn = pci_get_pdn(dev);
+
+	if (pdn && pdn->pe_number != IODA_INVALID_PE)
+		return true;
+
+	pci_err(dev, "pci_enable_device() blocked, no PE assigned.\n");
+	return false;
+}
+
+/*
+ * Enable hook for OpenCAPI devices: a device without a PE gets a device
+ * PE set up on the fly. Returns false when @dev has no pci_dn or when the
+ * PE cannot be set up.
+ */
+static bool pnv_ocapi_enable_device_hook(struct pci_dev *dev)
+{
+	struct pci_dn *pdn = pci_get_pdn(dev);
+
+	if (!pdn)
+		return false;
+
+	/* PE already assigned: nothing more to do */
+	if (pdn->pe_number != IODA_INVALID_PE)
+		return true;
+
+	return pnv_ioda_setup_dev_PE(dev) != NULL;
+}
+
+/*
+ * Undo the DMA setup of @pe: unset DMA window 0, disable bypass, drop the
+ * IOMMU group reference (if any) and the reference on the first table of
+ * the table group. Does nothing when DMA was never set up for the PE.
+ */
+void pnv_pci_ioda2_release_pe_dma(struct pnv_ioda_pe *pe)
+{
+	/* Sampled up front, before the window is unset below */
+	struct iommu_table *table = pe->table_group.tables[0];
+	int64_t err;
+
+	if (!pe->dma_setup_done)
+		return;
+
+	err = pnv_pci_ioda2_unset_window(&pe->table_group, 0);
+	if (err)
+		pe_warn(pe, "OPAL error %lld release DMA window\n", err);
+
+	pnv_pci_ioda2_set_bypass(pe, false);
+
+	if (pe->table_group.group) {
+		iommu_group_put(pe->table_group.group);
+		WARN_ON(pe->table_group.group);
+	}
+
+	iommu_tce_table_put(table);
+}
+
+/*
+ * Give every segment of window type @win that @map attributes to @pe back
+ * to the reserved PE, and mark the segment as unassigned in @map. OPAL
+ * failures are reported but do not stop the walk.
+ */
+static void pnv_ioda_free_pe_seg(struct pnv_ioda_pe *pe,
+				 unsigned short win,
+				 unsigned int *map)
+{
+	struct pnv_phb *phb = pe->phb;
+	int64_t err;
+	int seg;
+
+	for (seg = 0; seg < phb->ioda.total_pe_num; seg++) {
+		if (map[seg] == pe->pe_number) {
+			err = opal_pci_map_pe_mmio_window(phb->opal_id,
+					phb->ioda.reserved_pe_idx, win, 0, seg);
+			if (err != OPAL_SUCCESS)
+				pe_warn(pe, "Error %lld unmapping (%d) segment#%d\n",
+					err, win, seg);
+
+			map[seg] = IODA_INVALID_PE;
+		}
+	}
+}
+
+/* Release the M32 segments owned by @pe; only done on IODA2 PHBs. */
+static void pnv_ioda_release_pe_seg(struct pnv_ioda_pe *pe)
+{
+	struct pnv_phb *phb = pe->phb;
+
+	if (phb->type != PNV_PHB_IODA2)
+		return;
+
+	pnv_ioda_free_pe_seg(pe, OPAL_M32_WINDOW_TYPE, phb->ioda.m32_segmap);
+}
+
+/*
+ * Tear down @pe: unlink it from the PHB PE list, release its DMA setup and
+ * segments, deconfigure it, free the slave PEs of a master PE and finally
+ * free the PE itself unless it is the root bus PE.
+ */
+static void pnv_ioda_release_pe(struct pnv_ioda_pe *pe)
+{
+	struct pnv_phb *phb = pe->phb;
+	struct pnv_ioda_pe *child, *next;
+
+	pe_info(pe, "Releasing PE\n");
+
+	mutex_lock(&phb->ioda.pe_list_mutex);
+	list_del(&pe->list);
+	mutex_unlock(&phb->ioda.pe_list_mutex);
+
+	/* Only IODA2 has DMA state to release; OpenCAPI has none */
+	if (phb->type == PNV_PHB_IODA2)
+		pnv_pci_ioda2_release_pe_dma(pe);
+	else if (phb->type != PNV_PHB_NPU_OCAPI)
+		WARN_ON(1);
+
+	pnv_ioda_release_pe_seg(pe);
+	pnv_ioda_deconfigure_pe(pe->phb, pe);
+
+	/* Release slave PEs in the compound PE */
+	if (pe->flags & PNV_IODA_PE_MASTER) {
+		list_for_each_entry_safe(child, next, &pe->slaves, list) {
+			list_del(&child->list);
+			pnv_ioda_free_pe(child);
+		}
+	}
+
+	/*
+	 * The PE for root bus can be removed because of hotplug in EEH
+	 * recovery for fenced PHB error. We need to mark the PE dead so
+	 * that it can be populated again in PCI hot add path. The PE
+	 * shouldn't be destroyed as it's the global reserved resource.
+	 */
+	if (phb->ioda.root_pe_idx != pe->pe_number)
+		pnv_ioda_free_pe(pe);
+}
+
+/*
+ * Device release hook: detach @pdev from its PE and release the PE once
+ * its device count drops to zero. Virtual functions and devices without a
+ * valid PE number are ignored.
+ */
+static void pnv_pci_release_device(struct pci_dev *pdev)
+{
+	struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus);
+	struct pci_dn *pdn = pci_get_pdn(pdev);
+	struct pnv_ioda_pe *owner;
+
+	/* The VF PE state is torn down when sriov_disable() is called */
+	if (pdev->is_virtfn)
+		return;
+
+	if (!pdn)
+		return;
+	if (pdn->pe_number == IODA_INVALID_PE)
+		return;
+
+#ifdef CONFIG_PCI_IOV
+	/*
+	 * FIXME: Try move this to sriov_disable(). It's here since we allocate
+	 * the iov state at probe time since we need to fiddle with the IOV
+	 * resources.
+	 */
+	if (pdev->is_physfn)
+		kfree(pdev->dev.archdata.iov_data);
+#endif
+
+	/*
+	 * PCI hotplug can happen as part of EEH error recovery. The @pdn
+	 * isn't removed and added afterwards in this scenario. We should
+	 * set the PE number in @pdn to an invalid one. Otherwise, the PE's
+	 * device count is decreased on removing devices while failing to
+	 * be increased on adding devices. It leads to unbalanced PE's device
+	 * count and eventually make normal PCI hotplug path broken.
+	 */
+	owner = &phb->ioda.pe_array[pdn->pe_number];
+	pdn->pe_number = IODA_INVALID_PE;
+
+	WARN_ON(--owner->device_count < 0);
+	if (owner->device_count == 0)
+		pnv_ioda_release_pe(owner);
+}
+
+/* Shutdown hook: ask OPAL to reset the IODA tables of the PHB. */
+static void pnv_pci_ioda_shutdown(struct pci_controller *hose)
+{
+	struct pnv_phb *phb = hose->private_data;
+	uint64_t phb_id = phb->opal_id;
+
+	opal_pci_reset(phb_id, OPAL_RESET_PCI_IODA_TABLE, OPAL_ASSERT_RESET);
+}
+
+/*
+ * Re-attach @bus to the first bus PE on the PHB PE list that already has a
+ * bus pointer and whose RID bus number (bits 15:8) matches @bus.
+ */
+static void pnv_pci_ioda_dma_bus_setup(struct pci_bus *bus)
+{
+	struct pnv_phb *phb = pci_bus_to_pnvhb(bus);
+	struct pnv_ioda_pe *pe;
+
+	list_for_each_entry(pe, &phb->ioda.pe_list, list) {
+		/* Only bus based PEs that have a bus attached are candidates */
+		if (!(pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)) ||
+		    !pe->pbus)
+			continue;
+
+		if (((pe->rid >> 8) & 0xFF) != bus->number)
+			continue;
+
+		pe->pbus = bus;
+		break;
+	}
+}
+
+#ifdef CONFIG_IOMMU_API
+/*
+ * Look up the IOMMU group of @pdev through the PE its bus/devfn maps to.
+ * Returns a new reference on the group, or ERR_PTR(-ENODEV) when the
+ * controller has no PHB, the device has no PE or the PE has no group.
+ */
+static struct iommu_group *pnv_pci_device_group(struct pci_controller *hose,
+						struct pci_dev *pdev)
+{
+	struct pnv_phb *phb = hose->private_data;
+	struct pnv_ioda_pe *pe;
+
+	if (WARN_ON(!phb))
+		return ERR_PTR(-ENODEV);
+
+	pe = pnv_pci_bdfn_to_pe(phb, pci_dev_id(pdev));
+	if (!pe || !pe->table_group.group)
+		return ERR_PTR(-ENODEV);
+
+	return iommu_group_ref_get(pe->table_group.group);
+}
+#endif
+
+/*
+ * Controller callbacks copied into hose->controller_ops by
+ * pnv_pci_init_ioda_phb() for every PHB type other than PNV_PHB_NPU_OCAPI.
+ * The IOMMU group lookup is only provided with CONFIG_IOMMU_API.
+ */
+static const struct pci_controller_ops pnv_pci_ioda_controller_ops = {
+ .dma_dev_setup = pnv_pci_ioda_dma_dev_setup,
+ .dma_bus_setup = pnv_pci_ioda_dma_bus_setup,
+ .iommu_bypass_supported = pnv_pci_ioda_iommu_bypass_supported,
+ .enable_device_hook = pnv_pci_enable_device_hook,
+ .release_device = pnv_pci_release_device,
+ .window_alignment = pnv_pci_window_alignment,
+ .setup_bridge = pnv_pci_fixup_bridge_resources,
+ .reset_secondary_bus = pnv_pci_reset_secondary_bus,
+ .shutdown = pnv_pci_ioda_shutdown,
+#ifdef CONFIG_IOMMU_API
+ .device_group = pnv_pci_device_group,
+#endif
+};
+
+/*
+ * Controller callbacks copied into hose->controller_ops by
+ * pnv_pci_init_ioda_phb() for PNV_PHB_NPU_OCAPI PHBs. Compared with the
+ * regular IODA ops there are no DMA setup, bridge setup or IOMMU group
+ * callbacks, and the enable hook is the OpenCAPI specific one.
+ */
+static const struct pci_controller_ops pnv_npu_ocapi_ioda_controller_ops = {
+ .enable_device_hook = pnv_ocapi_enable_device_hook,
+ .release_device = pnv_pci_release_device,
+ .window_alignment = pnv_pci_window_alignment,
+ .reset_secondary_bus = pnv_pci_reset_secondary_bus,
+ .shutdown = pnv_pci_ioda_shutdown,
+};
+
+/*
+ * Discover and initialise one IODA PHB described by device node @np.
+ *
+ * @np:        device node of the PHB; unavailable nodes are skipped
+ * @hub_id:    hub identifier stored in the PHB
+ * @ioda_type: PNV_PHB_* type of the PHB; selects the controller ops
+ *
+ * The function allocates the pnv_phb and its PCI controller, parses the
+ * bus range, MMIO/IO ranges and PE related properties, allocates the PE
+ * bookkeeping arrays, reserves the reserved and root PEs, sets up MSIs,
+ * installs the platform and controller callbacks and finally resets the
+ * IODA tables (and the PHB itself when required).
+ *
+ * Allocation failures of the PHB, its diag buffer or the aux arrays are
+ * fatal (panic). Failure to allocate the PCI controller is reported and
+ * the PHB structure is freed again.
+ */
+static void __init pnv_pci_init_ioda_phb(struct device_node *np,
+					  u64 hub_id, int ioda_type)
+{
+	struct pci_controller *hose;
+	struct pnv_phb *phb;
+	unsigned long size, m64map_off, m32map_off, pemap_off;
+	struct pnv_ioda_pe *root_pe;
+	struct resource r;
+	const __be64 *prop64;
+	const __be32 *prop32;
+	int len;
+	unsigned int segno;
+	u64 phb_id;
+	void *aux;
+	long rc;
+
+	if (!of_device_is_available(np))
+		return;
+
+	pr_info("Initializing %s PHB (%pOF)\n", pnv_phb_names[ioda_type], np);
+
+	prop64 = of_get_property(np, "ibm,opal-phbid", NULL);
+	if (!prop64) {
+		pr_err(" Missing \"ibm,opal-phbid\" property !\n");
+		return;
+	}
+	phb_id = be64_to_cpup(prop64);
+	pr_debug(" PHB-ID : 0x%016llx\n", phb_id);
+
+	phb = kzalloc(sizeof(*phb), GFP_KERNEL);
+	if (!phb)
+		panic("%s: Failed to allocate %zu bytes\n", __func__,
+		      sizeof(*phb));
+
+	/* Allocate PCI controller */
+	phb->hose = hose = pcibios_alloc_controller(np);
+	if (!phb->hose) {
+		pr_err(" Can't allocate PCI controller for %pOF\n",
+		       np);
+		/* phb comes from kzalloc(), so it must go back via kfree() */
+		kfree(phb);
+		return;
+	}
+
+	spin_lock_init(&phb->lock);
+	prop32 = of_get_property(np, "bus-range", &len);
+	if (prop32 && len == 8) {
+		hose->first_busno = be32_to_cpu(prop32[0]);
+		hose->last_busno = be32_to_cpu(prop32[1]);
+	} else {
+		pr_warn(" Broken <bus-range> on %pOF\n", np);
+		hose->first_busno = 0;
+		hose->last_busno = 0xff;
+	}
+	hose->private_data = phb;
+	phb->hub_id = hub_id;
+	phb->opal_id = phb_id;
+	phb->type = ioda_type;
+	mutex_init(&phb->ioda.pe_alloc_mutex);
+
+	/* Detect specific models for error handling */
+	if (of_device_is_compatible(np, "ibm,p7ioc-pciex"))
+		phb->model = PNV_PHB_MODEL_P7IOC;
+	else if (of_device_is_compatible(np, "ibm,power8-pciex"))
+		phb->model = PNV_PHB_MODEL_PHB3;
+	else
+		phb->model = PNV_PHB_MODEL_UNKNOWN;
+
+	/* Initialize diagnostic data buffer */
+	prop32 = of_get_property(np, "ibm,phb-diag-data-size", NULL);
+	if (prop32)
+		phb->diag_data_size = be32_to_cpup(prop32);
+	else
+		phb->diag_data_size = PNV_PCI_DIAG_BUF_SIZE;
+
+	phb->diag_data = kzalloc(phb->diag_data_size, GFP_KERNEL);
+	if (!phb->diag_data)
+		panic("%s: Failed to allocate %u bytes\n", __func__,
+		      phb->diag_data_size);
+
+	/* Parse 32-bit and IO ranges (if any) */
+	pci_process_bridge_OF_ranges(hose, np, !hose->global_number);
+
+	/* Get registers */
+	if (!of_address_to_resource(np, 0, &r)) {
+		phb->regs_phys = r.start;
+		phb->regs = ioremap(r.start, resource_size(&r));
+		if (phb->regs == NULL)
+			pr_err(" Failed to map registers !\n");
+	}
+
+	/* Initialize more IODA stuff */
+	phb->ioda.total_pe_num = 1;
+	prop32 = of_get_property(np, "ibm,opal-num-pes", NULL);
+	if (prop32)
+		phb->ioda.total_pe_num = be32_to_cpup(prop32);
+	prop32 = of_get_property(np, "ibm,opal-reserved-pe", NULL);
+	if (prop32)
+		phb->ioda.reserved_pe_idx = be32_to_cpup(prop32);
+
+	/* Invalidate RID to PE# mapping */
+	for (segno = 0; segno < ARRAY_SIZE(phb->ioda.pe_rmap); segno++)
+		phb->ioda.pe_rmap[segno] = IODA_INVALID_PE;
+
+	/* Parse 64-bit MMIO range */
+	pnv_ioda_parse_m64_window(phb);
+
+	phb->ioda.m32_size = resource_size(&hose->mem_resources[0]);
+	/* FW has already cut off the top 64k of M32 space (MSI space) */
+	phb->ioda.m32_size += 0x10000;
+
+	phb->ioda.m32_segsize = phb->ioda.m32_size / phb->ioda.total_pe_num;
+	phb->ioda.m32_pci_base = hose->mem_resources[0].start - hose->mem_offset[0];
+	phb->ioda.io_size = hose->pci_io_size;
+	phb->ioda.io_segsize = phb->ioda.io_size / phb->ioda.total_pe_num;
+	phb->ioda.io_pci_base = 0; /* XXX calculate this ? */
+
+	/*
+	 * Allocate aux data & arrays. We don't have IO ports on PHB3.
+	 * Layout: PE allocation bitmap, M64 segment map, M32 segment map,
+	 * PE array.
+	 */
+	size = ALIGN(max_t(unsigned, phb->ioda.total_pe_num, 8) / 8,
+		     sizeof(unsigned long));
+	m64map_off = size;
+	size += phb->ioda.total_pe_num * sizeof(phb->ioda.m64_segmap[0]);
+	m32map_off = size;
+	size += phb->ioda.total_pe_num * sizeof(phb->ioda.m32_segmap[0]);
+	pemap_off = size;
+	size += phb->ioda.total_pe_num * sizeof(struct pnv_ioda_pe);
+	aux = kzalloc(size, GFP_KERNEL);
+	if (!aux)
+		panic("%s: Failed to allocate %lu bytes\n", __func__, size);
+
+	phb->ioda.pe_alloc = aux;
+	phb->ioda.m64_segmap = aux + m64map_off;
+	phb->ioda.m32_segmap = aux + m32map_off;
+	for (segno = 0; segno < phb->ioda.total_pe_num; segno++) {
+		phb->ioda.m64_segmap[segno] = IODA_INVALID_PE;
+		phb->ioda.m32_segmap[segno] = IODA_INVALID_PE;
+	}
+	phb->ioda.pe_array = aux + pemap_off;
+
+	/*
+	 * Choose PE number for root bus, which shouldn't have
+	 * M64 resources consumed by its child devices. To pick
+	 * the PE number adjacent to the reserved one if possible.
+	 */
+	pnv_ioda_reserve_pe(phb, phb->ioda.reserved_pe_idx);
+	if (phb->ioda.reserved_pe_idx == 0) {
+		phb->ioda.root_pe_idx = 1;
+		pnv_ioda_reserve_pe(phb, phb->ioda.root_pe_idx);
+	} else if (phb->ioda.reserved_pe_idx == (phb->ioda.total_pe_num - 1)) {
+		phb->ioda.root_pe_idx = phb->ioda.reserved_pe_idx - 1;
+		pnv_ioda_reserve_pe(phb, phb->ioda.root_pe_idx);
+	} else {
+		/* otherwise just allocate one */
+		root_pe = pnv_ioda_alloc_pe(phb, 1);
+		phb->ioda.root_pe_idx = root_pe->pe_number;
+	}
+
+	INIT_LIST_HEAD(&phb->ioda.pe_list);
+	mutex_init(&phb->ioda.pe_list_mutex);
+
+#if 0 /* We should really do that ... */
+	rc = opal_pci_set_phb_mem_window(opal->phb_id,
+					 window_type,
+					 window_num,
+					 starting_real_address,
+					 starting_pci_address,
+					 segment_size);
+#endif
+
+	pr_info(" %03d (%03d) PE's M32: 0x%x [segment=0x%x]\n",
+		phb->ioda.total_pe_num, phb->ioda.reserved_pe_idx,
+		phb->ioda.m32_size, phb->ioda.m32_segsize);
+	if (phb->ioda.m64_size)
+		pr_info(" M64: 0x%lx [segment=0x%lx]\n",
+			phb->ioda.m64_size, phb->ioda.m64_segsize);
+	if (phb->ioda.io_size)
+		pr_info(" IO: 0x%x [segment=0x%x]\n",
+			phb->ioda.io_size, phb->ioda.io_segsize);
+
+
+	phb->hose->ops = &pnv_pci_ops;
+	phb->get_pe_state = pnv_ioda_get_pe_state;
+	phb->freeze_pe = pnv_ioda_freeze_pe;
+	phb->unfreeze_pe = pnv_ioda_unfreeze_pe;
+
+	/* Setup MSI support */
+	pnv_pci_init_ioda_msis(phb);
+
+	/*
+	 * We pass the PCI probe flag PCI_REASSIGN_ALL_RSRC here
+	 * to let the PCI core do resource assignment. It's supposed
+	 * that the PCI core will do correct I/O and MMIO alignment
+	 * for the P2P bridge bars so that each PCI bus (excluding
+	 * the child P2P bridges) can form individual PE.
+	 */
+	ppc_md.pcibios_fixup = pnv_pci_ioda_fixup;
+
+	/* OpenCAPI PHBs get their own, reduced set of controller ops */
+	switch (phb->type) {
+	case PNV_PHB_NPU_OCAPI:
+		hose->controller_ops = pnv_npu_ocapi_ioda_controller_ops;
+		break;
+	default:
+		hose->controller_ops = pnv_pci_ioda_controller_ops;
+	}
+
+	ppc_md.pcibios_default_alignment = pnv_pci_default_alignment;
+
+#ifdef CONFIG_PCI_IOV
+	ppc_md.pcibios_fixup_sriov = pnv_pci_ioda_fixup_iov;
+	ppc_md.pcibios_iov_resource_alignment = pnv_pci_iov_resource_alignment;
+	ppc_md.pcibios_sriov_enable = pnv_pcibios_sriov_enable;
+	ppc_md.pcibios_sriov_disable = pnv_pcibios_sriov_disable;
+#endif
+
+	pci_add_flags(PCI_REASSIGN_ALL_RSRC);
+
+	/* Reset IODA tables to a clean state */
+	rc = opal_pci_reset(phb_id, OPAL_RESET_PCI_IODA_TABLE, OPAL_ASSERT_RESET);
+	if (rc)
+		pr_warn(" OPAL Error %ld performing IODA table reset !\n", rc);
+
+	/*
+	 * If we're running in kdump kernel, the previous kernel never
+	 * shutdown PCI devices correctly. We already got IODA table
+	 * cleaned out. So we have to issue PHB reset to stop all PCI
+	 * transactions from previous kernel. The ppc_pci_reset_phbs
+	 * kernel parameter will force this reset too. Additionally,
+	 * if the IODA reset above failed then use a bigger hammer.
+	 * This can happen if we get a PHB fatal error in very early
+	 * boot.
+	 */
+	if (is_kdump_kernel() || pci_reset_phbs || rc) {
+		pr_info(" Issue PHB reset ...\n");
+		pnv_eeh_phb_reset(hose, EEH_RESET_FUNDAMENTAL);
+		pnv_eeh_phb_reset(hose, EEH_RESET_DEACTIVATE);
+	}
+
+	/* Remove M64 resource if we can't configure it successfully */
+	if (!phb->init_m64 || phb->init_m64(phb))
+		hose->mem_resources[1].flags = 0;
+
+	/* create pci_dn's for DT nodes under this PHB */
+	pci_devs_phb_init_dynamic(hose);
+}
+
+/* Initialise the PHB at @np as an IODA2 PHB (hub id 0). */
+void __init pnv_pci_init_ioda2_phb(struct device_node *np)
+{
+ pnv_pci_init_ioda_phb(np, 0, PNV_PHB_IODA2);
+}
+
+/* Initialise the PHB at @np as an NPU OpenCAPI PHB (hub id 0). */
+void __init pnv_pci_init_npu2_opencapi_phb(struct device_node *np)
+{
+ pnv_pci_init_ioda_phb(np, 0, PNV_PHB_NPU_OCAPI);
+}
+
+/*
+ * Early PCI fixup, registered for every vendor/device ID: on powernv,
+ * devices behind an NPU OpenCAPI PHB get the extended config space size.
+ */
+static void pnv_npu2_opencapi_cfg_size_fixup(struct pci_dev *dev)
+{
+	struct pnv_phb *phb = pci_bus_to_pnvhb(dev->bus);
+
+	if (machine_is(powernv) && phb->type == PNV_PHB_NPU_OCAPI)
+		dev->cfg_size = PCI_CFG_SPACE_EXP_SIZE;
+}
+DECLARE_PCI_FIXUP_EARLY(PCI_ANY_ID, PCI_ANY_ID, pnv_npu2_opencapi_cfg_size_fixup);
diff --git a/arch/powerpc/platforms/powernv/pci-sriov.c b/arch/powerpc/platforms/powernv/pci-sriov.c
new file mode 100644
index 000000000..59882da3e
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/pci-sriov.c
@@ -0,0 +1,760 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/kernel.h>
+#include <linux/ioport.h>
+#include <linux/bitmap.h>
+#include <linux/pci.h>
+
+#include <asm/opal.h>
+
+#include "pci.h"
+
+/*
+ * The majority of the complexity in supporting SR-IOV on PowerNV comes from
+ * the need to put the MMIO space for each VF into a separate PE. Internally
+ * the PHB maps MMIO addresses to a specific PE using the "Memory BAR Table".
+ * The MBT historically only applied to the 64bit MMIO window of the PHB
+ * so it's common to see it referred to as the "M64BT".
+ *
+ * An MBT entry stores the mapped range as an <base>,<mask> pair. This forces
+ * the address range that we want to map to be power-of-two sized and aligned.
+ * For conventional PCI devices this isn't really an issue since PCI device BARs
+ * have the same requirement.
+ *
+ * For a SR-IOV BAR things are a little more awkward since size and alignment
+ * are not coupled. The alignment is set based on the per-VF BAR size, but
+ * the total BAR area is: number-of-vfs * per-vf-size. The number of VFs
+ * isn't necessarily a power of two, so neither is the total size. To fix that
+ * we need to finesse (read: hack) the Linux BAR allocator so that it will
+ * allocate the SR-IOV BARs in a way that lets us map them using the MBT.
+ *
+ * The changes to size and alignment that we need to do depend on the "mode"
+ * of MBT entry that we use. We only support SR-IOV on PHB3 (IODA2) and above,
+ * so as a baseline we can assume that we have the following BAR modes
+ * available:
+ *
+ * NB: $PE_COUNT is the number of PEs that the PHB supports.
+ *
+ * a) A segmented BAR that splits the mapped range into $PE_COUNT equally sized
+ * segments. The n'th segment is mapped to the n'th PE.
+ * b) An un-segmented BAR that maps the whole address range to a specific PE.
+ *
+ *
+ * We prefer to use mode a) since it only requires one MBT entry per SR-IOV BAR
+ * For comparison b) requires one entry per-VF per-BAR, or:
+ * (num-vfs * num-sriov-bars) in total. To use a) we need the size of each segment
+ * to equal the size of the per-VF BAR area. So:
+ *
+ * new_size = per-vf-size * number-of-PEs
+ *
+ * The alignment for the SR-IOV BAR also needs to be changed from per-vf-size
+ * to "new_size", calculated above. Implementing this is a convoluted process
+ * which requires several hooks in the PCI core:
+ *
+ * 1. In pcibios_device_add() we call pnv_pci_ioda_fixup_iov().
+ *
+ * At this point the device has been probed and the device's BARs are sized,
+ * but no resource allocations have been done. The SR-IOV BARs are sized
+ * based on the maximum number of VFs supported by the device and we need
+ * to increase that to new_size.
+ *
+ * 2. Later, when Linux actually assigns resources it tries to make the resource
+ * allocations for each PCI bus as compact as possible. As a part of that it
+ * sorts the BARs on a bus by their required alignment, which is calculated
+ * using pci_resource_alignment().
+ *
+ * For IOV resources this goes:
+ * pci_resource_alignment()
+ * pci_sriov_resource_alignment()
+ * pcibios_sriov_resource_alignment()
+ * pnv_pci_iov_resource_alignment()
+ *
+ * Our hook overrides the default alignment, equal to the per-vf-size, with
+ * new_size computed above.
+ *
+ * 3. When userspace enables VFs for a device:
+ *
+ * sriov_enable()
+ * pcibios_sriov_enable()
+ * pnv_pcibios_sriov_enable()
+ *
+ * This is where we actually allocate PE numbers for each VF and setup the
+ * MBT mapping for each SR-IOV BAR. In steps 1) and 2) we setup an "arena"
+ * where each MBT segment is equal in size to the VF BAR so we can shift
+ * around the actual SR-IOV BAR location within this arena. We need this
+ * ability because the PE space is shared by all devices on the same PHB.
+ * When using mode a) described above, segment 0 maps to PE#0, which might
+ * already be in use by another device on the PHB.
+ *
+ * As a result we need to allocate a contiguous range of PE numbers, then shift
+ * the address programmed into the SR-IOV BAR of the PF so that the address
+ * of VF0 matches up with the segment corresponding to the first allocated
+ * PE number. This is handled in pnv_pci_vf_resource_shift().
+ *
+ * Once all that is done we return to the PCI core which then enables VFs,
+ * scans them and creates pci_devs for each. The init process for a VF is
+ * largely the same as a normal device, but the VF is inserted into the IODA
+ * PE that we allocated for it rather than the PE associated with the bus.
+ *
+ * 4. When userspace disables VFs we unwind the above in
+ * pnv_pcibios_sriov_disable(). Fortunately this is relatively simple since
+ * we don't need to validate anything, just tear down the mappings and
+ * move SR-IOV resource back to its "proper" location.
+ *
+ * That's how mode a) works. In theory mode b) (single PE mapping) is less work
+ * since we can map each individual VF with a separate BAR. However, there's a
+ * few limitations:
+ *
+ * 1) For IODA2 mode b) has a minimum alignment requirement of 32MB. This makes
+ * it only usable for devices with very large per-VF BARs. Such devices are
+ * similar to Big Foot. They definitely exist, but I've never seen one.
+ *
+ * 2) The number of MBT entries that we have is limited. PHB3 and PHB4 only
+ * have 16 in total and some of those are needed for other purposes. Most
+ * SR-IOV capable network cards can support more than 16 VFs on each port.
+ *
+ * We use b) when using a) would use more than 1/4 of the entire 64 bit MMIO
+ * window of the PHB.
+ *
+ *
+ *
+ * PHB4 (IODA3) added a few new features that would be useful for SR-IOV. It
+ * allowed the MBT to map 32bit MMIO space in addition to 64bit which allows
+ * us to support SR-IOV BARs in the 32bit MMIO window. This is useful since
+ * the Linux BAR allocation will place any BAR marked as non-prefetchable into
+ * the non-prefetchable bridge window, which is 32bit only. It also added two
+ * new modes:
+ *
+ * c) A segmented BAR similar to a), but each segment can be individually
+ * mapped to any PE. This matches how the 32bit MMIO window worked on
+ * IODA1&2.
+ *
+ * d) A segmented BAR with 8, 64, or 128 segments. This works similarly to a),
+ * but with fewer segments and configurable base PE.
+ *
+ * i.e. The n'th segment maps to the (n + base)'th PE.
+ *
+ * The base PE is also required to be a multiple of the window size.
+ *
+ * Unfortunately, the OPAL API doesn't currently (as of skiboot v6.6) allow us
+ * to exploit any of the IODA3 features.
+ */
+
+/*
+ * Prepare the SR-IOV BARs of the PF @pdev so they can be mapped through the
+ * PHB's M64 windows.
+ *
+ * A pnv_iov_data is allocated and attached to the device. Every populated,
+ * not yet assigned IOV BAR is then either:
+ *  - flagged for single-PE mode when the per-VF size exceeds 1/4 of the M64
+ *    segment size (requires a per-VF size of at least 32MB), or
+ *  - grown to per-VF-size * total_pe_num so one segmented window covers it.
+ *
+ * If the allocation fails, a BAR is not an M64 BAR, or a BAR is too large for
+ * a segmented window yet too small for single-PE mode, all IOV BARs are
+ * cleared and the pnv_iov_data is dropped, leaving SR-IOV unusable for this
+ * device.
+ */
+static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
+{
+	struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus);
+	struct resource *res;
+	int i;
+	resource_size_t vf_bar_sz;
+	struct pnv_iov_data *iov;
+	int mul;
+
+	iov = kzalloc(sizeof(*iov), GFP_KERNEL);
+	if (!iov)
+		goto disable_iov;
+	pdev->dev.archdata.iov_data = iov;
+	mul = phb->ioda.total_pe_num;
+
+	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
+		res = &pdev->resource[i + PCI_IOV_RESOURCES];
+		/* Skip unused BARs and BARs that already have a parent */
+		if (!res->flags || res->parent)
+			continue;
+		if (!pnv_pci_is_m64_flags(res->flags)) {
+			dev_warn(&pdev->dev, "Don't support SR-IOV with non M64 VF BAR%d: %pR. \n",
+				 i, res);
+			goto disable_iov;
+		}
+
+		vf_bar_sz = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES);
+
+		/*
+		 * Generally, one segmented M64 BAR maps one IOV BAR. However,
+		 * if a VF BAR is too large we end up wasting a lot of space.
+		 * If each VF needs more than 1/4 of the default m64 segment
+		 * then each VF BAR should be mapped in single-PE mode to reduce
+		 * the amount of space required. This does however limit the
+		 * number of VFs we can support.
+		 *
+		 * The 1/4 limit is arbitrary and can be tweaked.
+		 */
+		if (vf_bar_sz > (phb->ioda.m64_segsize >> 2)) {
+			/*
+			 * On PHB3, the minimum size alignment of M64 BAR in
+			 * single mode is 32MB. If this VF BAR is smaller than
+			 * 32MB, but still too large for a segmented window
+			 * then we can't map it and need to disable SR-IOV for
+			 * this device.
+			 */
+			if (vf_bar_sz < SZ_32M) {
+				pci_err(pdev, "VF BAR%d: %pR can't be mapped in single PE mode\n",
+					i, res);
+				goto disable_iov;
+			}
+
+			iov->m64_single_mode[i] = true;
+			continue;
+		}
+
+		/*
+		 * This BAR can be mapped with one segmented window, so adjust
+		 * the resource size to accommodate.
+		 */
+		pci_dbg(pdev, " Fixing VF BAR%d: %pR to\n", i, res);
+		res->end = res->start + vf_bar_sz * mul - 1;
+		pci_dbg(pdev, " %pR\n", res);
+
+		/* Terminate the message so later output cannot be appended to it */
+		pci_info(pdev, "VF BAR%d: %pR (expanded to %d VFs for PE alignment)\n",
+			 i, res, mul);
+
+		iov->need_shift = true;
+	}
+
+	return;
+
+disable_iov:
+	/* Save ourselves some MMIO space by disabling the unusable BARs */
+	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
+		res = &pdev->resource[i + PCI_IOV_RESOURCES];
+		res->flags = 0;
+		res->end = res->start - 1;
+	}
+
+	/* iov is NULL here if the allocation itself failed; kfree(NULL) is fine */
+	pdev->dev.archdata.iov_data = NULL;
+	kfree(iov);
+}
+
+/*
+ * Per-device SR-IOV fixup.
+ *
+ * For a VF, record the pci_dev in the PE it belongs to. For a PF, resize the
+ * IOV BARs to what the PHB's M64 BAR table can map. Any other device is left
+ * untouched.
+ */
+void pnv_pci_ioda_fixup_iov(struct pci_dev *pdev)
+{
+	struct pnv_ioda_pe *vf_pe;
+
+	if (!pdev->is_virtfn) {
+		/*
+		 * For PFs adjust their allocated IOV resources to match what
+		 * the PHB can support using its M64 BAR table.
+		 */
+		if (pdev->is_physfn)
+			pnv_pci_ioda_fixup_iov_resources(pdev);
+		return;
+	}
+
+	/*
+	 * VF PEs are single-device PEs so their pdev pointer needs to be
+	 * set. The pdev doesn't exist when the PE is allocated (in
+	 * pcibios_sriov_enable()) so we fix it up here.
+	 */
+	vf_pe = pnv_ioda_get_pe(pdev);
+	vf_pe->pdev = pdev;
+	WARN_ON(!(vf_pe->flags & PNV_IODA_PE_VF));
+}
+
+/*
+ * Alignment required for IOV resource @resno of @pdev.
+ *
+ * Returns the per-VF BAR size when the device has no pnv_iov_data or when the
+ * BAR is mapped in single-PE mode, and per-VF size * total_pe_num (the size of
+ * the whole segmented window) otherwise.
+ */
+resource_size_t pnv_pci_iov_resource_alignment(struct pci_dev *pdev,
+					       int resno)
+{
+	resource_size_t vf_size = pci_iov_resource_size(pdev, resno);
+	struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus);
+	struct pnv_iov_data *iov = pnv_iov_get(pdev);
+
+	/*
+	 * iov can be null if we have an SR-IOV device with IOV BAR that can't
+	 * be placed in the m64 space (i.e. The BAR is 32bit or non-prefetch).
+	 * In that case we don't allow VFs to be enabled since one of their
+	 * BARs would not be placed in the correct PE.
+	 *
+	 * A single-PE mode BAR can likewise use the native VF BAR alignment;
+	 * the fixup already validated that a single PE window is usable.
+	 */
+	if (!iov || iov->m64_single_mode[resno - PCI_IOV_RESOURCES])
+		return vf_size;
+
+	/*
+	 * The IOV BAR is mapped by one segmented M64 BAR, and the range an
+	 * M64 BAR maps has to be size aligned, so align to the full window.
+	 */
+	return phb->ioda.total_pe_num * vf_size;
+}
+
+/*
+ * Disable and free every M64 window recorded in the PF's used_m64_bar_mask.
+ *
+ * @num_vfs is not used. Always returns 0; the result of the OPAL disable call
+ * is not checked.
+ */
+static int pnv_pci_vf_release_m64(struct pci_dev *pdev, u16 num_vfs)
+{
+	struct pnv_iov_data *iov;
+	struct pnv_phb *phb;
+	int window_id;
+
+	phb = pci_bus_to_pnvhb(pdev->bus);
+	iov = pnv_iov_get(pdev);
+
+	for_each_set_bit(window_id, iov->used_m64_bar_mask, MAX_M64_BARS) {
+		opal_pci_phb_mmio_enable(phb->opal_id,
+					 OPAL_M64_WINDOW_TYPE,
+					 window_id,
+					 0);
+
+		clear_bit(window_id, &phb->ioda.m64_bar_alloc);
+
+		/*
+		 * Forget the window in our own mask as well. The iov data
+		 * outlives this enable/disable cycle, so a stale bit would
+		 * make a later release disable and free a window that may by
+		 * then have been handed to another device.
+		 */
+		clear_bit(window_id, iov->used_m64_bar_mask);
+	}
+
+	return 0;
+}
+
+
+/*
+ * Map [start, start + size) with M64 window @window_id in segmented mode.
+ *
+ * PHB3 and beyond support segmented windows: the window's address range is
+ * subdivided into phb->ioda.total_pe_num segments with a 1-1 mapping between
+ * PEs and segments.
+ *
+ * Returns 0 on success or the failing OPAL call's return code, which is also
+ * logged.
+ */
+static int64_t pnv_ioda_map_m64_segmented(struct pnv_phb *phb,
+					  int window_id,
+					  resource_size_t start,
+					  resource_size_t size)
+{
+	int64_t rc;
+
+	/* Program the window bounds first ... */
+	rc = opal_pci_set_phb_mem_window(phb->opal_id,
+					 OPAL_M64_WINDOW_TYPE,
+					 window_id,
+					 start,
+					 0, /* unused */
+					 size);
+
+	/* ... and only enable it in split mode if that worked */
+	if (!rc)
+		rc = opal_pci_phb_mmio_enable(phb->opal_id,
+					      OPAL_M64_WINDOW_TYPE,
+					      window_id,
+					      OPAL_ENABLE_M64_SPLIT);
+
+	if (rc)
+		pr_err("Failed to map M64 window #%d: %lld\n", window_id, rc);
+
+	return rc;
+}
+
+/*
+ * Map [start, start + size) to PE @pe_num using M64 window @window_id in
+ * single PE (non-split) mode.
+ *
+ * Returns 0 on success or the return code of the first OPAL call that failed;
+ * a failure is also logged.
+ */
+static int64_t pnv_ioda_map_m64_single(struct pnv_phb *phb,
+				       int pe_num,
+				       int window_id,
+				       resource_size_t start,
+				       resource_size_t size)
+{
+	int64_t rc;
+
+	/*
+	 * The API for setting up m64 mmio windows seems to have been designed
+	 * with P7-IOC in mind. For that chip each M64 BAR (window) had a fixed
+	 * split of 8 equally sized segments, each of which could be
+	 * individually assigned to a PE.
+	 *
+	 * The API therefore has no way to communicate the number of segments
+	 * we want on a BAR. That wasn't a problem for p7-ioc since there was
+	 * no choice, but the single PE windows added in PHB3 don't map cleanly
+	 * to this API.
+	 *
+	 * As a result we first call opal_pci_map_pe_mmio_window() to put the
+	 * window in single PE mode and set its PE, and only then set the
+	 * address bounds. It has to be done in this order because single PE
+	 * windows have different alignment requirements on PHB3.
+	 */
+	rc = opal_pci_map_pe_mmio_window(phb->opal_id,
+					 pe_num,
+					 OPAL_M64_WINDOW_TYPE,
+					 window_id,
+					 0);
+
+	/* NB: In single PE mode the window needs to be aligned to 32MB */
+	if (!rc)
+		rc = opal_pci_set_phb_mem_window(phb->opal_id,
+						 OPAL_M64_WINDOW_TYPE,
+						 window_id,
+						 start,
+						 0, /* ignored by FW, m64 is 1-1 */
+						 size);
+
+	/*
+	 * Now actually enable it. We specified the BAR should be in "non-split"
+	 * mode so FW will validate that the BAR is in single PE mode.
+	 */
+	if (!rc)
+		rc = opal_pci_phb_mmio_enable(phb->opal_id,
+					      OPAL_M64_WINDOW_TYPE,
+					      window_id,
+					      OPAL_ENABLE_M64_NON_SPLIT);
+
+	if (rc)
+		pr_err("Error mapping single PE BAR\n");
+
+	return rc;
+}
+
+/*
+ * Claim a free M64 window on @phb and record it in @iov's used_m64_bar_mask.
+ *
+ * Only window indices 0..phb->ioda.m64_bar_idx are considered. Returns the
+ * window index, or -1 when none of them is free.
+ */
+static int pnv_pci_alloc_m64_bar(struct pnv_phb *phb, struct pnv_iov_data *iov)
+{
+	int win;
+
+	for (;;) {
+		win = find_next_zero_bit(&phb->ioda.m64_bar_alloc,
+					 phb->ioda.m64_bar_idx + 1, 0);
+		if (win >= phb->ioda.m64_bar_idx + 1)
+			return -1;
+
+		/* Retry the search if the bit got set in the meantime */
+		if (!test_and_set_bit(win, &phb->ioda.m64_bar_alloc))
+			break;
+	}
+
+	set_bit(win, iov->used_m64_bar_mask);
+
+	return win;
+}
+
+/*
+ * Allocate and program the M64 windows that back the SR-IOV BARs of @pdev.
+ *
+ * A BAR that is not in single-PE mode gets one segmented window covering the
+ * whole resource. A single-PE mode BAR gets one window per VF, each mapped to
+ * the PE of that VF (the VF PEs are numbered consecutively starting at
+ * iov->vf_pe_arr[0].pe_number).
+ *
+ * Returns 0 on success. On any failure every window taken so far is released
+ * and -EBUSY is returned.
+ */
+static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
+{
+	struct pnv_iov_data *iov;
+	struct pnv_phb *phb;
+	int win;
+	struct resource *res;
+	int i, j;
+	int64_t rc;
+	resource_size_t size, start;
+	int base_pe_num;
+
+	phb = pci_bus_to_pnvhb(pdev->bus);
+	iov = pnv_iov_get(pdev);
+
+	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
+		res = &pdev->resource[i + PCI_IOV_RESOURCES];
+		/* Only BARs that exist and have been assigned need a window */
+		if (!res->flags || !res->parent)
+			continue;
+
+		/* don't need single mode? map everything in one go! */
+		if (!iov->m64_single_mode[i]) {
+			win = pnv_pci_alloc_m64_bar(phb, iov);
+			if (win < 0)
+				goto m64_failed;
+
+			size = resource_size(res);
+			start = res->start;
+
+			rc = pnv_ioda_map_m64_segmented(phb, win, start, size);
+			if (rc)
+				goto m64_failed;
+
+			continue;
+		}
+
+		/* otherwise map each VF with single PE BARs */
+		size = pci_iov_resource_size(pdev, PCI_IOV_RESOURCES + i);
+		base_pe_num = iov->vf_pe_arr[0].pe_number;
+
+		for (j = 0; j < num_vfs; j++) {
+			win = pnv_pci_alloc_m64_bar(phb, iov);
+			if (win < 0)
+				goto m64_failed;
+
+			start = res->start + size * j;
+			/*
+			 * pnv_ioda_map_m64_single() takes the PE number
+			 * first and the window id second.
+			 */
+			rc = pnv_ioda_map_m64_single(phb,
+						     base_pe_num + j,
+						     win,
+						     start,
+						     size);
+			if (rc)
+				goto m64_failed;
+		}
+	}
+	return 0;
+
+m64_failed:
+	pnv_pci_vf_release_m64(pdev, num_vfs);
+	return -EBUSY;
+}
+
+/*
+ * Tear down every PE on the PHB's PE list whose parent device is the PF
+ * @pdev: release its DMA setup, unlink it from the list, deconfigure it and
+ * free it. Does nothing if @pdev is not a PF.
+ */
+static void pnv_ioda_release_vf_PE(struct pci_dev *pdev)
+{
+	struct pnv_ioda_pe *vf_pe, *next;
+	struct pnv_phb *phb;
+
+	if (!pdev->is_physfn)
+		return;
+
+	phb = pci_bus_to_pnvhb(pdev->bus);
+
+	/* FIXME: Use pnv_ioda_release_pe()? */
+	list_for_each_entry_safe(vf_pe, next, &phb->ioda.pe_list, list) {
+		if (vf_pe->parent_dev == pdev) {
+			pnv_pci_ioda2_release_pe_dma(vf_pe);
+
+			/* Remove from list */
+			mutex_lock(&phb->ioda.pe_list_mutex);
+			list_del(&vf_pe->list);
+			mutex_unlock(&phb->ioda.pe_list_mutex);
+
+			pnv_ioda_deconfigure_pe(phb, vf_pe);
+
+			pnv_ioda_free_pe(vf_pe);
+		}
+	}
+}
+
+/*
+ * Move the start of every segmented-mode IOV BAR of the PF @dev by @offset
+ * VF-sized slots (a negative @offset moves it back).
+ *
+ * The first pass only validates that iov->num_vfs VFs still fit inside each
+ * BAR after the shift; nothing is modified if any BAR fails. The second pass
+ * applies the shift, writes the new address to the device and, for a positive
+ * offset, reserves the hole left in front of the BAR (released again on a
+ * negative offset).
+ *
+ * Returns 0 on success, -EINVAL if @dev is not a PF, -EBUSY if a shifted BAR
+ * would run past the end of its resource.
+ */
+static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset)
+{
+	struct resource *bar, *hole, tmp;
+	struct pnv_iov_data *iov;
+	resource_size_t vf_size, delta;
+	u16 num_vfs;
+	int i;
+
+	if (!dev->is_physfn)
+		return -EINVAL;
+
+	iov = pnv_iov_get(dev);
+	num_vfs = iov->num_vfs;
+
+	/*
+	 * "offset" is in VFs. The M64 windows are sized so that when they
+	 * are segmented, each segment is the same size as the IOV BAR.
+	 * Each segment is in a separate PE, and the high order bits of the
+	 * address are the PE number. Therefore, each VF's BAR is in a
+	 * separate PE, and changing the IOV BAR start address changes the
+	 * range of PEs the VFs are in.
+	 */
+	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
+		bar = &dev->resource[i + PCI_IOV_RESOURCES];
+		if (!bar->flags || !bar->parent || iov->m64_single_mode[i])
+			continue;
+
+		/*
+		 * The range actually occupied is num_vfs VF BARs from the new
+		 * start address. Make sure that after shifting it does not
+		 * overlap with another device.
+		 */
+		vf_size = pci_iov_resource_size(dev, i + PCI_IOV_RESOURCES);
+		tmp.flags = bar->flags;
+		tmp.start = bar->start + (vf_size * offset);
+		tmp.end = tmp.start + (vf_size * num_vfs) - 1;
+
+		if (tmp.end > bar->end) {
+			dev_err(&dev->dev, "VF BAR%d: %pR would extend past %pR (trying to enable %d VFs shifted by %d)\n",
+				i, &tmp, bar, num_vfs, offset);
+			return -EBUSY;
+		}
+	}
+
+	/*
+	 * Since M64 BAR shares segments among all possible 256 PEs,
+	 * we have to shift the beginning of PF IOV BAR to make it start from
+	 * the segment which belongs to the PE number assigned to the first VF.
+	 * This creates a "hole" in the /proc/iomem which could be used for
+	 * allocating other resources so we reserve this area below and
+	 * release when IOV is released.
+	 */
+	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
+		bar = &dev->resource[i + PCI_IOV_RESOURCES];
+		if (!bar->flags || !bar->parent || iov->m64_single_mode[i])
+			continue;
+
+		vf_size = pci_iov_resource_size(dev, i + PCI_IOV_RESOURCES);
+		delta = vf_size * offset;
+
+		/* Keep the unshifted resource for the log and the hole */
+		tmp = *bar;
+		bar->start += delta;
+
+		dev_info(&dev->dev, "VF BAR%d: %pR shifted to %pR (%sabling %d VFs shifted by %d)\n",
+			 i, &tmp, bar, (offset > 0) ? "En" : "Dis",
+			 num_vfs, offset);
+
+		hole = &iov->holes[i];
+
+		/* Moving back: give up the reservation before the update */
+		if (offset < 0) {
+			devm_release_resource(&dev->dev, hole);
+			memset(hole, 0, sizeof(iov->holes[i]));
+		}
+
+		pci_update_resource(dev, i + PCI_IOV_RESOURCES);
+
+		/* Moving forward: reserve the gap in front of the new start */
+		if (offset > 0) {
+			hole->start = tmp.start;
+			hole->end = tmp.start + delta - 1;
+			hole->flags = IORESOURCE_BUS;
+			hole->name = "pnv_iov_reserved";
+			devm_request_resource(&dev->dev, bar->parent, hole);
+		}
+	}
+	return 0;
+}
+
+/*
+ * Undo pnv_pci_sriov_enable() for the PF @pdev: release the VF PEs, move the
+ * IOV BARs back if they were shifted, then release the M64 windows. Warns and
+ * returns early if the device has no pnv_iov_data.
+ */
+static void pnv_pci_sriov_disable(struct pci_dev *pdev)
+{
+	struct pnv_iov_data *iov = pnv_iov_get(pdev);
+	u16 first_pe, nr_vfs;
+
+	if (WARN_ON(!iov))
+		return;
+
+	/* Sample these before the PEs they describe are released */
+	nr_vfs = iov->num_vfs;
+	first_pe = iov->vf_pe_arr[0].pe_number;
+
+	/* Release VF PEs */
+	pnv_ioda_release_vf_PE(pdev);
+
+	/* Un-shift the IOV BARs if we need to */
+	if (iov->need_shift)
+		pnv_pci_vf_resource_shift(pdev, -first_pe);
+
+	/* Release M64 windows */
+	pnv_pci_vf_release_m64(pdev, nr_vfs);
+}
+
+/*
+ * Initialise and configure one PE per VF for the PF @pdev.
+ *
+ * The PEs come from iov->vf_pe_arr, one entry per VF index. Each one is
+ * filled in with the VF's RID, configured, added to the PHB's PE list, linked
+ * to the matching pci_dn and given its DMA setup. A PE that fails to
+ * configure is freed and skipped; the remaining VFs are still processed.
+ * Does nothing if @pdev is not a PF.
+ */
+static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
+{
+	struct pnv_phb *phb;
+	struct pnv_ioda_pe *pe;
+	int pe_num;
+	u16 vf_index;
+	struct pnv_iov_data *iov;
+	struct pci_dn *pdn;
+
+	if (!pdev->is_physfn)
+		return;
+
+	phb = pci_bus_to_pnvhb(pdev->bus);
+	pdn = pci_get_pdn(pdev);
+	iov = pnv_iov_get(pdev);
+
+	/* Reserve PE for each VF */
+	for (vf_index = 0; vf_index < num_vfs; vf_index++) {
+		int vf_devfn = pci_iov_virtfn_devfn(pdev, vf_index);
+		int vf_bus = pci_iov_virtfn_bus(pdev, vf_index);
+		struct pci_dn *vf_pdn;
+
+		pe = &iov->vf_pe_arr[vf_index];
+		pe->phb = phb;
+		pe->flags = PNV_IODA_PE_VF;
+		pe->pbus = NULL;
+		pe->parent_dev = pdev;
+		pe->mve_number = -1;
+		pe->rid = (vf_bus << 8) | vf_devfn;
+
+		pe_num = pe->pe_number;
+		/*
+		 * Report the VF's own bus number (the one used for the RID
+		 * above), not the PF's bus number.
+		 */
+		pe_info(pe, "VF %04d:%02d:%02d.%d associated with PE#%x\n",
+			pci_domain_nr(pdev->bus), vf_bus,
+			PCI_SLOT(vf_devfn), PCI_FUNC(vf_devfn), pe_num);
+
+		if (pnv_ioda_configure_pe(phb, pe)) {
+			/* XXX What do we do here ? */
+			pnv_ioda_free_pe(pe);
+			pe->pdev = NULL;
+			continue;
+		}
+
+		/* Put PE to the list */
+		mutex_lock(&phb->ioda.pe_list_mutex);
+		list_add_tail(&pe->list, &phb->ioda.pe_list);
+		mutex_unlock(&phb->ioda.pe_list_mutex);
+
+		/* associate this pe to its pdn */
+		list_for_each_entry(vf_pdn, &pdn->parent->child_list, list) {
+			if (vf_pdn->busno == vf_bus &&
+			    vf_pdn->devfn == vf_devfn) {
+				vf_pdn->pe_number = pe_num;
+				break;
+			}
+		}
+
+		pnv_pci_ioda2_setup_dma_pe(phb, pe);
+	}
+}
+
+/*
+ * Set up the PowerNV side of SR-IOV for @num_vfs VFs of the PF @pdev:
+ * allocate a contiguous block of PEs, assign M64 windows, shift the IOV BARs
+ * if required and configure the VF PEs.
+ *
+ * Returns 0 on success, -ENXIO if the PHB is not IODA2, -ENOSPC if the device
+ * has no pnv_iov_data, -EBUSY if no PEs are available, or the error from the
+ * M64 assignment / BAR shift. Everything allocated here is released again on
+ * failure.
+ */
+static int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
+{
+	struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus);
+	struct pnv_iov_data *iov = pnv_iov_get(pdev);
+	struct pnv_ioda_pe *first_pe;
+	int err;
+	u16 vf;
+
+	/*
+	 * There's a calls to IODA2 PE setup code littered throughout. We could
+	 * probably fix that, but we'd still have problems due to the
+	 * restriction inherent on IODA1 PHBs.
+	 *
+	 * NB: We class IODA3 as IODA2 since they're very similar.
+	 */
+	if (phb->type != PNV_PHB_IODA2) {
+		pci_err(pdev, "SR-IOV is not supported on this PHB\n");
+		return -ENXIO;
+	}
+
+	if (!iov) {
+		dev_info(&pdev->dev, "don't support this SRIOV device with non 64bit-prefetchable IOV BAR\n");
+		return -ENOSPC;
+	}
+
+	/* allocate a contiguous block of PEs for our VFs */
+	first_pe = pnv_ioda_alloc_pe(phb, num_vfs);
+	if (!first_pe) {
+		pci_err(pdev, "Unable to allocate PEs for %d VFs\n", num_vfs);
+		return -EBUSY;
+	}
+
+	iov->vf_pe_arr = first_pe;
+	iov->num_vfs = num_vfs;
+
+	/* Assign M64 window accordingly */
+	err = pnv_pci_vf_assign_m64(pdev, num_vfs);
+	if (err) {
+		dev_info(&pdev->dev, "Not enough M64 window resources\n");
+		goto free_pes;
+	}
+
+	/*
+	 * When using one M64 BAR to map one IOV BAR, we need to shift
+	 * the IOV BAR according to the PE# allocated to the VFs.
+	 * Otherwise, the PE# for the VF will conflict with others.
+	 */
+	if (iov->need_shift) {
+		err = pnv_pci_vf_resource_shift(pdev, first_pe->pe_number);
+		if (err) {
+			pnv_pci_vf_release_m64(pdev, num_vfs);
+			goto free_pes;
+		}
+	}
+
+	/* Setup VF PEs */
+	pnv_ioda_setup_vf_PE(pdev, num_vfs);
+
+	return 0;
+
+free_pes:
+	for (vf = 0; vf < num_vfs; vf++)
+		pnv_ioda_free_pe(&iov->vf_pe_arr[vf]);
+
+	return err;
+}
+
+/*
+ * SR-IOV disable hook (installed as ppc_md.pcibios_sriov_disable): tears down
+ * the PowerNV VF state of @pdev via pnv_pci_sriov_disable() and then removes
+ * the VF pci_dn's. Always returns 0.
+ */
+int pnv_pcibios_sriov_disable(struct pci_dev *pdev)
+{
+ pnv_pci_sriov_disable(pdev);
+
+ /* Release PCI data */
+ remove_sriov_vf_pdns(pdev);
+ return 0;
+}
+
+/*
+ * SR-IOV enable hook (installed as ppc_md.pcibios_sriov_enable): creates the
+ * VF pci_dn's for @pdev and then sets up the PowerNV VF state for @num_vfs
+ * VFs.
+ *
+ * Returns 0 on success or the error from pnv_pci_sriov_enable(). On failure
+ * the pci_dn's created here are removed again, mirroring
+ * pnv_pcibios_sriov_disable(), so they do not linger after a failed enable.
+ */
+int pnv_pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
+{
+	int ret;
+
+	/* Allocate PCI data */
+	add_sriov_vf_pdns(pdev);
+
+	ret = pnv_pci_sriov_enable(pdev, num_vfs);
+	if (ret)
+		remove_sriov_vf_pdns(pdev);
+
+	return ret;
+}
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
new file mode 100644
index 000000000..35f566aa0
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -0,0 +1,862 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Support PCI/PCIe on PowerNV platforms
+ *
+ * Copyright 2011 Benjamin Herrenschmidt, IBM Corp.
+ */
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/delay.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/irq.h>
+#include <linux/io.h>
+#include <linux/msi.h>
+#include <linux/iommu.h>
+#include <linux/sched/mm.h>
+
+#include <asm/sections.h>
+#include <asm/io.h>
+#include <asm/pci-bridge.h>
+#include <asm/machdep.h>
+#include <asm/msi_bitmap.h>
+#include <asm/ppc-pci.h>
+#include <asm/pnv-pci.h>
+#include <asm/opal.h>
+#include <asm/iommu.h>
+#include <asm/tce.h>
+#include <asm/firmware.h>
+#include <asm/eeh_event.h>
+#include <asm/eeh.h>
+
+#include "powernv.h"
+#include "pci.h"
+
+static DEFINE_MUTEX(tunnel_mutex);
+
+/*
+ * Compute the slot id for device-tree node @np and store it in @id.
+ *
+ * The bus/device/function number is taken from bits 8-23 of @np's "reg"
+ * property. @np and then its ancestors are walked until a node compatible
+ * with one of the supported PHB types is found; that node's "ibm,opal-phbid"
+ * supplies the PHB id. An OpenCAPI PHB yields PCI_PHB_SLOT_ID(phbid), any
+ * other PCI_SLOT_ID(phbid, bdfn).
+ *
+ * Returns 0 on success, -ENXIO if "reg" or "ibm,opal-phbid" cannot be read,
+ * -ENODEV if no PHB node is found before the walk ends (no more parents, or a
+ * node for which PCI_DN() is NULL).
+ *
+ * The caller's reference on @np is left untouched.
+ */
+int pnv_pci_get_slot_id(struct device_node *np, uint64_t *id)
+{
+	struct device_node *node;
+	u32 bdfn;
+	u64 phbid;
+	int ret;
+
+	ret = of_property_read_u32(np, "reg", &bdfn);
+	if (ret)
+		return -ENXIO;
+
+	bdfn = ((bdfn & 0x00ffff00) >> 8);
+
+	/*
+	 * Hold our own reference on the node being examined: take one on @np
+	 * up front and let of_get_next_parent() swap it for a reference on
+	 * the parent at every step. Every exit path below then drops exactly
+	 * the one reference we hold.
+	 */
+	for (node = of_node_get(np); node; node = of_get_next_parent(node)) {
+		if (!PCI_DN(node))
+			break;
+
+		if (!of_device_is_compatible(node, "ibm,ioda2-phb") &&
+		    !of_device_is_compatible(node, "ibm,ioda3-phb") &&
+		    !of_device_is_compatible(node, "ibm,ioda2-npu2-opencapi-phb"))
+			continue;
+
+		ret = of_property_read_u64(node, "ibm,opal-phbid", &phbid);
+		if (ret) {
+			of_node_put(node);
+			return -ENXIO;
+		}
+
+		if (of_device_is_compatible(node, "ibm,ioda2-npu2-opencapi-phb"))
+			*id = PCI_PHB_SLOT_ID(phbid);
+		else
+			*id = PCI_SLOT_ID(phbid, bdfn);
+
+		of_node_put(node);
+		return 0;
+	}
+
+	/* node is NULL if we ran out of parents; of_node_put() accepts that */
+	of_node_put(node);
+	return -ENODEV;
+}
+EXPORT_SYMBOL_GPL(pnv_pci_get_slot_id);
+
+/*
+ * Ask OPAL for the device tree identified by @phandle, passing @buf and @len
+ * through to opal_get_device_tree().
+ *
+ * Returns the (non-negative) OPAL return value on success, -ENXIO if the
+ * OPAL_GET_DEVICE_TREE token is not available, -EIO if the OPAL return value
+ * is below OPAL_SUCCESS.
+ */
+int pnv_pci_get_device_tree(uint32_t phandle, void *buf, uint64_t len)
+{
+	int64_t opal_rc;
+
+	if (!opal_check_token(OPAL_GET_DEVICE_TREE))
+		return -ENXIO;
+
+	opal_rc = opal_get_device_tree(phandle, (uint64_t)buf, len);
+
+	return (opal_rc < OPAL_SUCCESS) ? -EIO : opal_rc;
+}
+EXPORT_SYMBOL_GPL(pnv_pci_get_device_tree);
+
+/*
+ * Query OPAL for the presence state of slot @id; the address of @state is
+ * handed to the firmware call.
+ *
+ * Returns 0 on success, -ENXIO if the OPAL_PCI_GET_PRESENCE_STATE token is
+ * not available, -EIO if the OPAL call does not return OPAL_SUCCESS.
+ */
+int pnv_pci_get_presence_state(uint64_t id, uint8_t *state)
+{
+	if (!opal_check_token(OPAL_PCI_GET_PRESENCE_STATE))
+		return -ENXIO;
+
+	if (opal_pci_get_presence_state(id, (uint64_t)state) != OPAL_SUCCESS)
+		return -EIO;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(pnv_pci_get_presence_state);
+
+/*
+ * Query OPAL for the power state of slot @id; the address of @state is handed
+ * to the firmware call.
+ *
+ * Returns 0 on success, -ENXIO if the OPAL_PCI_GET_POWER_STATE token is not
+ * available, -EIO if the OPAL call does not return OPAL_SUCCESS.
+ */
+int pnv_pci_get_power_state(uint64_t id, uint8_t *state)
+{
+	if (!opal_check_token(OPAL_PCI_GET_POWER_STATE))
+		return -ENXIO;
+
+	if (opal_pci_get_power_state(id, (uint64_t)state) != OPAL_SUCCESS)
+		return -EIO;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(pnv_pci_get_power_state);
+
+/*
+ * Ask OPAL to set the power state of slot @id to @state.
+ *
+ * Returns:
+ *   0       the call completed immediately, or it completed asynchronously
+ *           and @msg is NULL;
+ *   1       the call completed asynchronously and the response was copied
+ *           to @msg;
+ *   -ENXIO  the OPAL_PCI_SET_POWER_STATE token is not available;
+ *   -EIO    OPAL returned something other than success / async completion;
+ *   < 0     error from acquiring the async token or waiting for the response.
+ */
+int pnv_pci_set_power_state(uint64_t id, uint8_t state, struct opal_msg *msg)
+{
+	struct opal_msg reply;
+	int64_t opal_rc;
+	int token, ret;
+
+	if (!opal_check_token(OPAL_PCI_SET_POWER_STATE))
+		return -ENXIO;
+
+	token = opal_async_get_token_interruptible();
+	if (unlikely(token < 0))
+		return token;
+
+	opal_rc = opal_pci_set_power_state(token, id, (uint64_t)&state);
+	if (opal_rc == OPAL_SUCCESS) {
+		ret = 0;
+	} else if (opal_rc != OPAL_ASYNC_COMPLETION) {
+		ret = -EIO;
+	} else {
+		/* Asynchronous completion: wait for the firmware response */
+		ret = opal_async_wait_response(token, &reply);
+		if (ret >= 0 && msg) {
+			ret = 1;
+			memcpy(msg, &reply, sizeof(reply));
+		}
+	}
+
+	opal_async_release_token(token);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(pnv_pci_set_power_state);
+
+/*
+ * Nicely print the contents of the PE State Tables (PEST).
+ *
+ * @pestA and @pestB hold @pest_size big-endian entries each. An entry pair is
+ * printed when it differs from the previous pair and has
+ * PNV_IODA_STOPPED_STATE set in either half. A run of identical stopped
+ * entries is collapsed into a single "as above" line naming the last index of
+ * the run, including a run that extends to the end of the table.
+ */
+static void pnv_pci_dump_pest(__be64 pestA[], __be64 pestB[], int pest_size)
+{
+	/* CPU-endian copies, hence u64 rather than __be64 */
+	u64 prevA = ULONG_MAX, prevB = ULONG_MAX;
+	bool dup = false;
+	int i;
+
+	for (i = 0; i < pest_size; i++) {
+		u64 peA = be64_to_cpu(pestA[i]);
+		u64 peB = be64_to_cpu(pestB[i]);
+
+		if (peA != prevA || peB != prevB) {
+			/* A run of duplicates ended at the previous index */
+			if (dup) {
+				pr_info("PE[..%03x] A/B: as above\n", i-1);
+				dup = false;
+			}
+			prevA = peA;
+			prevB = peB;
+			if (peA & PNV_IODA_STOPPED_STATE ||
+			    peB & PNV_IODA_STOPPED_STATE)
+				pr_info("PE[%03x] A/B: %016llx %016llx\n",
+					i, peA, peB);
+		} else if (!dup && (peA & PNV_IODA_STOPPED_STATE ||
+				    peB & PNV_IODA_STOPPED_STATE)) {
+			dup = true;
+		}
+	}
+
+	/* Close a run of duplicates that reaches the end of the table */
+	if (dup)
+		pr_info("PE[..%03x] A/B: as above\n", pest_size - 1);
+}
+
+/*
+ * Print the P7IOC PHB diagnostic data in @common to the kernel log.
+ *
+ * @common is reinterpreted as a struct OpalIoP7IOCPhbErrorData. Each group of
+ * registers is printed only if at least one of the fields tested for that
+ * group is non-zero; values are converted from big-endian before printing.
+ * The PEST entries are dumped last via pnv_pci_dump_pest().
+ */
+static void pnv_pci_dump_p7ioc_diag_data(struct pci_controller *hose,
+ struct OpalIoPhbErrorCommon *common)
+{
+ struct OpalIoP7IOCPhbErrorData *data;
+
+ data = (struct OpalIoP7IOCPhbErrorData *)common;
+ pr_info("P7IOC PHB#%x Diag-data (Version: %d)\n",
+ hose->global_number, be32_to_cpu(common->version));
+
+ if (data->brdgCtl)
+ pr_info("brdgCtl: %08x\n",
+ be32_to_cpu(data->brdgCtl));
+ if (data->portStatusReg || data->rootCmplxStatus ||
+ data->busAgentStatus)
+ pr_info("UtlSts: %08x %08x %08x\n",
+ be32_to_cpu(data->portStatusReg),
+ be32_to_cpu(data->rootCmplxStatus),
+ be32_to_cpu(data->busAgentStatus));
+ if (data->deviceStatus || data->slotStatus ||
+ data->linkStatus || data->devCmdStatus ||
+ data->devSecStatus)
+ pr_info("RootSts: %08x %08x %08x %08x %08x\n",
+ be32_to_cpu(data->deviceStatus),
+ be32_to_cpu(data->slotStatus),
+ be32_to_cpu(data->linkStatus),
+ be32_to_cpu(data->devCmdStatus),
+ be32_to_cpu(data->devSecStatus));
+ if (data->rootErrorStatus || data->uncorrErrorStatus ||
+ data->corrErrorStatus)
+ pr_info("RootErrSts: %08x %08x %08x\n",
+ be32_to_cpu(data->rootErrorStatus),
+ be32_to_cpu(data->uncorrErrorStatus),
+ be32_to_cpu(data->corrErrorStatus));
+ if (data->tlpHdr1 || data->tlpHdr2 ||
+ data->tlpHdr3 || data->tlpHdr4)
+ pr_info("RootErrLog: %08x %08x %08x %08x\n",
+ be32_to_cpu(data->tlpHdr1),
+ be32_to_cpu(data->tlpHdr2),
+ be32_to_cpu(data->tlpHdr3),
+ be32_to_cpu(data->tlpHdr4));
+ if (data->sourceId || data->errorClass ||
+ data->correlator)
+ pr_info("RootErrLog1: %08x %016llx %016llx\n",
+ be32_to_cpu(data->sourceId),
+ be64_to_cpu(data->errorClass),
+ be64_to_cpu(data->correlator));
+ if (data->p7iocPlssr || data->p7iocCsr)
+ pr_info("PhbSts: %016llx %016llx\n",
+ be64_to_cpu(data->p7iocPlssr),
+ be64_to_cpu(data->p7iocCsr));
+ if (data->lemFir)
+ pr_info("Lem: %016llx %016llx %016llx\n",
+ be64_to_cpu(data->lemFir),
+ be64_to_cpu(data->lemErrorMask),
+ be64_to_cpu(data->lemWOF));
+ if (data->phbErrorStatus)
+ pr_info("PhbErr: %016llx %016llx %016llx %016llx\n",
+ be64_to_cpu(data->phbErrorStatus),
+ be64_to_cpu(data->phbFirstErrorStatus),
+ be64_to_cpu(data->phbErrorLog0),
+ be64_to_cpu(data->phbErrorLog1));
+ if (data->mmioErrorStatus)
+ pr_info("OutErr: %016llx %016llx %016llx %016llx\n",
+ be64_to_cpu(data->mmioErrorStatus),
+ be64_to_cpu(data->mmioFirstErrorStatus),
+ be64_to_cpu(data->mmioErrorLog0),
+ be64_to_cpu(data->mmioErrorLog1));
+ if (data->dma0ErrorStatus)
+ pr_info("InAErr: %016llx %016llx %016llx %016llx\n",
+ be64_to_cpu(data->dma0ErrorStatus),
+ be64_to_cpu(data->dma0FirstErrorStatus),
+ be64_to_cpu(data->dma0ErrorLog0),
+ be64_to_cpu(data->dma0ErrorLog1));
+ if (data->dma1ErrorStatus)
+ pr_info("InBErr: %016llx %016llx %016llx %016llx\n",
+ be64_to_cpu(data->dma1ErrorStatus),
+ be64_to_cpu(data->dma1FirstErrorStatus),
+ be64_to_cpu(data->dma1ErrorLog0),
+ be64_to_cpu(data->dma1ErrorLog1));
+
+ pnv_pci_dump_pest(data->pestA, data->pestB, OPAL_P7IOC_NUM_PEST_REGS);
+}
+
+/*
+ * Print the PHB3 diagnostic data in @common to the kernel log.
+ *
+ * @common is reinterpreted as a struct OpalIoPhb3ErrorData. Each group of
+ * registers is printed only if at least one of the fields tested for that
+ * group is non-zero; values are converted from big-endian before printing.
+ * The PEST entries are dumped last via pnv_pci_dump_pest().
+ */
+static void pnv_pci_dump_phb3_diag_data(struct pci_controller *hose,
+ struct OpalIoPhbErrorCommon *common)
+{
+ struct OpalIoPhb3ErrorData *data;
+
+ data = (struct OpalIoPhb3ErrorData*)common;
+ pr_info("PHB3 PHB#%x Diag-data (Version: %d)\n",
+ hose->global_number, be32_to_cpu(common->version));
+ if (data->brdgCtl)
+ pr_info("brdgCtl: %08x\n",
+ be32_to_cpu(data->brdgCtl));
+ if (data->portStatusReg || data->rootCmplxStatus ||
+ data->busAgentStatus)
+ pr_info("UtlSts: %08x %08x %08x\n",
+ be32_to_cpu(data->portStatusReg),
+ be32_to_cpu(data->rootCmplxStatus),
+ be32_to_cpu(data->busAgentStatus));
+ if (data->deviceStatus || data->slotStatus ||
+ data->linkStatus || data->devCmdStatus ||
+ data->devSecStatus)
+ pr_info("RootSts: %08x %08x %08x %08x %08x\n",
+ be32_to_cpu(data->deviceStatus),
+ be32_to_cpu(data->slotStatus),
+ be32_to_cpu(data->linkStatus),
+ be32_to_cpu(data->devCmdStatus),
+ be32_to_cpu(data->devSecStatus));
+ if (data->rootErrorStatus || data->uncorrErrorStatus ||
+ data->corrErrorStatus)
+ pr_info("RootErrSts: %08x %08x %08x\n",
+ be32_to_cpu(data->rootErrorStatus),
+ be32_to_cpu(data->uncorrErrorStatus),
+ be32_to_cpu(data->corrErrorStatus));
+ if (data->tlpHdr1 || data->tlpHdr2 ||
+ data->tlpHdr3 || data->tlpHdr4)
+ pr_info("RootErrLog: %08x %08x %08x %08x\n",
+ be32_to_cpu(data->tlpHdr1),
+ be32_to_cpu(data->tlpHdr2),
+ be32_to_cpu(data->tlpHdr3),
+ be32_to_cpu(data->tlpHdr4));
+ if (data->sourceId || data->errorClass ||
+ data->correlator)
+ pr_info("RootErrLog1: %08x %016llx %016llx\n",
+ be32_to_cpu(data->sourceId),
+ be64_to_cpu(data->errorClass),
+ be64_to_cpu(data->correlator));
+ if (data->nFir)
+ pr_info("nFir: %016llx %016llx %016llx\n",
+ be64_to_cpu(data->nFir),
+ be64_to_cpu(data->nFirMask),
+ be64_to_cpu(data->nFirWOF));
+ if (data->phbPlssr || data->phbCsr)
+ pr_info("PhbSts: %016llx %016llx\n",
+ be64_to_cpu(data->phbPlssr),
+ be64_to_cpu(data->phbCsr));
+ if (data->lemFir)
+ pr_info("Lem: %016llx %016llx %016llx\n",
+ be64_to_cpu(data->lemFir),
+ be64_to_cpu(data->lemErrorMask),
+ be64_to_cpu(data->lemWOF));
+ if (data->phbErrorStatus)
+ pr_info("PhbErr: %016llx %016llx %016llx %016llx\n",
+ be64_to_cpu(data->phbErrorStatus),
+ be64_to_cpu(data->phbFirstErrorStatus),
+ be64_to_cpu(data->phbErrorLog0),
+ be64_to_cpu(data->phbErrorLog1));
+ if (data->mmioErrorStatus)
+ pr_info("OutErr: %016llx %016llx %016llx %016llx\n",
+ be64_to_cpu(data->mmioErrorStatus),
+ be64_to_cpu(data->mmioFirstErrorStatus),
+ be64_to_cpu(data->mmioErrorLog0),
+ be64_to_cpu(data->mmioErrorLog1));
+ if (data->dma0ErrorStatus)
+ pr_info("InAErr: %016llx %016llx %016llx %016llx\n",
+ be64_to_cpu(data->dma0ErrorStatus),
+ be64_to_cpu(data->dma0FirstErrorStatus),
+ be64_to_cpu(data->dma0ErrorLog0),
+ be64_to_cpu(data->dma0ErrorLog1));
+ if (data->dma1ErrorStatus)
+ pr_info("InBErr: %016llx %016llx %016llx %016llx\n",
+ be64_to_cpu(data->dma1ErrorStatus),
+ be64_to_cpu(data->dma1FirstErrorStatus),
+ be64_to_cpu(data->dma1ErrorLog0),
+ be64_to_cpu(data->dma1ErrorLog1));
+
+ pnv_pci_dump_pest(data->pestA, data->pestB, OPAL_PHB3_NUM_PEST_REGS);
+}
+
+/*
+ * Print the PHB4 flavour of the OPAL PHB diag-data to the kernel log.
+ *
+ * @hose:   controller the data was collected for; only global_number is read.
+ * @common: start of the diag-data buffer. It is reinterpreted as a
+ *          struct OpalIoPhb4ErrorData, so the caller must already have
+ *          matched ioType (see pnv_pci_dump_phb_diag_data()).
+ *
+ * Every field is converted from big-endian before printing. Each register
+ * group is emitted only when its gating field(s) are non-zero, which keeps
+ * the dump limited to registers that actually recorded something. The PEST
+ * entries are dumped last via pnv_pci_dump_pest().
+ */
+static void pnv_pci_dump_phb4_diag_data(struct pci_controller *hose,
+					struct OpalIoPhbErrorCommon *common)
+{
+	struct OpalIoPhb4ErrorData *data;
+
+	data = (struct OpalIoPhb4ErrorData*)common;
+	/*
+	 * The PHB number is printed in hex, matching the PHB3 dump and the
+	 * "PHB#%x-PE#%x" warnings emitted elsewhere in this file, so that one
+	 * PHB is identified by the same number in every log line.
+	 */
+	pr_info("PHB4 PHB#%x Diag-data (Version: %d)\n",
+		hose->global_number, be32_to_cpu(common->version));
+	if (data->brdgCtl)
+		pr_info("brdgCtl: %08x\n",
+			be32_to_cpu(data->brdgCtl));
+	if (data->deviceStatus || data->slotStatus ||
+	    data->linkStatus || data->devCmdStatus ||
+	    data->devSecStatus)
+		pr_info("RootSts: %08x %08x %08x %08x %08x\n",
+			be32_to_cpu(data->deviceStatus),
+			be32_to_cpu(data->slotStatus),
+			be32_to_cpu(data->linkStatus),
+			be32_to_cpu(data->devCmdStatus),
+			be32_to_cpu(data->devSecStatus));
+	if (data->rootErrorStatus || data->uncorrErrorStatus ||
+	    data->corrErrorStatus)
+		pr_info("RootErrSts: %08x %08x %08x\n",
+			be32_to_cpu(data->rootErrorStatus),
+			be32_to_cpu(data->uncorrErrorStatus),
+			be32_to_cpu(data->corrErrorStatus));
+	if (data->tlpHdr1 || data->tlpHdr2 ||
+	    data->tlpHdr3 || data->tlpHdr4)
+		pr_info("RootErrLog: %08x %08x %08x %08x\n",
+			be32_to_cpu(data->tlpHdr1),
+			be32_to_cpu(data->tlpHdr2),
+			be32_to_cpu(data->tlpHdr3),
+			be32_to_cpu(data->tlpHdr4));
+	if (data->sourceId)
+		pr_info("sourceId: %08x\n", be32_to_cpu(data->sourceId));
+	if (data->nFir)
+		pr_info("nFir: %016llx %016llx %016llx\n",
+			be64_to_cpu(data->nFir),
+			be64_to_cpu(data->nFirMask),
+			be64_to_cpu(data->nFirWOF));
+	if (data->phbPlssr || data->phbCsr)
+		pr_info("PhbSts: %016llx %016llx\n",
+			be64_to_cpu(data->phbPlssr),
+			be64_to_cpu(data->phbCsr));
+	if (data->lemFir)
+		pr_info("Lem: %016llx %016llx %016llx\n",
+			be64_to_cpu(data->lemFir),
+			be64_to_cpu(data->lemErrorMask),
+			be64_to_cpu(data->lemWOF));
+	if (data->phbErrorStatus)
+		pr_info("PhbErr: %016llx %016llx %016llx %016llx\n",
+			be64_to_cpu(data->phbErrorStatus),
+			be64_to_cpu(data->phbFirstErrorStatus),
+			be64_to_cpu(data->phbErrorLog0),
+			be64_to_cpu(data->phbErrorLog1));
+	if (data->phbTxeErrorStatus)
+		pr_info("PhbTxeErr: %016llx %016llx %016llx %016llx\n",
+			be64_to_cpu(data->phbTxeErrorStatus),
+			be64_to_cpu(data->phbTxeFirstErrorStatus),
+			be64_to_cpu(data->phbTxeErrorLog0),
+			be64_to_cpu(data->phbTxeErrorLog1));
+	if (data->phbRxeArbErrorStatus)
+		pr_info("RxeArbErr: %016llx %016llx %016llx %016llx\n",
+			be64_to_cpu(data->phbRxeArbErrorStatus),
+			be64_to_cpu(data->phbRxeArbFirstErrorStatus),
+			be64_to_cpu(data->phbRxeArbErrorLog0),
+			be64_to_cpu(data->phbRxeArbErrorLog1));
+	if (data->phbRxeMrgErrorStatus)
+		pr_info("RxeMrgErr: %016llx %016llx %016llx %016llx\n",
+			be64_to_cpu(data->phbRxeMrgErrorStatus),
+			be64_to_cpu(data->phbRxeMrgFirstErrorStatus),
+			be64_to_cpu(data->phbRxeMrgErrorLog0),
+			be64_to_cpu(data->phbRxeMrgErrorLog1));
+	if (data->phbRxeTceErrorStatus)
+		pr_info("RxeTceErr: %016llx %016llx %016llx %016llx\n",
+			be64_to_cpu(data->phbRxeTceErrorStatus),
+			be64_to_cpu(data->phbRxeTceFirstErrorStatus),
+			be64_to_cpu(data->phbRxeTceErrorLog0),
+			be64_to_cpu(data->phbRxeTceErrorLog1));
+
+	if (data->phbPblErrorStatus)
+		pr_info("PblErr: %016llx %016llx %016llx %016llx\n",
+			be64_to_cpu(data->phbPblErrorStatus),
+			be64_to_cpu(data->phbPblFirstErrorStatus),
+			be64_to_cpu(data->phbPblErrorLog0),
+			be64_to_cpu(data->phbPblErrorLog1));
+	/* Unlike the other groups, this one prints the two logs before the status */
+	if (data->phbPcieDlpErrorStatus)
+		pr_info("PcieDlp: %016llx %016llx %016llx\n",
+			be64_to_cpu(data->phbPcieDlpErrorLog1),
+			be64_to_cpu(data->phbPcieDlpErrorLog2),
+			be64_to_cpu(data->phbPcieDlpErrorStatus));
+	if (data->phbRegbErrorStatus)
+		pr_info("RegbErr: %016llx %016llx %016llx %016llx\n",
+			be64_to_cpu(data->phbRegbErrorStatus),
+			be64_to_cpu(data->phbRegbFirstErrorStatus),
+			be64_to_cpu(data->phbRegbErrorLog0),
+			be64_to_cpu(data->phbRegbErrorLog1));
+
+
+	pnv_pci_dump_pest(data->pestA, data->pestB, OPAL_PHB4_NUM_PEST_REGS);
+}
+
+/*
+ * Print a PHB diag-data buffer using the dumper that matches the ioType
+ * stored (big-endian) in the buffer's common header.
+ *
+ * @hose:     controller the buffer belongs to.
+ * @log_buff: raw diag-data as filled in by firmware.
+ *
+ * Silently returns if either argument is NULL; warns on an unknown ioType.
+ */
+void pnv_pci_dump_phb_diag_data(struct pci_controller *hose,
+				unsigned char *log_buff)
+{
+	struct OpalIoPhbErrorCommon *hdr;
+	unsigned int io_type;
+
+	if (!hose || !log_buff)
+		return;
+
+	hdr = (struct OpalIoPhbErrorCommon *)log_buff;
+	io_type = be32_to_cpu(hdr->ioType);
+
+	if (io_type == OPAL_PHB_ERROR_DATA_TYPE_P7IOC)
+		pnv_pci_dump_p7ioc_diag_data(hose, hdr);
+	else if (io_type == OPAL_PHB_ERROR_DATA_TYPE_PHB3)
+		pnv_pci_dump_phb3_diag_data(hose, hdr);
+	else if (io_type == OPAL_PHB_ERROR_DATA_TYPE_PHB4)
+		pnv_pci_dump_phb4_diag_data(hose, hdr);
+	else
+		pr_warn("%s: Unrecognized ioType %d\n",
+			__func__, io_type);
+}
+
+/*
+ * Clear the frozen state of PE @pe_no on @phb and, if that fails, dump the
+ * PHB diag-data that was captured beforehand. Runs entirely under phb->lock
+ * with interrupts disabled.
+ */
+static void pnv_pci_handle_eeh_config(struct pnv_phb *phb, u32 pe_no)
+{
+	unsigned long irq_flags, opal_rc;
+	int diag_valid, err = 0;
+
+	spin_lock_irqsave(&phb->lock, irq_flags);
+
+	/* Capture the PHB diag-data before the freeze is cleared */
+	opal_rc = opal_pci_get_phb_diag_data2(phb->opal_id, phb->diag_data,
+					      phb->diag_data_size);
+	diag_valid = (opal_rc == OPAL_SUCCESS);
+
+	if (!phb->unfreeze_pe) {
+		/* No compound-PE hook: clear the freeze straight through OPAL */
+		opal_rc = opal_pci_eeh_freeze_clear(phb->opal_id, pe_no,
+					OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
+		if (opal_rc) {
+			pr_warn("%s: Failure %ld clearing frozen PHB#%x-PE#%x\n",
+				__func__, opal_rc, phb->hose->global_number,
+				pe_no);
+			err = -EIO;
+		}
+	} else {
+		/* PHB supports compound PEs, let its hook do the work */
+		err = phb->unfreeze_pe(phb, pe_no,
+				       OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
+	}
+
+	/*
+	 * The diag buffer is only shown when clearing the EEH status failed.
+	 * Dumping it unconditionally would flood the log with the ordinary
+	 * errors produced while probing empty slots.
+	 */
+	if (err && diag_valid)
+		pnv_pci_dump_phb_diag_data(phb->hose, phb->diag_data);
+
+	spin_unlock_irqrestore(&phb->lock, irq_flags);
+}
+
+/*
+ * After a config access to @pdn, look up the freeze state of its PE and,
+ * if the PE is frozen (MMIO, DMA or both), clear the freeze.
+ */
+static void pnv_pci_config_check_eeh(struct pci_dn *pdn)
+{
+	struct pnv_phb *phb = pdn->phb->private_data;
+	u8 fstate = 0;
+	__be16 pcierr = 0;
+	unsigned int pe_no;
+	s64 rc;
+
+	/*
+	 * The PE# may not have been assigned yet while the bus is still
+	 * being probed; such errors are accounted to the reserved PE.
+	 */
+	pe_no = pdn->pe_number;
+	if (pe_no == IODA_INVALID_PE)
+		pe_no = phb->ioda.reserved_pe_idx;
+
+	/* Read the frozen state, through the compound-PE hook when present */
+	if (!phb->get_pe_state) {
+		rc = opal_pci_eeh_freeze_status(phb->opal_id, pe_no,
+						&fstate, &pcierr, NULL);
+		if (rc) {
+			pr_warn("%s: Failure %lld getting PHB#%x-PE#%x state\n",
+				__func__, rc, phb->hose->global_number, pe_no);
+			return;
+		}
+	} else {
+		fstate = phb->get_pe_state(phb, pe_no);
+	}
+
+	pr_devel(" -> EEH check, bdfn=%04x PE#%x fstate=%x\n",
+		 (pdn->busno << 8) | (pdn->devfn), pe_no, fstate);
+
+	/* Nothing to do unless the PE is in one of the frozen states */
+	if (fstate != OPAL_EEH_STOPPED_MMIO_FREEZE &&
+	    fstate != OPAL_EEH_STOPPED_DMA_FREEZE &&
+	    fstate != OPAL_EEH_STOPPED_MMIO_DMA_FREEZE)
+		return;
+
+	/* With compound PEs, freeze the PE first for consistency */
+	if (phb->freeze_pe)
+		phb->freeze_pe(phb, pe_no);
+
+	pnv_pci_handle_eeh_config(phb, pe_no);
+}
+
+/*
+ * Read @size bytes of config space at offset @where from the device
+ * described by @pdn, via OPAL.
+ *
+ * Returns PCIBIOS_FUNC_NOT_SUPPORTED for a @size other than 1, 2 or 4 and
+ * PCIBIOS_SUCCESSFUL otherwise. A failed OPAL call is not reported as an
+ * error; instead *@val is set to all-ones for the access width.
+ */
+int pnv_pci_cfg_read(struct pci_dn *pdn,
+		     int where, int size, u32 *val)
+{
+	struct pnv_phb *phb = pdn->phb->private_data;
+	u32 bdfn = (pdn->busno << 8) | pdn->devfn;
+	s64 rc;
+
+	switch (size) {
+	case 1: {
+		u8 byte;
+
+		rc = opal_pci_config_read_byte(phb->opal_id, bdfn, where,
+					       &byte);
+		if (rc == OPAL_SUCCESS)
+			*val = byte;
+		else
+			*val = 0xff;
+		break;
+	}
+	case 2: {
+		__be16 half;
+
+		rc = opal_pci_config_read_half_word(phb->opal_id, bdfn, where,
+						    &half);
+		if (rc == OPAL_SUCCESS)
+			*val = be16_to_cpu(half);
+		else
+			*val = 0xffff;
+		break;
+	}
+	case 4: {
+		__be32 word;
+
+		rc = opal_pci_config_read_word(phb->opal_id, bdfn, where,
+					       &word);
+		if (rc == OPAL_SUCCESS)
+			*val = be32_to_cpu(word);
+		else
+			*val = 0xffffffff;
+		break;
+	}
+	default:
+		return PCIBIOS_FUNC_NOT_SUPPORTED;
+	}
+
+	pr_devel("%s: bus: %x devfn: %x +%x/%x -> %08x\n",
+		 __func__, pdn->busno, pdn->devfn, where, size, *val);
+	return PCIBIOS_SUCCESSFUL;
+}
+
+/*
+ * Write the low @size bytes of @val to config space offset @where of the
+ * device described by @pdn, via OPAL.
+ *
+ * Returns PCIBIOS_FUNC_NOT_SUPPORTED for a @size other than 1, 2 or 4 and
+ * PCIBIOS_SUCCESSFUL otherwise; the OPAL return code is not inspected.
+ */
+int pnv_pci_cfg_write(struct pci_dn *pdn,
+		      int where, int size, u32 val)
+{
+	struct pnv_phb *phb = pdn->phb->private_data;
+	u32 bdfn = (pdn->busno << 8) | pdn->devfn;
+
+	/* Traced before the size check, so unsupported sizes are logged too */
+	pr_devel("%s: bus: %x devfn: %x +%x/%x -> %08x\n",
+		 __func__, pdn->busno, pdn->devfn, where, size, val);
+
+	if (size == 1)
+		opal_pci_config_write_byte(phb->opal_id, bdfn, where, val);
+	else if (size == 2)
+		opal_pci_config_write_half_word(phb->opal_id, bdfn, where, val);
+	else if (size == 4)
+		opal_pci_config_write_word(phb->opal_id, bdfn, where, val);
+	else
+		return PCIBIOS_FUNC_NOT_SUPPORTED;
+
+	return PCIBIOS_SUCCESSFUL;
+}
+
+#ifdef CONFIG_EEH
+/*
+ * Tell whether config space of the device behind @pdn may be accessed.
+ *
+ * Returns true when EEH is not enabled on the PHB, or when the device has
+ * no eeh_dev. Returns false when the device's PE has config access blocked
+ * (EEH_PE_CFG_BLOCKED) or the device is marked EEH_DEV_REMOVED.
+ */
+static bool pnv_pci_cfg_check(struct pci_dn *pdn)
+{
+	struct eeh_dev *edev = NULL;
+	struct pnv_phb *phb = pdn->phb->private_data;
+
+	/* EEH not enabled ? */
+	if (!(phb->flags & PNV_PHB_FLAG_EEH))
+		return true;
+
+	/* PE reset or device removed ? */
+	edev = pdn->edev;
+	if (edev) {
+		if (edev->pe &&
+		    (edev->pe->state & EEH_PE_CFG_BLOCKED))
+			return false;
+
+		if (edev->mode & EEH_DEV_REMOVED)
+			return false;
+	}
+
+	return true;
+}
+#else
+/*
+ * Without EEH nothing can block config access. The return type is spelled
+ * out so the stub has the same prototype as the CONFIG_EEH variant rather
+ * than relying on implicit int.
+ */
+static inline bool pnv_pci_cfg_check(struct pci_dn *pdn)
+{
+	return true;
+}
+#endif /* CONFIG_EEH */
+
+/*
+ * pci_ops read callback.
+ *
+ * *@val is preset to all-ones so callers see a "no device" pattern on every
+ * early exit. Returns PCIBIOS_DEVICE_NOT_FOUND when there is no pci_dn,
+ * when config access is currently blocked, or when EEH confirms a failure
+ * after an all-ones read; otherwise returns what pnv_pci_cfg_read() returned.
+ */
+static int pnv_pci_read_config(struct pci_bus *bus,
+			       unsigned int devfn,
+			       int where, int size, u32 *val)
+{
+	struct pci_dn *pdn;
+	struct pnv_phb *phb;
+	int ret;
+
+	*val = 0xFFFFFFFF;
+
+	pdn = pci_get_pdn_by_devfn(bus, devfn);
+	if (!pdn || !pnv_pci_cfg_check(pdn))
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	ret = pnv_pci_cfg_read(pdn, where, size, val);
+	phb = pdn->phb->private_data;
+
+	/* No EEH on this PHB, or no eeh_dev yet: use the local freeze check */
+	if (!(phb->flags & PNV_PHB_FLAG_EEH) || !pdn->edev) {
+		pnv_pci_config_check_eeh(pdn);
+		return ret;
+	}
+
+	/* An all-ones result may mean the PE is frozen; let EEH decide */
+	if (*val == EEH_IO_ERROR_VALUE(size) &&
+	    eeh_dev_check_failure(pdn->edev))
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	return ret;
+}
+
+/*
+ * pci_ops write callback.
+ *
+ * Returns PCIBIOS_DEVICE_NOT_FOUND when there is no pci_dn or config access
+ * is currently blocked; otherwise returns what pnv_pci_cfg_write() returned.
+ * When EEH is not enabled on the PHB, the PE freeze state is checked (and
+ * cleared if needed) after the write.
+ */
+static int pnv_pci_write_config(struct pci_bus *bus,
+				unsigned int devfn,
+				int where, int size, u32 val)
+{
+	struct pci_dn *pdn;
+	struct pnv_phb *phb;
+	int ret;
+
+	pdn = pci_get_pdn_by_devfn(bus, devfn);
+	if (!pdn || !pnv_pci_cfg_check(pdn))
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	ret = pnv_pci_cfg_write(pdn, where, size, val);
+
+	phb = pdn->phb->private_data;
+	if (phb->flags & PNV_PHB_FLAG_EEH)
+		return ret;
+
+	pnv_pci_config_check_eeh(pdn);
+	return ret;
+}
+
+/*
+ * Config space accessors for PowerNV PHBs. Both callbacks are the static
+ * wrappers defined above, which add the EEH checks around the raw
+ * pnv_pci_cfg_read()/pnv_pci_cfg_write() helpers.
+ */
+struct pci_ops pnv_pci_ops = {
+ .read = pnv_pci_read_config,
+ .write = pnv_pci_write_config,
+};
+
+/*
+ * Allocate a zeroed iommu_table on NUMA node @nid, with its group list
+ * head and reference count initialised.
+ *
+ * Returns the new table, or NULL if the allocation failed.
+ */
+struct iommu_table *pnv_pci_table_alloc(int nid)
+{
+	struct iommu_table *table;
+
+	table = kzalloc_node(sizeof(*table), GFP_KERNEL, nid);
+	if (table) {
+		INIT_LIST_HEAD_RCU(&table->it_group_list);
+		kref_init(&table->it_kref);
+	}
+
+	return table;
+}
+
+/*
+ * Return the device-tree node of the PHB that @dev sits under. The node is
+ * returned through of_node_get(), so the caller receives its own reference.
+ */
+struct device_node *pnv_pci_get_phb_node(struct pci_dev *dev)
+{
+	return of_node_get(pci_bus_to_host(dev->bus)->dn);
+}
+EXPORT_SYMBOL(pnv_pci_get_phb_node);
+
+/*
+ * Claim (@enable != 0) or release (@enable == 0) the PBCQ tunnel BAR of the
+ * PHB above @dev.
+ *
+ * Returns 0 on success, or when enabling with the address already set;
+ * -ENXIO if firmware lacks either OPAL call; -EIO if the current BAR cannot
+ * be read; -EBUSY if enabling while a different address is set; -EPERM if
+ * releasing with an address that does not match the current one; otherwise
+ * the translated OPAL error from the set call.
+ */
+int pnv_pci_set_tunnel_bar(struct pci_dev *dev, u64 addr, int enable)
+{
+	struct pnv_phb *phb = pci_bus_to_pnvhb(dev->bus);
+	u64 current_bar;
+	__be64 raw_bar;
+	int rc;
+
+	if (!opal_check_token(OPAL_PCI_GET_PBCQ_TUNNEL_BAR) ||
+	    !opal_check_token(OPAL_PCI_SET_PBCQ_TUNNEL_BAR))
+		return -ENXIO;
+
+	mutex_lock(&tunnel_mutex);
+
+	rc = opal_pci_get_pbcq_tunnel_bar(phb->opal_id, &raw_bar);
+	if (rc != OPAL_SUCCESS) {
+		rc = -EIO;
+		goto out;
+	}
+	current_bar = be64_to_cpu(raw_bar);
+
+	/*
+	 * Only one device per PHB can use atomics, first come first served.
+	 * Asking again for the address that is already set is not an error.
+	 */
+	if (enable && current_bar) {
+		rc = (current_bar == addr) ? 0 : -EBUSY;
+		goto out;
+	}
+
+	/*
+	 * To release, the owner has to pass the address it programmed;
+	 * the BAR is then written back as zero.
+	 */
+	if (!enable) {
+		if (current_bar != addr) {
+			rc = -EPERM;
+			goto out;
+		}
+		addr = 0x0ULL;
+	}
+
+	rc = opal_pci_set_pbcq_tunnel_bar(phb->opal_id, addr);
+	rc = opal_error_code(rc);
+out:
+	mutex_unlock(&tunnel_mutex);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(pnv_pci_set_tunnel_bar);
+
+/* Invoke the shutdown hook of every PCI controller that provides one. */
+void pnv_pci_shutdown(void)
+{
+	struct pci_controller *hose;
+
+	list_for_each_entry(hose, &hose_list, list_node) {
+		if (!hose->controller_ops.shutdown)
+			continue;
+
+		hose->controller_ops.shutdown(hose);
+	}
+}
+
+/* Fixup wrong class code in p7ioc and p8 root complex */
+static void pnv_p7ioc_rc_quirk(struct pci_dev *dev)
+{
+ /* Replace the reported class with the PCI bridge class code */
+ dev->class = PCI_CLASS_BRIDGE_PCI_NORMAL;
+}
+/* Registered as an early fixup for IBM device ID 0x3b9 */
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_IBM, 0x3b9, pnv_p7ioc_rc_quirk);
+
+/*
+ * Boot-time PCI setup for PowerNV: probe every PHB described in the device
+ * tree and install the IOMMU DMA ops. Without OPAL only the
+ * PCI_CAN_SKIP_ISA_ALIGN flag is set and nothing is probed.
+ */
+void __init pnv_pci_init(void)
+{
+	struct device_node *np;
+
+	pci_add_flags(PCI_CAN_SKIP_ISA_ALIGN);
+
+	/* No OPAL (e.g. running in a simulator): nothing to probe */
+	if (!firmware_has_feature(FW_FEATURE_OPAL))
+		return;
+
+#ifdef CONFIG_PCIEPORTBUS
+	/*
+	 * PCIe devices on PowerNV are currently managed together with
+	 * firmware. That is not strictly required, but both firmware and
+	 * the platform code carry enough assumptions that letting the
+	 * portbus services run would be unwise.
+	 *
+	 * Until that is fixed, disable the portbus driver altogether. AER
+	 * is not needed because AER events are handled via EEH. The pciehp
+	 * hotplug driver cannot work without kernel changes (and portbus
+	 * binding breaks pnv_php). The remaining services still need some
+	 * thought about how to integrate them.
+	 */
+	pcie_ports_disabled = true;
+#endif
+
+	/* ioda2 built-in PHB3s */
+	for_each_compatible_node(np, NULL, "ibm,ioda2-phb") {
+		pnv_pci_init_ioda2_phb(np);
+	}
+
+	/* ioda3 built-in PHB4s, handled as IODA2 */
+	for_each_compatible_node(np, NULL, "ibm,ioda3-phb") {
+		pnv_pci_init_ioda2_phb(np);
+	}
+
+	/* NPU2 OpenCAPI PHBs */
+	for_each_compatible_node(np, NULL, "ibm,ioda2-npu2-opencapi-phb") {
+		pnv_pci_init_npu2_opencapi_phb(np);
+	}
+
+	/* Route PCI DMA through the IOMMU ops */
+	set_pci_dma_ops(&dma_iommu_ops);
+}
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
new file mode 100644
index 000000000..957f2b47a
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -0,0 +1,340 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __POWERNV_PCI_H
+#define __POWERNV_PCI_H
+
+#include <linux/compiler.h> /* for __printf */
+#include <linux/iommu.h>
+#include <asm/iommu.h>
+#include <asm/msi_bitmap.h>
+
+struct pci_dn;
+
+/*
+ * Kind of PHB a struct pnv_phb describes (stored in pnv_phb.type).
+ * NOTE(review): PNV_PHB_NPU_OCAPI presumably marks the NPU2 OpenCAPI PHBs
+ * set up by pnv_pci_init_npu2_opencapi_phb() - confirm against that code.
+ */
+enum pnv_phb_type {
+ PNV_PHB_IODA2,
+ PNV_PHB_NPU_OCAPI,
+};
+
+/* Precise PHB model for error management (stored in pnv_phb.model) */
+enum pnv_phb_model {
+ PNV_PHB_MODEL_UNKNOWN,
+ PNV_PHB_MODEL_P7IOC,
+ PNV_PHB_MODEL_PHB3,
+};
+
+#define PNV_PCI_DIAG_BUF_SIZE 8192
+#define PNV_IODA_PE_DEV (1 << 0) /* PE has single PCI device */
+#define PNV_IODA_PE_BUS (1 << 1) /* PE has primary PCI bus */
+#define PNV_IODA_PE_BUS_ALL (1 << 2) /* PE has subordinate buses */
+#define PNV_IODA_PE_MASTER (1 << 3) /* Master PE in compound case */
+#define PNV_IODA_PE_SLAVE (1 << 4) /* Slave PE in compound case */
+#define PNV_IODA_PE_VF (1 << 5) /* PE for one VF */
+
+/*
+ * A brief note on PNV_IODA_PE_BUS_ALL
+ *
+ * This is needed because of the behaviour of PCIe-to-PCI bridges. The PHB uses
+ * the Requester ID field of the PCIe request header to determine the device
+ * (and PE) that initiated a DMA. In legacy PCI individual memory read/write
+ * requests aren't tagged with the RID. To work around this the PCIe-to-PCI
+ * bridge will use (secondary_bus_no << 8) | 0x00 as the RID on the PCIe side.
+ *
+ * PCIe-to-X bridges have a similar issue even though PCI-X requests also have
+ * a RID in the transaction header. The PCIe-to-X bridge is permitted to "take
+ * ownership" of a transaction by a PCI-X device when forwarding it to the PCIe
+ * side of the bridge.
+ *
+ * To work around these problems we use the BUS_ALL flag since every subordinate
+ * bus of the bridge should go into the same PE.
+ */
+
+/* Indicates operations are frozen for a PE: MMIO in PESTA & DMA in PESTB. */
+#define PNV_IODA_STOPPED_STATE 0x8000000000000000
+
+/* Data associated with a PE, including IOMMU tracking etc.. */
+struct pnv_phb;
+struct pnv_ioda_pe {
+ /* PNV_IODA_PE_* bits -- NOTE(review): presumed from the macro names; confirm */
+ unsigned long flags;
+ /* PHB this PE belongs to */
+ struct pnv_phb *phb;
+ int device_count;
+
+ /* A PE can be associated with a single device or an
+ * entire bus (& children). In the former case, pdev
+ * is populated, in the latter case, pbus is.
+ */
+#ifdef CONFIG_PCI_IOV
+ struct pci_dev *parent_dev;
+#endif
+ struct pci_dev *pdev;
+ struct pci_bus *pbus;
+
+ /* Effective RID (device RID for a device PE and base bus
+ * RID with devfn 0 for a bus PE)
+ */
+ unsigned int rid;
+
+ /* PE number */
+ unsigned int pe_number;
+
+ /* "Base" iommu table, ie, 4K TCEs, 32-bit DMA */
+ struct iommu_table_group table_group;
+
+ /* 64-bit TCE bypass region */
+ bool tce_bypass_enabled;
+ uint64_t tce_bypass_base;
+
+ /*
+ * Used to track whether we've done DMA setup for this PE or not. We
+ * want to defer allocating TCE tables, etc until we've added a
+ * non-bridge device to the PE.
+ */
+ bool dma_setup_done;
+
+ /* MSIs. MVE index is identical for 32 and 64 bit MSI
+ * and -1 if not supported. (It's actually identical to the
+ * PE number)
+ */
+ int mve_number;
+
+ /* PEs in compound case */
+ struct pnv_ioda_pe *master;
+ struct list_head slaves;
+
+ /* Link in list of PE#s */
+ struct list_head list;
+};
+
+#define PNV_PHB_FLAG_EEH (1 << 0)
+
+/*
+ * Per-PHB platform state. Reached from a pci_controller through its
+ * private_data pointer (see pci_bus_to_pnvhb()).
+ */
+struct pnv_phb {
+ /* Generic PCI controller this PHB backs */
+ struct pci_controller *hose;
+ enum pnv_phb_type type;
+ enum pnv_phb_model model;
+ u64 hub_id;
+ /* PHB identifier handed to the opal_pci_*() firmware calls */
+ u64 opal_id;
+ /* PNV_PHB_FLAG_* bits */
+ int flags;
+ void __iomem *regs;
+ u64 regs_phys;
+ /* Held around the diag-data fetch and freeze clearing in pci.c */
+ spinlock_t lock;
+
+#ifdef CONFIG_DEBUG_FS
+ int has_dbgfs;
+ struct dentry *dbgfs;
+#endif
+
+ unsigned int msi_base;
+ struct msi_bitmap msi_bmp;
+ int (*init_m64)(struct pnv_phb *phb);
+ /*
+ * Optional compound-PE hooks. Callers test each pointer for NULL and
+ * fall back to the plain OPAL call (or skip the step) when it is unset.
+ */
+ int (*get_pe_state)(struct pnv_phb *phb, int pe_no);
+ void (*freeze_pe)(struct pnv_phb *phb, int pe_no);
+ int (*unfreeze_pe)(struct pnv_phb *phb, int pe_no, int opt);
+
+ struct {
+ /* Global bridge info */
+ unsigned int total_pe_num;
+ unsigned int reserved_pe_idx;
+ unsigned int root_pe_idx;
+
+ /* 32-bit MMIO window */
+ unsigned int m32_size;
+ unsigned int m32_segsize;
+ unsigned int m32_pci_base;
+
+ /* 64-bit MMIO window */
+ unsigned int m64_bar_idx;
+ unsigned long m64_size;
+ unsigned long m64_segsize;
+ unsigned long m64_base;
+#define MAX_M64_BARS 64
+ unsigned long m64_bar_alloc;
+
+ /* IO ports */
+ unsigned int io_size;
+ unsigned int io_segsize;
+ unsigned int io_pci_base;
+
+ /* PE allocation */
+ struct mutex pe_alloc_mutex;
+ unsigned long *pe_alloc;
+ struct pnv_ioda_pe *pe_array;
+
+ /* M64, M32 & IO segment maps */
+ unsigned int *m64_segmap;
+ unsigned int *m32_segmap;
+ unsigned int *io_segmap;
+
+ /* IRQ chip */
+ int irq_chip_init;
+ struct irq_chip irq_chip;
+
+ /* Sorted list of used PE's based
+ * on the sequence of creation
+ */
+ struct list_head pe_list;
+ struct mutex pe_list_mutex;
+
+ /* Reverse map of PEs, indexed by {bus, devfn} */
+ unsigned int pe_rmap[0x10000];
+ } ioda;
+
+ /* PHB and hub diagnostics */
+ /* Size and address of the buffer passed to opal_pci_get_phb_diag_data2() */
+ unsigned int diag_data_size;
+ u8 *diag_data;
+};
+
+
+/* IODA PE management */
+
+/*
+ * Tell whether resource @r starts inside the 64-bit MMIO window of @phb,
+ * i.e. in [m64_base, m64_base + m64_size).
+ */
+static inline bool pnv_pci_is_m64(struct pnv_phb *phb, struct resource *r)
+{
+	/*
+	 * The resource flags are not trustworthy here: the Linux PCI
+	 * allocator may place a 64-bit prefetchable BAR in the 32-bit
+	 * window, so the decision is made on addresses instead.
+	 *
+	 * Only the start of the resource is examined, for simplicity.
+	 */
+	if (r->start < phb->ioda.m64_base)
+		return false;
+
+	return r->start < (phb->ioda.m64_base + phb->ioda.m64_size);
+}
+
+/*
+ * Tell whether @resource_flags has both IORESOURCE_MEM_64 and
+ * IORESOURCE_PREFETCH set.
+ */
+static inline bool pnv_pci_is_m64_flags(unsigned long resource_flags)
+{
+	const unsigned long required = IORESOURCE_MEM_64 | IORESOURCE_PREFETCH;
+
+	/* True only when no required bit is missing from resource_flags */
+	return !(~resource_flags & required);
+}
+
+int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe);
+int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe);
+
+void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe);
+void pnv_pci_ioda2_release_pe_dma(struct pnv_ioda_pe *pe);
+
+struct pnv_ioda_pe *pnv_ioda_alloc_pe(struct pnv_phb *phb, int count);
+void pnv_ioda_free_pe(struct pnv_ioda_pe *pe);
+
+#ifdef CONFIG_PCI_IOV
+/*
+ * For SR-IOV we want to put each VF's MMIO resource into a separate PE.
+ * This requires a bit of acrobatics with the MMIO -> PE configuration
+ * and this structure is used to keep track of it all.
+ */
+struct pnv_iov_data {
+ /* number of VFs enabled */
+ u16 num_vfs;
+
+ /* pointer to the array of VF PEs, num_vfs entries long */
+ struct pnv_ioda_pe *vf_pe_arr;
+
+ /* Did we map the VF BAR with single-PE IODA BARs? */
+ bool m64_single_mode[PCI_SRIOV_NUM_BARS];
+
+ /*
+ * True if we're using any segmented windows. In that case we need to
+ * shift the start of the IOV resource to the segment corresponding to
+ * the allocated PE.
+ */
+ bool need_shift;
+
+ /*
+ * Bit mask used to track which m64 windows are used to map the
+ * SR-IOV BARs for this device.
+ */
+ DECLARE_BITMAP(used_m64_bar_mask, MAX_M64_BARS);
+
+ /*
+ * If we map the SR-IOV BARs with a segmented window then
+ * parts of that window will be "claimed" by other PEs.
+ *
+ * "holes" here is used to reserve the leading portion
+ * of the window that is used by other (non VF) PEs.
+ */
+ struct resource holes[PCI_SRIOV_NUM_BARS];
+};
+
+/* Fetch the SR-IOV bookkeeping stored in @pdev's arch data. */
+static inline struct pnv_iov_data *pnv_iov_get(struct pci_dev *pdev)
+{
+	struct pnv_iov_data *iov = pdev->dev.archdata.iov_data;
+
+	return iov;
+}
+
+void pnv_pci_ioda_fixup_iov(struct pci_dev *pdev);
+resource_size_t pnv_pci_iov_resource_alignment(struct pci_dev *pdev, int resno);
+
+int pnv_pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs);
+int pnv_pcibios_sriov_disable(struct pci_dev *pdev);
+#endif /* CONFIG_PCI_IOV */
+
+extern struct pci_ops pnv_pci_ops;
+
+void pnv_pci_dump_phb_diag_data(struct pci_controller *hose,
+ unsigned char *log_buff);
+int pnv_pci_cfg_read(struct pci_dn *pdn,
+ int where, int size, u32 *val);
+int pnv_pci_cfg_write(struct pci_dn *pdn,
+ int where, int size, u32 val);
+extern struct iommu_table *pnv_pci_table_alloc(int nid);
+
+extern void pnv_pci_init_ioda_hub(struct device_node *np);
+extern void pnv_pci_init_ioda2_phb(struct device_node *np);
+extern void pnv_pci_init_npu2_opencapi_phb(struct device_node *np);
+extern void pnv_pci_reset_secondary_bus(struct pci_dev *dev);
+extern int pnv_eeh_phb_reset(struct pci_controller *hose, int option);
+
+extern struct pnv_ioda_pe *pnv_pci_bdfn_to_pe(struct pnv_phb *phb, u16 bdfn);
+extern struct pnv_ioda_pe *pnv_ioda_get_pe(struct pci_dev *dev);
+extern void pnv_set_msi_irq_chip(struct pnv_phb *phb, unsigned int virq);
+extern unsigned long pnv_pci_ioda2_get_table_size(__u32 page_shift,
+ __u64 window_size, __u32 levels);
+extern int pnv_eeh_post_init(void);
+
+__printf(3, 4)
+extern void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level,
+ const char *fmt, ...);
+#define pe_err(pe, fmt, ...) \
+ pe_level_printk(pe, KERN_ERR, fmt, ##__VA_ARGS__)
+#define pe_warn(pe, fmt, ...) \
+ pe_level_printk(pe, KERN_WARNING, fmt, ##__VA_ARGS__)
+#define pe_info(pe, fmt, ...) \
+ pe_level_printk(pe, KERN_INFO, fmt, ##__VA_ARGS__)
+
+/* pci-ioda-tce.c */
+#define POWERNV_IOMMU_DEFAULT_LEVELS 2
+#define POWERNV_IOMMU_MAX_LEVELS 5
+
+extern int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
+ unsigned long uaddr, enum dma_data_direction direction,
+ unsigned long attrs);
+extern void pnv_tce_free(struct iommu_table *tbl, long index, long npages);
+extern int pnv_tce_xchg(struct iommu_table *tbl, long index,
+ unsigned long *hpa, enum dma_data_direction *direction);
+extern __be64 *pnv_tce_useraddrptr(struct iommu_table *tbl, long index,
+ bool alloc);
+extern unsigned long pnv_tce_get(struct iommu_table *tbl, long index);
+
+extern long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
+ __u32 page_shift, __u64 window_size, __u32 levels,
+ bool alloc_userspace_copy, struct iommu_table *tbl);
+extern void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl);
+
+extern long pnv_pci_link_table_and_group(int node, int num,
+ struct iommu_table *tbl,
+ struct iommu_table_group *table_group);
+extern void pnv_pci_unlink_table_and_group(struct iommu_table *tbl,
+ struct iommu_table_group *table_group);
+extern void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
+ void *tce_mem, u64 tce_size,
+ u64 dma_offset, unsigned int page_shift);
+
+extern unsigned long pnv_ioda_parse_tce_sizes(struct pnv_phb *phb);
+
+/*
+ * Map a PCI bus to the pnv_phb of its host controller.
+ * Returns NULL when the bus has no sysdata attached.
+ */
+static inline struct pnv_phb *pci_bus_to_pnvhb(struct pci_bus *bus)
+{
+	struct pci_controller *hose = bus->sysdata;
+
+	return hose ? hose->private_data : NULL;
+}
+
+#endif /* __POWERNV_PCI_H */
diff --git a/arch/powerpc/platforms/powernv/powernv.h b/arch/powerpc/platforms/powernv/powernv.h
new file mode 100644
index 000000000..866efdc10
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/powernv.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _POWERNV_H
+#define _POWERNV_H
+
+/*
+ * There are various hacks scattered throughout the generic powerpc arch code
+ * that need to call into powernv platform stuff. The prototypes for those
+ * functions are in asm/powernv.h
+ */
+#include <asm/powernv.h>
+
+#ifdef CONFIG_SMP
+extern void pnv_smp_init(void);
+#else
+static inline void pnv_smp_init(void) { }
+#endif
+
+extern void pnv_platform_error_reboot(struct pt_regs *regs, const char *msg) __noreturn;
+
+struct pci_dev;
+
+#ifdef CONFIG_PCI
+extern void pnv_pci_init(void);
+extern void pnv_pci_shutdown(void);
+#else
+static inline void pnv_pci_init(void) { }
+static inline void pnv_pci_shutdown(void) { }
+#endif
+
+extern u32 pnv_get_supported_cpuidle_states(void);
+
+extern void pnv_lpc_init(void);
+
+extern void opal_handle_events(void);
+extern bool opal_have_pending_events(void);
+extern void opal_event_shutdown(void);
+
+bool cpu_core_split_required(void);
+
+struct memcons;
+ssize_t memcons_copy(struct memcons *mc, char *to, loff_t pos, size_t count);
+u32 __init memcons_get_size(struct memcons *mc);
+struct memcons *__init memcons_init(struct device_node *node, const char *mc_prop_name);
+
+void pnv_rng_init(void);
+
+#endif /* _POWERNV_H */
diff --git a/arch/powerpc/platforms/powernv/rng.c b/arch/powerpc/platforms/powernv/rng.c
new file mode 100644
index 000000000..196aa70fe
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/rng.c
@@ -0,0 +1,200 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2013, Michael Ellerman, IBM Corporation.
+ */
+
+#define pr_fmt(fmt) "powernv-rng: " fmt
+
+#include <linux/kernel.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_platform.h>
+#include <linux/slab.h>
+#include <linux/smp.h>
+#include <asm/archrandom.h>
+#include <asm/cputable.h>
+#include <asm/io.h>
+#include <asm/prom.h>
+#include <asm/machdep.h>
+#include <asm/smp.h>
+#include "powernv.h"
+
+#define DARN_ERR 0xFFFFFFFFFFFFFFFFul
+
+/* State of one hardware RNG, as created by rng_create() */
+struct pnv_rng {
+ /* of_iomap() mapping of the RNG register; read when MSR_DR is set */
+ void __iomem *regs;
+ /* Start address of the same resource, for reads with MSR_DR clear */
+ void __iomem *regs_real;
+ /* Whitening mask: seeded from the first read, updated by rng_whiten() */
+ unsigned long mask;
+};
+
+static DEFINE_PER_CPU(struct pnv_rng *, pnv_rng);
+
+/*
+ * Whiten a raw RNG reading: return @val xor-ed with the current mask, then
+ * shift the parity bit of the raw @val into the mask for the next call.
+ */
+static unsigned long rng_whiten(struct pnv_rng *rng, unsigned long val)
+{
+	unsigned long ones;
+
+	/* Population count of the raw value; its low bit is the parity */
+	asm (".machine push; \
+	 .machine power7; \
+	 popcntd %0,%1; \
+	 .machine pop;"
+	 : "=r" (ones) : "r" (val));
+
+	/* Mix the value with the mask as it stood before this call */
+	val ^= rng->mask;
+
+	/* Feed this value's parity into the mask for next time */
+	rng->mask = (rng->mask << 1) | (ones & 1);
+
+	return val;
+}
+
+/*
+ * Fetch one 64-bit value with the DARN instruction.
+ * Returns 1 and stores the value in *@v on success; returns 0 and leaves
+ * *@v untouched when DARN reports its error pattern.
+ */
+static int pnv_get_random_darn(unsigned long *v)
+{
+	unsigned long darn;
+
+	/* DARN with L=1 yields a 64-bit conditioned random number */
+	asm volatile(PPC_DARN(%0, 1) : "=r"(darn));
+
+	if (darn != DARN_ERR) {
+		*v = darn;
+		return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Install DARN as the platform random seed source if the CPU supports it
+ * and it delivers a value within 10 attempts.
+ *
+ * Returns 0 on success, -ENODEV without CPU_FTR_ARCH_300, -EIO if every
+ * attempt failed.
+ */
+static int __init initialise_darn(void)
+{
+	unsigned long scratch;
+	int attempt;
+
+	if (!cpu_has_feature(CPU_FTR_ARCH_300))
+		return -ENODEV;
+
+	for (attempt = 0; attempt < 10; attempt++) {
+		if (!pnv_get_random_darn(&scratch))
+			continue;
+
+		ppc_md.get_random_seed = pnv_get_random_darn;
+		return 0;
+	}
+
+	return -EIO;
+}
+
+/*
+ * Read one whitened value from this CPU's hardware RNG into *@v.
+ * With MSR_DR clear the register is read through regs_real, otherwise
+ * through the regular mapping. Always returns 1.
+ */
+int pnv_get_random_long(unsigned long *v)
+{
+	struct pnv_rng *rng;
+
+	if (!(mfmsr() & MSR_DR)) {
+		/* Data relocation off: use the real-mode accessor */
+		rng = raw_cpu_read(pnv_rng);
+		*v = rng_whiten(rng, __raw_rm_readq(rng->regs_real));
+		return 1;
+	}
+
+	rng = get_cpu_var(pnv_rng);
+	*v = rng_whiten(rng, in_be64(rng->regs));
+	put_cpu_var(rng);
+
+	return 1;
+}
+EXPORT_SYMBOL_GPL(pnv_get_random_long);
+
+/*
+ * Point per-CPU pnv_rng slots at @rng: every CPU that has no RNG yet, plus
+ * every CPU whose chip id matches that of @dn. Warns if @dn has no chip id.
+ */
+static __init void rng_init_per_cpu(struct pnv_rng *rng,
+				    struct device_node *dn)
+{
+	int chip_id, cpu;
+
+	chip_id = of_get_ibm_chip_id(dn);
+	if (chip_id == -1)
+		pr_warn("No ibm,chip-id found for %pOF.\n", dn);
+
+	for_each_possible_cpu(cpu) {
+		/* Keep an existing assignment unless this RNG is chip-local */
+		if (per_cpu(pnv_rng, cpu) != NULL &&
+		    cpu_to_chip_id(cpu) != chip_id)
+			continue;
+
+		per_cpu(pnv_rng, cpu) = rng;
+	}
+}
+
+/*
+ * Set up the RNG described by @dn: map its register, seed the whitening
+ * mask from a first read, assign it to CPUs and install
+ * pnv_get_random_long() as the platform seed source.
+ *
+ * Returns 0 on success, -ENOMEM if the allocation fails, -ENXIO if the
+ * register resource cannot be resolved or mapped.
+ */
+static __init int rng_create(struct device_node *dn)
+{
+	struct pnv_rng *rng;
+	struct resource res;
+
+	rng = kzalloc(sizeof(*rng), GFP_KERNEL);
+	if (!rng)
+		return -ENOMEM;
+
+	if (of_address_to_resource(dn, 0, &res))
+		goto err_free;
+
+	rng->regs_real = (void __iomem *)res.start;
+
+	rng->regs = of_iomap(dn, 0);
+	if (!rng->regs)
+		goto err_free;
+
+	/* The first reading becomes the initial whitening mask */
+	rng->mask = in_be64(rng->regs);
+
+	rng_init_per_cpu(rng, dn);
+
+	ppc_md.get_random_seed = pnv_get_random_long;
+
+	return 0;
+
+err_free:
+	kfree(rng);
+	return -ENXIO;
+}
+
+/*
+ * Placeholder seed hook installed by pnv_rng_init(). On its first call once
+ * the slab allocator is up, it removes itself from ppc_md.get_random_seed,
+ * creates an RNG for every "ibm,power-rng" node (rng_create() installs
+ * pnv_get_random_long() as the hook on success) and forwards the request to
+ * whatever hook is installed afterwards.
+ *
+ * Returns 0 if the slab is not available yet, if the hook had already been
+ * swapped out, or if no hook is installed after the RNGs were created;
+ * otherwise returns the installed hook's result.
+ */
+static int __init pnv_get_random_long_early(unsigned long *v)
+{
+ struct device_node *dn;
+
+ /* rng_create() allocates with kzalloc(), so wait for the slab */
+ if (!slab_is_available())
+ return 0;
+
+ /* Atomically clear the hook; only the caller that swaps it proceeds */
+ if (cmpxchg(&ppc_md.get_random_seed, pnv_get_random_long_early,
+ NULL) != pnv_get_random_long_early)
+ return 0;
+
+ for_each_compatible_node(dn, NULL, "ibm,power-rng")
+ rng_create(dn);
+
+ /* Still NULL means no rng_create() call succeeded */
+ if (!ppc_md.get_random_seed)
+ return 0;
+ return ppc_md.get_random_seed(v);
+}
+
+/*
+ * Choose the platform random seed source: DARN when it works, otherwise
+ * the deferred "ibm,power-rng" setup hook if such a node exists.
+ */
+void __init pnv_rng_init(void)
+{
+	struct device_node *rng_node;
+
+	/* DARN takes precedence over everything else */
+	if (initialise_darn() == 0)
+		return;
+
+	rng_node = of_find_compatible_node(NULL, NULL, "ibm,power-rng");
+	if (rng_node != NULL)
+		ppc_md.get_random_seed = pnv_get_random_long_early;
+
+	of_node_put(rng_node);
+}
+
+/*
+ * Late RNG setup: run the deferred setup if nobody has triggered it yet,
+ * then, when the hardware RNG is the active seed source, create a platform
+ * device for every "ibm,power-rng" node. Always returns 0.
+ */
+static int __init pnv_rng_late_init(void)
+{
+	struct device_node *dn;
+	unsigned long unused;
+
+	/* The early hook may never have been called during init */
+	if (ppc_md.get_random_seed == pnv_get_random_long_early)
+		pnv_get_random_long_early(&unused);
+
+	if (ppc_md.get_random_seed != pnv_get_random_long)
+		return 0;
+
+	for_each_compatible_node(dn, NULL, "ibm,power-rng") {
+		of_platform_device_create(dn, NULL, NULL);
+	}
+
+	return 0;
+}
+machine_subsys_initcall(powernv, pnv_rng_late_init);
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
new file mode 100644
index 000000000..4dbb47ddb
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -0,0 +1,587 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * PowerNV setup code.
+ *
+ * Copyright 2011 IBM Corp.
+ */
+
+#undef DEBUG
+
+#include <linux/cpu.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/tty.h>
+#include <linux/reboot.h>
+#include <linux/init.h>
+#include <linux/console.h>
+#include <linux/delay.h>
+#include <linux/irq.h>
+#include <linux/seq_buf.h>
+#include <linux/seq_file.h>
+#include <linux/of.h>
+#include <linux/of_fdt.h>
+#include <linux/interrupt.h>
+#include <linux/bug.h>
+#include <linux/pci.h>
+#include <linux/cpufreq.h>
+#include <linux/memblock.h>
+
+#include <asm/machdep.h>
+#include <asm/firmware.h>
+#include <asm/xics.h>
+#include <asm/xive.h>
+#include <asm/opal.h>
+#include <asm/kexec.h>
+#include <asm/smp.h>
+#include <asm/tm.h>
+#include <asm/setup.h>
+#include <asm/security_features.h>
+
+#include "powernv.h"
+
+
+/*
+ * Report whether the child of @fw_features called @name carries the boolean
+ * property @state. Returns false when no such child exists.
+ */
+static bool __init fw_feature_is(const char *state, const char *name,
+				 struct device_node *fw_features)
+{
+	struct device_node *feature;
+	bool present;
+
+	feature = of_get_child_by_name(fw_features, name);
+	if (!feature)
+		return false;
+
+	present = of_property_read_bool(feature, state);
+	of_node_put(feature);
+
+	return present;
+}
+
+/*
+ * Translate the firmware feature nodes under @np into security feature
+ * flags. Each table entry names a firmware feature, the property state to
+ * test for, and the security feature bit to set or clear when that state is
+ * present. Entries are evaluated in table order.
+ */
+static void __init init_fw_feat_flags(struct device_node *np)
+{
+	static const struct {
+		const char *state;	/* property tested on the feature node */
+		const char *name;	/* firmware feature node name */
+		u64 ftr;		/* security feature bit affected */
+		bool set;		/* true: set the bit, false: clear it */
+	} fw_feats[] __initconst = {
+		{ "enabled", "inst-spec-barrier-ori31,31,0", SEC_FTR_SPEC_BAR_ORI31, true },
+		{ "enabled", "fw-bcctrl-serialized", SEC_FTR_BCCTRL_SERIALISED, true },
+		{ "enabled", "inst-l1d-flush-ori30,30,0", SEC_FTR_L1D_FLUSH_ORI30, true },
+		{ "enabled", "inst-l1d-flush-trig2", SEC_FTR_L1D_FLUSH_TRIG2, true },
+		{ "enabled", "fw-l1d-thread-split", SEC_FTR_L1D_THREAD_PRIV, true },
+		{ "enabled", "fw-count-cache-disabled", SEC_FTR_COUNT_CACHE_DISABLED, true },
+		{ "enabled", "fw-count-cache-flush-bcctr2,0,0", SEC_FTR_BCCTR_FLUSH_ASSIST, true },
+		{ "enabled", "needs-count-cache-flush-on-context-switch", SEC_FTR_FLUSH_COUNT_CACHE, true },
+		/*
+		 * The features below are enabled by default, so we instead
+		 * look to see if firmware has *disabled* them, and clear
+		 * them if so.
+		 */
+		{ "disabled", "speculation-policy-favor-security", SEC_FTR_FAVOUR_SECURITY, false },
+		{ "disabled", "needs-l1d-flush-msr-pr-0-to-1", SEC_FTR_L1D_FLUSH_PR, false },
+		{ "disabled", "needs-l1d-flush-msr-hv-1-to-0", SEC_FTR_L1D_FLUSH_HV, false },
+		{ "disabled", "needs-spec-barrier-for-bound-checks", SEC_FTR_BNDS_CHK_SPEC_BAR, false },
+		/* "no-need-*" features clear their flag when firmware enables them */
+		{ "enabled", "no-need-l1d-flush-msr-pr-1-to-0", SEC_FTR_L1D_FLUSH_ENTRY, false },
+		{ "enabled", "no-need-l1d-flush-kernel-on-user-access", SEC_FTR_L1D_FLUSH_UACCESS, false },
+		{ "enabled", "no-need-store-drain-on-priv-state-switch", SEC_FTR_STF_BARRIER, false },
+	};
+	unsigned int idx;
+
+	for (idx = 0; idx < ARRAY_SIZE(fw_feats); idx++) {
+		if (!fw_feature_is(fw_feats[idx].state, fw_feats[idx].name, np))
+			continue;
+
+		if (fw_feats[idx].set)
+			security_ftr_set(fw_feats[idx].ftr);
+		else
+			security_ftr_clear(fw_feats[idx].ftr);
+	}
+}
+
+/*
+ * Derive the security feature flags from the "fw-features" child of the
+ * "ibm,opal" node (when present), pick the L1D flush type, and then call the
+ * rfi, count-cache, entry, uaccess and stf-barrier setup routines.
+ */
+static void __init pnv_setup_security_mitigations(void)
+{
+ struct device_node *np, *fw_features;
+ enum l1d_flush_type type;
+ bool enable;
+
+ /* Default to fallback in case fw-features are not available */
+ type = L1D_FLUSH_FALLBACK;
+
+ np = of_find_node_by_name(NULL, "ibm,opal");
+ fw_features = of_get_child_by_name(np, "fw-features");
+ of_node_put(np);
+
+ if (fw_features) {
+ init_fw_feat_flags(fw_features);
+ of_node_put(fw_features);
+
+ if (security_ftr_enabled(SEC_FTR_L1D_FLUSH_TRIG2))
+ type = L1D_FLUSH_MTTRIG;
+
+ /* Checked last, so ORI30 wins when both flush features are enabled */
+ if (security_ftr_enabled(SEC_FTR_L1D_FLUSH_ORI30))
+ type = L1D_FLUSH_ORI;
+ }
+
+ /*
+ * The issues addressed by the entry and uaccess flush don't affect P7
+ * or P8, so on bare metal disable them explicitly in case firmware does
+ * not include the features to disable them. POWER9 and newer processors
+ * should have the appropriate firmware flags.
+ */
+ if (pvr_version_is(PVR_POWER7) || pvr_version_is(PVR_POWER7p) ||
+ pvr_version_is(PVR_POWER8E) || pvr_version_is(PVR_POWER8NVL) ||
+ pvr_version_is(PVR_POWER8)) {
+ security_ftr_clear(SEC_FTR_L1D_FLUSH_ENTRY);
+ security_ftr_clear(SEC_FTR_L1D_FLUSH_UACCESS);
+ }
+
+ /* RFI flush: needs FAVOUR_SECURITY plus either the PR or the HV flush flag */
+ enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && \
+ (security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR) || \
+ security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV));
+
+ setup_rfi_flush(type, enable);
+ setup_count_cache_flush();
+
+ /* Entry flush: needs FAVOUR_SECURITY and L1D_FLUSH_ENTRY */
+ enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) &&
+ security_ftr_enabled(SEC_FTR_L1D_FLUSH_ENTRY);
+ setup_entry_flush(enable);
+
+ /* Uaccess flush: needs FAVOUR_SECURITY and L1D_FLUSH_UACCESS */
+ enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) &&
+ security_ftr_enabled(SEC_FTR_L1D_FLUSH_UACCESS);
+ setup_uaccess_flush(enable);
+
+ setup_stf_barrier();
+}
+
+/*
+ * Count the "cpu" device nodes whose "status" property contains the string
+ * "bad" and, if there are any, print a warning banner with the count.
+ */
+static void __init pnv_check_guarded_cores(void)
+{
+	struct device_node *cpu_node;
+	int guarded = 0;
+
+	for_each_node_by_type(cpu_node, "cpu") {
+		if (of_property_match_string(cpu_node, "status", "bad") < 0)
+			continue;
+		guarded++;
+	}
+
+	if (!guarded)
+		return;
+
+	printk(" _ _______________\n");
+	pr_cont(" | | / \\\n");
+	pr_cont(" | | | WARNING! |\n");
+	pr_cont(" | | | |\n");
+	pr_cont(" | | | It looks like |\n");
+	pr_cont(" |_| | you have %*d |\n", 3, guarded);
+	pr_cont(" _ | guarded cores |\n");
+	pr_cont(" (_) \\_______________/\n");
+}
+
+/*
+ * Machine setup_arch hook (see define_machine(powernv)): sets the panic
+ * timeout, applies the security mitigations, initialises SMP, NVRAM (when
+ * OPAL is present) and the RNG, enables nap, and warns about guarded cores.
+ */
+static void __init pnv_setup_arch(void)
+{
+ set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT);
+
+ pnv_setup_security_mitigations();
+
+ /* Initialize SMP */
+ pnv_smp_init();
+
+ /* Setup RTC and NVRAM callbacks */
+ if (firmware_has_feature(FW_FEATURE_OPAL))
+ opal_nvram_init();
+
+ /* Enable NAP mode */
+ powersave_nap = 1;
+
+ pnv_check_guarded_cores();
+
+ /* XXX PMCS */
+
+ pnv_rng_init();
+}
+
+/*
+ * Append firmware identification from /ibm,opal/firmware to ppc_hw_desc:
+ * "opal:<version>" (falling back to the "git-id" property when "version"
+ * cannot be read) and "mi:<mi-version>". Does nothing if the node is absent.
+ */
+static void __init pnv_add_hw_description(void)
+{
+	struct device_node *fw_node;
+	const char *str;
+
+	fw_node = of_find_node_by_path("/ibm,opal/firmware");
+	if (fw_node == NULL)
+		return;
+
+	/* "git-id" is only consulted when "version" is unreadable */
+	if (!of_property_read_string(fw_node, "version", &str) ||
+	    !of_property_read_string(fw_node, "git-id", &str))
+		seq_buf_printf(&ppc_hw_desc, "opal:%s ", str);
+
+	if (!of_property_read_string(fw_node, "mi-version", &str))
+		seq_buf_printf(&ppc_hw_desc, "mi:%s ", str);
+
+	of_node_put(fw_node);
+}
+
+/*
+ * Early platform init called from pnv_probe(): records the hardware
+ * description, initialises the LPC bus, selects the console, and (hash MMU
+ * only) allocates the per-CPU buffers used to save SLB contents during MCE.
+ */
+static void __init pnv_init(void)
+{
+ pnv_add_hw_description();
+
+ /*
+ * Initialize the LPC bus now so that legacy serial
+ * ports can be found on it
+ */
+ opal_lpc_init();
+
+ /*
+ * With CONFIG_HVC_OPAL the "hvc" preferred console is only added when
+ * OPAL is absent (the else binds to the call below); without it the
+ * console is added unconditionally.
+ */
+#ifdef CONFIG_HVC_OPAL
+ if (firmware_has_feature(FW_FEATURE_OPAL))
+ hvc_opal_init_early();
+ else
+#endif
+ add_preferred_console("hvc", 0, NULL);
+
+#ifdef CONFIG_PPC_64S_HASH_MMU
+ if (!radix_enabled()) {
+ size_t size = sizeof(struct slb_entry) * mmu_slb_size;
+ int i;
+
+ /* Allocate per cpu area to save old slb contents during MCE */
+ for_each_possible_cpu(i) {
+ /* NOTE(review): the allocation result is stored unchecked; confirm users tolerate NULL */
+ paca_ptrs[i]->mce_faulty_slbs =
+ memblock_alloc_node(size,
+ __alignof__(struct slb_entry),
+ cpu_to_node(i));
+ }
+ }
+#endif
+}
+
+/*
+ * Machine init_IRQ hook: bring up the interrupt controller and warn if no
+ * get_irq handler was installed as a result.
+ */
+static void __init pnv_init_IRQ(void)
+{
+	/* Try using a XIVE if available, otherwise use a XICS */
+	if (xive_native_init() == 0)
+		xics_init();
+
+	WARN_ON(ppc_md.get_irq == NULL);
+}
+
+/*
+ * Machine show_cpuinfo hook: emit the "machine", "firmware" and "MMU" lines
+ * into @m.
+ *
+ * The model string comes from the root node's "model" property. When the
+ * root node or the property is missing, the empty-string default is kept so
+ * that a NULL pointer is never passed to the "%s" conversion.
+ */
+static void pnv_show_cpuinfo(struct seq_file *m)
+{
+	struct device_node *root;
+	const char *model = "";
+
+	root = of_find_node_by_path("/");
+	if (root) {
+		const char *prop = of_get_property(root, "model", NULL);
+
+		/* of_get_property() yields NULL when the property is absent */
+		if (prop)
+			model = prop;
+	}
+	seq_printf(m, "machine\t\t: PowerNV %s\n", model);
+	if (firmware_has_feature(FW_FEATURE_OPAL))
+		seq_printf(m, "firmware\t: OPAL\n");
+	else
+		seq_printf(m, "firmware\t: BML\n");
+	/* model may point into the node's property data; release only after use */
+	of_node_put(root);
+	if (radix_enabled())
+		seq_printf(m, "MMU\t\t: Radix\n");
+	else
+		seq_printf(m, "MMU\t\t: Hash\n");
+}
+
+/*
+ * Common preparation for restart and power-off: shut down OPAL event
+ * notifiers, print any pending flash-update message, stop the other CPUs
+ * and hard-disable interrupts on this one.
+ */
+static void pnv_prepare_going_down(void)
+{
+ /*
+ * Disable all notifiers from OPAL, we can't
+ * service interrupts anymore anyway
+ */
+ opal_event_shutdown();
+
+ /* Print flash update message if one is scheduled. */
+ opal_flash_update_print_message();
+
+ smp_send_stop();
+
+ hard_irq_disable();
+}
+
+/*
+ * Restart hook. @cmd selects the reboot type: NULL or empty requests a plain
+ * cec-reboot; "full", "mpipl", "error" and "fast" map to the matching
+ * opal_cec_reboot2() type; any other string is treated as unsupported.
+ *
+ * The call is retried while OPAL reports busy. If a named reboot type fails,
+ * the function logs the error and falls back to a plain cec-reboot. It never
+ * returns: once the loop ends it polls OPAL events forever.
+ */
+static void __noreturn pnv_restart(char *cmd)
+{
+ long rc;
+
+ pnv_prepare_going_down();
+
+ do {
+ if (!cmd || !strlen(cmd))
+ rc = opal_cec_reboot();
+ else if (strcmp(cmd, "full") == 0)
+ rc = opal_cec_reboot2(OPAL_REBOOT_FULL_IPL, NULL);
+ else if (strcmp(cmd, "mpipl") == 0)
+ rc = opal_cec_reboot2(OPAL_REBOOT_MPIPL, NULL);
+ else if (strcmp(cmd, "error") == 0)
+ rc = opal_cec_reboot2(OPAL_REBOOT_PLATFORM_ERROR, NULL);
+ else if (strcmp(cmd, "fast") == 0)
+ rc = opal_cec_reboot2(OPAL_REBOOT_FAST, NULL);
+ else
+ rc = OPAL_UNSUPPORTED;
+
+ if (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
+ /* Opal is busy wait for some time and retry */
+ opal_poll_events(NULL);
+ mdelay(10);
+
+ } else if (cmd && rc) {
+ /* Unknown error while issuing reboot */
+ if (rc == OPAL_UNSUPPORTED)
+ pr_err("Unsupported '%s' reboot.\n", cmd);
+ else
+ pr_err("Unable to issue '%s' reboot. Err=%ld\n",
+ cmd, rc);
+ pr_info("Forcing a cec-reboot\n");
+ /* Clear cmd and fake a busy status so the loop retries as a plain reboot */
+ cmd = NULL;
+ rc = OPAL_BUSY;
+
+ } else if (rc != OPAL_SUCCESS) {
+ /* Unknown error while issuing cec-reboot */
+ pr_err("Unable to reboot. Err=%ld\n", rc);
+ }
+
+ } while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT);
+
+ for (;;)
+ opal_poll_events(NULL);
+}
+
+/*
+ * Power-off hook: request a CEC power down, retrying while OPAL reports
+ * busy, then poll OPAL events forever. Never returns.
+ */
+static void __noreturn pnv_power_off(void)
+{
+	long status;
+
+	pnv_prepare_going_down();
+
+	do {
+		status = opal_cec_power_down(0);
+		/* Busy-with-event is serviced by polling; anything else just waits */
+		if (status == OPAL_BUSY_EVENT)
+			opal_poll_events(NULL);
+		else
+			mdelay(10);
+	} while (status == OPAL_BUSY || status == OPAL_BUSY_EVENT);
+
+	for (;;)
+		opal_poll_events(NULL);
+}
+
+/* Halt hook: implemented as a power off; never returns. */
+static void __noreturn pnv_halt(void)
+{
+ pnv_power_off();
+}
+
+/* Progress hook: intentionally a no-op; both @s and @hex are ignored. */
+static void pnv_progress(char *s, unsigned short hex)
+{
+}
+
+/*
+ * Machine shutdown hook: shuts down the PCI side first, then OPAL activity.
+ */
+static void pnv_shutdown(void)
+{
+ /* Let the PCI code clear up IODA tables */
+ pnv_pci_shutdown();
+
+ /*
+ * Stop OPAL activity: Unregister all OPAL interrupts so they
+ * don't fire up while we kexec and make sure all potentially
+ * DMA'ing ops are complete (such as dump retrieval).
+ */
+ opal_shutdown();
+}
+
+#ifdef CONFIG_KEXEC_CORE
+/*
+ * Wait for every other online CPU to stop reporting OPAL_THREAD_STARTED,
+ * i.e. to have been returned to OPAL. Each CPU is polled roughly once per
+ * millisecond and abandoned after about 1000 polls. A query error also ends
+ * the wait for that CPU.
+ */
+static void pnv_kexec_wait_secondaries_down(void)
+{
+ int my_cpu, i, notified = -1;
+
+ /* NOTE(review): no matching put_cpu() in this function - presumably intentional on the kexec path; confirm */
+ my_cpu = get_cpu();
+
+ for_each_online_cpu(i) {
+ uint8_t status;
+ int64_t rc, timeout = 1000;
+
+ if (i == my_cpu)
+ continue;
+
+ for (;;) {
+ rc = opal_query_cpu_status(get_hard_smp_processor_id(i),
+ &status);
+ /* Done with this CPU once the query fails or it is no longer started */
+ if (rc != OPAL_SUCCESS || status != OPAL_THREAD_STARTED)
+ break;
+ barrier();
+ /* Print the waiting message only once per CPU */
+ if (i != notified) {
+ printk(KERN_INFO "kexec: waiting for cpu %d "
+ "(physical %d) to enter OPAL\n",
+ i, paca_ptrs[i]->hw_cpu_id);
+ notified = i;
+ }
+
+ /*
+ * On crash secondaries might be unreachable or hung,
+ * so timeout if we've waited too long
+ * */
+ mdelay(1);
+ if (timeout-- == 0) {
+ printk(KERN_ERR "kexec: timed out waiting for "
+ "cpu %d (physical %d) to enter OPAL\n",
+ i, paca_ptrs[i]->hw_cpu_id);
+ break;
+ }
+ }
+ }
+}
+
+/*
+ * Machine kexec_cpu_down hook. Tears down this CPU's interrupt controller
+ * state and, when OPAL is present, either returns the CPU to OPAL
+ * (@secondary non-zero) or, on the primary, waits for the secondaries,
+ * shuts XIVE down and asks OPAL to reinitialise the CPUs.
+ *
+ * @crash_shutdown is not used by this function.
+ */
+static void pnv_kexec_cpu_down(int crash_shutdown, int secondary)
+{
+ u64 reinit_flags;
+
+ if (xive_enabled())
+ xive_teardown_cpu();
+ else
+ xics_kexec_teardown_cpu(secondary);
+
+ /* On OPAL, we return all CPUs to firmware */
+ if (!firmware_has_feature(FW_FEATURE_OPAL))
+ return;
+
+ if (secondary) {
+ /* Return secondary CPUs to firmware on OPAL v3 */
+ mb();
+ get_paca()->kexec_state = KEXEC_STATE_REAL_MODE;
+ mb();
+
+ /* Return the CPU to OPAL */
+ opal_return_cpu();
+ } else {
+ /* Primary waits for the secondaries to have reached OPAL */
+ pnv_kexec_wait_secondaries_down();
+
+ /* Switch XIVE back to emulation mode */
+ if (xive_enabled())
+ xive_shutdown();
+
+ /*
+ * We might be running as little-endian - now that interrupts
+ * are disabled, reset the HILE bit to big-endian so we don't
+ * take interrupts in the wrong endian later
+ *
+ * We reinit to enable both radix and hash on P9 to ensure
+ * the mode used by the next kernel is always supported.
+ */
+ reinit_flags = OPAL_REINIT_CPUS_HILE_BE;
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ reinit_flags |= OPAL_REINIT_CPUS_MMU_RADIX |
+ OPAL_REINIT_CPUS_MMU_HASH;
+ opal_reinit_cpus(reinit_flags);
+ }
+}
+#endif /* CONFIG_KEXEC_CORE */
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+/* Machine memory_block_size hook: returns memory_block_size (defined outside this file). */
+static unsigned long pnv_memory_block_size(void)
+{
+ return memory_block_size;
+}
+#endif
+
+/*
+ * Install the OPAL-backed machine callbacks: boot time, restart, power off,
+ * halt, machine check and HMI handling. The early HMI handler is the "2"
+ * variant when firmware advertises the OPAL_HANDLE_HMI2 token.
+ */
+static void __init pnv_setup_machdep_opal(void)
+{
+	ppc_md.get_boot_time = opal_get_boot_time;
+	ppc_md.restart = pnv_restart;
+	pm_power_off = pnv_power_off;
+	ppc_md.halt = pnv_halt;
+	/* ppc_md.system_reset_exception gets filled in by pnv_smp_init() */
+	ppc_md.machine_check_exception = opal_machine_check;
+	ppc_md.mce_check_early_recovery = opal_mce_check_early_recovery;
+	ppc_md.hmi_exception_early = opal_check_token(OPAL_HANDLE_HMI2) ?
+					opal_hmi_exception_early2 :
+					opal_hmi_exception_early;
+	ppc_md.handle_hmi_exception = opal_handle_hmi_exception;
+}
+
+/*
+ * Machine probe hook: installs the OPAL callbacks when OPAL is present, runs
+ * the early platform init, and always returns 1.
+ */
+static int __init pnv_probe(void)
+{
+ if (firmware_has_feature(FW_FEATURE_OPAL))
+ pnv_setup_machdep_opal();
+
+ pr_debug("PowerNV detected !\n");
+
+ pnv_init();
+
+ return 1;
+}
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+/*
+ * Enable transactional memory in "suspend disabled" mode. This only applies
+ * with OPAL firmware on POWER9 when CPU_FTR_TM is not already set, and only
+ * takes effect if OPAL accepts the reinit request.
+ */
+void __init pnv_tm_init(void)
+{
+	if (!firmware_has_feature(FW_FEATURE_OPAL))
+		return;
+	if (!pvr_version_is(PVR_POWER9))
+		return;
+	if (early_cpu_has_feature(CPU_FTR_TM))
+		return;
+
+	if (opal_reinit_cpus(OPAL_REINIT_CPUS_TM_SUSPEND_DISABLED) != OPAL_SUCCESS)
+		return;
+
+	pr_info("Enabling TM (Transactional Memory) with Suspend Disabled\n");
+	cur_cpu_spec->cpu_features |= CPU_FTR_TM;
+	/* Make sure "normal" HTM is off (it should be) */
+	cur_cpu_spec->cpu_user_features2 &= ~PPC_FEATURE2_HTM;
+	/* Turn on no suspend mode, and HTM no SC */
+	cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_HTM_NO_SUSPEND |
+					    PPC_FEATURE2_HTM_NOSC;
+	tm_suspend_disabled = true;
+}
+#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
+
+/*
+ * Report the frequency of 'cpu' in Hz for /proc/cpuinfo.
+ *
+ * The cpufreq_get() value is multiplied by 1000; a zero result (no backend
+ * cpufreq driver) falls back to the old way of reporting the clockrate,
+ * ppc_proc_freq.
+ */
+static unsigned long pnv_get_proc_freq(unsigned int cpu)
+{
+	unsigned long hz = cpufreq_get(cpu) * 1000ul;
+
+	return hz ? hz : ppc_proc_freq;
+}
+
+/*
+ * Machine machine_check_early hook: delegate to the CPU-specific handler
+ * when one is registered. Returns that handler's result, or 0 when there is
+ * no cur_cpu_spec or no handler.
+ */
+static long pnv_machine_check_early(struct pt_regs *regs)
+{
+	if (!cur_cpu_spec || !cur_cpu_spec->machine_check_early)
+		return 0;
+
+	return cur_cpu_spec->machine_check_early(regs);
+}
+
+/*
+ * Machine description for PowerNV: wires the callbacks defined in this file
+ * (plus pnv_pci_init for PHB discovery) into the machine hooks. The kexec and
+ * memory-hotplug hooks are only present when the corresponding config
+ * options are enabled.
+ */
+define_machine(powernv) {
+ .name = "PowerNV",
+ .compatible = "ibm,powernv",
+ .probe = pnv_probe,
+ .setup_arch = pnv_setup_arch,
+ .init_IRQ = pnv_init_IRQ,
+ .show_cpuinfo = pnv_show_cpuinfo,
+ .get_proc_freq = pnv_get_proc_freq,
+ .discover_phbs = pnv_pci_init,
+ .progress = pnv_progress,
+ .machine_shutdown = pnv_shutdown,
+ .power_save = NULL,
+ .machine_check_early = pnv_machine_check_early,
+#ifdef CONFIG_KEXEC_CORE
+ .kexec_cpu_down = pnv_kexec_cpu_down,
+#endif
+#ifdef CONFIG_MEMORY_HOTPLUG
+ .memory_block_size = pnv_memory_block_size,
+#endif
+};
diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c
new file mode 100644
index 000000000..9e1a25398
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/smp.c
@@ -0,0 +1,441 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * SMP support for PowerNV machines.
+ *
+ * Copyright 2011 IBM Corp.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/sched/hotplug.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/cpu.h>
+
+#include <asm/irq.h>
+#include <asm/smp.h>
+#include <asm/paca.h>
+#include <asm/machdep.h>
+#include <asm/cputable.h>
+#include <asm/firmware.h>
+#include <asm/vdso_datapage.h>
+#include <asm/cputhreads.h>
+#include <asm/xics.h>
+#include <asm/xive.h>
+#include <asm/opal.h>
+#include <asm/runlatch.h>
+#include <asm/code-patching.h>
+#include <asm/dbell.h>
+#include <asm/kvm_ppc.h>
+#include <asm/ppc-opcode.h>
+#include <asm/cpuidle.h>
+#include <asm/kexec.h>
+#include <asm/reg.h>
+#include <asm/powernv.h>
+
+#include "powernv.h"
+
+/*
+ * DBG() forwards to udbg_printf() when DEBUG is defined and expands to an
+ * empty statement otherwise.
+ */
+#ifdef DEBUG
+#include <asm/udbg.h>
+#define DBG(fmt...) udbg_printf(fmt)
+#else
+#define DBG(fmt...) do { } while (0)
+#endif
+
+/*
+ * smp_ops setup_cpu hook: applies the POWER9 HMEER workaround and sets up
+ * the interrupt controller for @cpu. With XICS the boot CPU is skipped.
+ */
+static void pnv_smp_setup_cpu(int cpu)
+{
+ /*
+ * P9 workaround for CI vector load (see traps.c),
+ * enable the corresponding HMI interrupt
+ */
+ if (pvr_version_is(PVR_POWER9))
+ mtspr(SPRN_HMEER, mfspr(SPRN_HMEER) | PPC_BIT(17));
+
+ if (xive_enabled())
+ xive_smp_setup_cpu();
+ else if (cpu != boot_cpuid)
+ xics_setup_cpu();
+}
+
+/*
+ * smp_ops kick_cpu hook: start logical CPU @nr.
+ *
+ * A CPU that is already marked started in its PACA, or any CPU when OPAL is
+ * absent, is kicked directly via smp_generic_kick_cpu(). Otherwise OPAL is
+ * queried: a started thread is just kicked, an inactive thread is first
+ * started through OPAL at generic_secondary_smp_init, and any other status
+ * is refused.
+ *
+ * Returns -EINVAL for an out-of-range @nr, -ENODEV on an OPAL error or an
+ * unavailable CPU, otherwise the result of smp_generic_kick_cpu().
+ */
+static int pnv_smp_kick_cpu(int nr)
+{
+ unsigned int pcpu;
+ /* Physical address of the secondary entry point handed to OPAL */
+ unsigned long start_here =
+ __pa(ppc_function_entry(generic_secondary_smp_init));
+ long rc;
+ uint8_t status;
+
+ if (nr < 0 || nr >= nr_cpu_ids)
+ return -EINVAL;
+
+ pcpu = get_hard_smp_processor_id(nr);
+ /*
+ * If we already started or OPAL is not supported, we just
+ * kick the CPU via the PACA
+ */
+ if (paca_ptrs[nr]->cpu_start || !firmware_has_feature(FW_FEATURE_OPAL))
+ goto kick;
+
+ /*
+ * At this point, the CPU can either be spinning on the way in
+ * from kexec or be inside OPAL waiting to be started for the
+ * first time. OPAL v3 allows us to query OPAL to know if it
+ * has the CPUs, so we do that
+ */
+ rc = opal_query_cpu_status(pcpu, &status);
+ if (rc != OPAL_SUCCESS) {
+ pr_warn("OPAL Error %ld querying CPU %d state\n", rc, nr);
+ return -ENODEV;
+ }
+
+ /*
+ * Already started, just kick it, probably coming from
+ * kexec and spinning
+ */
+ if (status == OPAL_THREAD_STARTED)
+ goto kick;
+
+ /*
+ * Available/inactive, let's kick it
+ */
+ if (status == OPAL_THREAD_INACTIVE) {
+ pr_devel("OPAL: Starting CPU %d (HW 0x%x)...\n", nr, pcpu);
+ rc = opal_start_cpu(pcpu, start_here);
+ if (rc != OPAL_SUCCESS) {
+ pr_warn("OPAL Error %ld starting CPU %d\n", rc, nr);
+ return -ENODEV;
+ }
+ } else {
+ /*
+ * An unavailable CPU (or any other unknown status)
+ * shouldn't be started. It should also
+ * not be in the possible map but currently it can
+ * happen
+ */
+ pr_devel("OPAL: CPU %d (HW 0x%x) is unavailable"
+ " (status %d)...\n", nr, pcpu, status);
+ return -ENODEV;
+ }
+
+kick:
+ return smp_generic_kick_cpu(nr);
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+
+/*
+ * smp_ops cpu_disable hook for the calling CPU: marks it offline, updates
+ * the vdso processor count, picks a new boot CPU if needed, moves interrupts
+ * away and cleans up the MMU context. Always returns 0.
+ */
+static int pnv_smp_cpu_disable(void)
+{
+ int cpu = smp_processor_id();
+
+ /* This is identical to pSeries... might consolidate by
+ * moving migrate_irqs_away to a ppc_md with default to
+ * the generic fixup_irqs. --BenH.
+ */
+ set_cpu_online(cpu, false);
+ vdso_data->processorCount--;
+ /* The boot CPU role moves to some CPU that is still online */
+ if (cpu == boot_cpuid)
+ boot_cpuid = cpumask_any(cpu_online_mask);
+ if (xive_enabled())
+ xive_smp_disable_cpu();
+ else
+ xics_migrate_irqs_away();
+
+ cleanup_cpu_mmu_context();
+
+ return 0;
+}
+
+/*
+ * Flush a pending interrupt using the backend that matches the CPU
+ * generation and interrupt controller: native ICP without CPU_FTR_ARCH_300,
+ * otherwise XIVE when enabled, else the OPAL ICP.
+ */
+static void pnv_flush_interrupts(void)
+{
+	if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
+		icp_native_flush_interrupt();
+		return;
+	}
+
+	if (xive_enabled())
+		xive_flush_interrupt();
+	else
+		icp_opal_flush_interrupt();
+}
+
+/*
+ * smp_ops cpu_offline_self hook: park the calling CPU while it is offline.
+ *
+ * The CPU marks itself dead, hard-disables interrupts, clears LPCR:PECE1 so
+ * the decrementer does not wake it, and then loops in pnv_cpu_offline()
+ * until generic_check_cpu_restart() reports that it should come back. Each
+ * wakeup is classified from the returned SRR1 value and the corresponding
+ * interrupt source is cleared. Before leaving, PECE1 is set again.
+ */
+static void pnv_cpu_offline_self(void)
+{
+ unsigned long srr1, unexpected_mask, wmask;
+ unsigned int cpu;
+ u64 lpcr_val;
+
+ /* Standard hot unplug procedure */
+
+ idle_task_exit();
+ cpu = smp_processor_id();
+ DBG("CPU%d offline\n", cpu);
+ generic_set_cpu_dead(cpu);
+ smp_wmb();
+
+ /* Wake-reason mask applied to SRR1; the P8 variant is used from ARCH_207S on */
+ wmask = SRR1_WAKEMASK;
+ if (cpu_has_feature(CPU_FTR_ARCH_207S))
+ wmask = SRR1_WAKEMASK_P8;
+
+ /*
+ * This turns the irq soft-disabled state we're called with, into a
+ * hard-disabled state with pending irq_happened interrupts cleared.
+ *
+ * PACA_IRQ_DEC - Decrementer should be ignored.
+ * PACA_IRQ_HMI - Can be ignored, processing is done in real mode.
+ * PACA_IRQ_DBELL, EE, PMI - Unexpected.
+ */
+ hard_irq_disable();
+ /* Already asked to restart: skip the offline loop entirely */
+ if (generic_check_cpu_restart(cpu))
+ goto out;
+
+ unexpected_mask = ~(PACA_IRQ_DEC | PACA_IRQ_HMI | PACA_IRQ_HARD_DIS);
+ if (local_paca->irq_happened & unexpected_mask) {
+ if (local_paca->irq_happened & PACA_IRQ_EE)
+ pnv_flush_interrupts();
+ DBG("CPU%d Unexpected exit while offline irq_happened=%lx!\n",
+ cpu, local_paca->irq_happened);
+ }
+ local_paca->irq_happened = PACA_IRQ_HARD_DIS;
+
+ /*
+ * We don't want to take decrementer interrupts while we are
+ * offline, so clear LPCR:PECE1. We keep PECE2 (and
+ * LPCR_PECE_HVEE on P9) enabled so as to let IPIs in.
+ *
+ * If the CPU gets woken up by a special wakeup, ensure that
+ * the SLW engine sets LPCR with decrementer bit cleared, else
+ * the CPU will come back to the kernel due to a spurious
+ * wakeup.
+ */
+ lpcr_val = mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1;
+ pnv_program_cpu_hotplug_lpcr(cpu, lpcr_val);
+
+ while (!generic_check_cpu_restart(cpu)) {
+ /*
+ * Clear IPI flag, since we don't handle IPIs while
+ * offline, except for those when changing micro-threading
+ * mode, which are handled explicitly below, and those
+ * for coming online, which are handled via
+ * generic_check_cpu_restart() calls.
+ */
+ kvmppc_clear_host_ipi(cpu);
+
+ srr1 = pnv_cpu_offline(cpu);
+
+ WARN_ON_ONCE(!irqs_disabled());
+ WARN_ON(lazy_irq_pending());
+
+ /*
+ * If the SRR1 value indicates that we woke up due to
+ * an external interrupt, then clear the interrupt.
+ * We clear the interrupt before checking for the
+ * reason, so as to avoid a race where we wake up for
+ * some other reason, find nothing and clear the interrupt
+ * just as some other cpu is sending us an interrupt.
+ * If we returned from power7_nap as a result of
+ * having finished executing in a KVM guest, then srr1
+ * contains 0.
+ */
+ if (((srr1 & wmask) == SRR1_WAKEEE) ||
+ ((srr1 & wmask) == SRR1_WAKEHVI)) {
+ pnv_flush_interrupts();
+ } else if ((srr1 & wmask) == SRR1_WAKEHDBELL) {
+ /* Doorbell wakeup: clear the pending server doorbell message */
+ unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER);
+ asm volatile(PPC_MSGCLR(%0) : : "r" (msg));
+ } else if ((srr1 & wmask) == SRR1_WAKERESET) {
+ irq_set_pending_from_srr1(srr1);
+ /* Does not return */
+ }
+
+ smp_mb();
+
+ /*
+ * For kdump kernels, we process the ipi and jump to
+ * crash_ipi_callback
+ */
+ if (kdump_in_progress()) {
+ /*
+ * If we got to this point, we've not used
+ * NMI's, otherwise we would have gone
+ * via the SRR1_WAKERESET path. We are
+ * using regular IPI's for waking up offline
+ * threads.
+ */
+ struct pt_regs regs;
+
+ ppc_save_regs(&regs);
+ crash_ipi_callback(&regs);
+ /* Does not return */
+ }
+
+ /* A requested core split is handled by going round the loop again */
+ if (cpu_core_split_required())
+ continue;
+
+ if (srr1 && !generic_check_cpu_restart(cpu))
+ DBG("CPU%d Unexpected exit while offline srr1=%lx!\n",
+ cpu, srr1);
+
+ }
+
+ /*
+ * Re-enable decrementer interrupts in LPCR.
+ *
+ * Further, we want stop states to be woken up by decrementer
+ * for non-hotplug cases. So program the LPCR via stop api as
+ * well.
+ */
+ lpcr_val = mfspr(SPRN_LPCR) | (u64)LPCR_PECE1;
+ pnv_program_cpu_hotplug_lpcr(cpu, lpcr_val);
+out:
+ DBG("CPU%d coming online...\n", cpu);
+}
+
+#endif /* CONFIG_HOTPLUG_CPU */
+
+/*
+ * smp_ops cpu_bootable hook: decide whether CPU @nr may be booted.
+ *
+ * Starting with POWER8, the subcore logic relies on all threads of a core
+ * being booted so that they can participate in split mode switches. So on
+ * those machines the smt_enabled_at_boot setting (smt-enabled on the kernel
+ * command line) is ignored and every CPU is bootable; older CPUs defer to
+ * the generic check.
+ */
+static int pnv_cpu_bootable(unsigned int nr)
+{
+	if (!cpu_has_feature(CPU_FTR_ARCH_207S))
+		return smp_generic_cpu_bootable(nr);
+
+	return 1;
+}
+
+/*
+ * smp_ops prepare_cpu hook: only XIVE needs per-CPU preparation. Returns 0
+ * when XIVE is not enabled, otherwise the XIVE result.
+ */
+static int pnv_smp_prepare_cpu(int cpu)
+{
+	if (!xive_enabled())
+		return 0;
+
+	return xive_smp_prepare_cpu(cpu);
+}
+
+/* Cause IPI as setup by the interrupt controller (xics or xive) */
+static void (*ic_cause_ipi)(int cpu);
+
+/*
+ * Send an IPI to @cpu: try a core doorbell first and fall back to the
+ * interrupt controller's IPI routine when that does not succeed.
+ */
+static void pnv_cause_ipi(int cpu)
+{
+	if (!doorbell_try_core_ipi(cpu))
+		ic_cause_ipi(cpu);
+}
+
+/*
+ * smp_ops probe hook: probes the active interrupt controller and, when the
+ * CPU supports doorbells, replaces the controller's cause_ipi with a
+ * doorbell-based one (keeping the original in ic_cause_ipi).
+ */
+static void __init pnv_smp_probe(void)
+{
+ if (xive_enabled())
+ xive_smp_probe();
+ else
+ xics_smp_probe();
+
+ if (cpu_has_feature(CPU_FTR_DBELL)) {
+ /* Remember the controller's IPI routine; pnv_cause_ipi() falls back to it */
+ ic_cause_ipi = smp_ops->cause_ipi;
+ WARN_ON(!ic_cause_ipi);
+
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ smp_ops->cause_ipi = doorbell_global_ipi;
+ else
+ smp_ops->cause_ipi = pnv_cause_ipi;
+ }
+}
+
+/*
+ * System reset handler: returns 1 when smp_handle_nmi_ipi() reports a
+ * non-zero result for @regs, 0 otherwise.
+ */
+noinstr static int pnv_system_reset_exception(struct pt_regs *regs)
+{
+	return smp_handle_nmi_ipi(regs) ? 1 : 0;
+}
+
+/*
+ * smp_ops cause_nmi_ipi hook: deliver a system reset through OPAL.
+ *
+ * @cpu >= 0 targets a single CPU; NMI_IPI_ALL_OTHERS targets every online
+ * CPU except the caller. When firmware provides OPAL_QUIESCE, the signalling
+ * is bracketed by a quiesce hold/resume.
+ *
+ * Returns 1 if every reset request succeeded, 0 otherwise (including for any
+ * other negative @cpu value).
+ */
+static int pnv_cause_nmi_ipi(int cpu)
+{
+ int64_t rc;
+
+ if (cpu >= 0) {
+ int h = get_hard_smp_processor_id(cpu);
+
+ if (opal_check_token(OPAL_QUIESCE))
+ opal_quiesce(QUIESCE_HOLD, h);
+
+ rc = opal_signal_system_reset(h);
+
+ if (opal_check_token(OPAL_QUIESCE))
+ opal_quiesce(QUIESCE_RESUME, h);
+
+ if (rc != OPAL_SUCCESS)
+ return 0;
+ return 1;
+
+ } else if (cpu == NMI_IPI_ALL_OTHERS) {
+ bool success = true;
+ int c;
+
+ if (opal_check_token(OPAL_QUIESCE))
+ opal_quiesce(QUIESCE_HOLD, -1);
+
+ /*
+ * We do not use broadcasts (yet), because it's not clear
+ * exactly what semantics Linux wants or the firmware should
+ * provide.
+ */
+ for_each_online_cpu(c) {
+ if (c == smp_processor_id())
+ continue;
+
+ rc = opal_signal_system_reset(
+ get_hard_smp_processor_id(c));
+ /* Keep signalling the remaining CPUs even after a failure */
+ if (rc != OPAL_SUCCESS)
+ success = false;
+ }
+
+ if (opal_check_token(OPAL_QUIESCE))
+ opal_quiesce(QUIESCE_RESUME, -1);
+
+ if (success)
+ return 1;
+
+ /*
+ * Caller will fall back to doorbells, which may pick
+ * up the remainders.
+ */
+ }
+
+ return 0;
+}
+
+/*
+ * PowerNV SMP operations. cause_ipi is filled in by pnv_smp_probe() and
+ * cause_nmi_ipi by pnv_smp_init(); the hotplug hooks exist only with
+ * CONFIG_HOTPLUG_CPU.
+ */
+static struct smp_ops_t pnv_smp_ops = {
+ .message_pass = NULL, /* Use smp_muxed_ipi_message_pass */
+ .cause_ipi = NULL, /* Filled at runtime by pnv_smp_probe() */
+ .cause_nmi_ipi = NULL,
+ .probe = pnv_smp_probe,
+ .prepare_cpu = pnv_smp_prepare_cpu,
+ .kick_cpu = pnv_smp_kick_cpu,
+ .setup_cpu = pnv_smp_setup_cpu,
+ .cpu_bootable = pnv_cpu_bootable,
+#ifdef CONFIG_HOTPLUG_CPU
+ .cpu_disable = pnv_smp_cpu_disable,
+ .cpu_die = generic_cpu_die,
+ .cpu_offline_self = pnv_cpu_offline_self,
+#endif /* CONFIG_HOTPLUG_CPU */
+};
+
+/*
+ * This is called very early during platform setup_arch.
+ *
+ * Installs pnv_smp_ops as the active smp_ops. When firmware provides
+ * OPAL_SIGNAL_SYSTEM_RESET, the system-reset handler and the NMI IPI hook
+ * are enabled as well. With both CPU hotplug and kexec configured,
+ * crash_wake_offline is set.
+ */
+void __init pnv_smp_init(void)
+{
+ if (opal_check_token(OPAL_SIGNAL_SYSTEM_RESET)) {
+ ppc_md.system_reset_exception = pnv_system_reset_exception;
+ pnv_smp_ops.cause_nmi_ipi = pnv_cause_nmi_ipi;
+ }
+ smp_ops = &pnv_smp_ops;
+
+#ifdef CONFIG_HOTPLUG_CPU
+#ifdef CONFIG_KEXEC_CORE
+ crash_wake_offline = 1;
+#endif
+#endif
+}
diff --git a/arch/powerpc/platforms/powernv/subcore-asm.S b/arch/powerpc/platforms/powernv/subcore-asm.S
new file mode 100644
index 000000000..e038f6761
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/subcore-asm.S
@@ -0,0 +1,91 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright 2013, Michael (Ellerman|Neuling), IBM Corporation.
+ */
+
+#include <asm/asm-offsets.h>
+#include <asm/ppc_asm.h>
+#include <asm/reg.h>
+
+#include "subcore.h"
+
+
+/*
+ * split_core_secondary_loop(u8 *state)
+ *
+ * Drops to real mode with interrupts off, saves a set of SPRs, writes
+ * SYNC_STEP_REAL_MODE to *state, and spins until HID0 shows one of the
+ * POWER8 2LPAR/4LPAR mode bits. It then zeroes LPID, PCR and HDEC, restores
+ * the saved SPRs and returns to the caller with the original MSR.
+ */
+_GLOBAL(split_core_secondary_loop)
+ /*
+ * r3 = u8 *state, used throughout the routine
+ * r4 = temp
+ * r5 = temp
+ * ..
+ * r12 = MSR
+ */
+ mfmsr r12
+
+ /* Disable interrupts so SRR0/1 don't get trashed */
+ li r4,0
+ ori r4,r4,MSR_EE|MSR_SE|MSR_BE|MSR_RI
+ andc r4,r12,r4
+ sync
+ mtmsrd r4
+
+ /* Switch to real mode and leave interrupts off */
+ li r5, MSR_IR|MSR_DR
+ andc r5, r4, r5
+
+ LOAD_REG_ADDR(r4, real_mode)
+
+ /* rfid jumps to real_mode with the MSR value built in r5 */
+ mtspr SPRN_SRR0,r4
+ mtspr SPRN_SRR1,r5
+ rfid
+ b . /* prevent speculative execution */
+
+real_mode:
+ /* Grab values from unsplit SPRs */
+ mfspr r6, SPRN_LDBAR
+ mfspr r7, SPRN_PMMAR
+ mfspr r8, SPRN_PMCR
+ mfspr r9, SPRN_RPR
+ mfspr r10, SPRN_SDR1
+
+ /* Order reading the SPRs vs telling the primary we are ready to split */
+ sync
+
+ /* Tell thread 0 we are in real mode */
+ li r4, SYNC_STEP_REAL_MODE
+ stb r4, 0(r3)
+
+ /* r5 = mask of the two LPAR-mode bits, built from the top 16 bits and shifted back up */
+ li r5, (HID0_POWER8_4LPARMODE | HID0_POWER8_2LPARMODE)@highest
+ sldi r5, r5, 48
+
+ /* Loop until we see the split happen in HID0 */
+1: mfspr r4, SPRN_HID0
+ and. r4, r4, r5
+ beq 1b
+
+ /*
+ * We only need to initialise the below regs once for each subcore,
+ * but it's simpler and harmless to do it on each thread.
+ */
+
+ /* Make sure various SPRS have sane values */
+ li r4, 0
+ mtspr SPRN_LPID, r4
+ mtspr SPRN_PCR, r4
+ mtspr SPRN_HDEC, r4
+
+ /* Restore SPR values now we are split */
+ mtspr SPRN_LDBAR, r6
+ mtspr SPRN_PMMAR, r7
+ mtspr SPRN_PMCR, r8
+ mtspr SPRN_RPR, r9
+ mtspr SPRN_SDR1, r10
+
+ LOAD_REG_ADDR(r5, virtual_mode)
+
+ /* Get out of real mode */
+ /* r12 still holds the MSR read on entry, so rfid restores it */
+ mtspr SPRN_SRR0,r5
+ mtspr SPRN_SRR1,r12
+ rfid
+ b . /* prevent speculative execution */
+
+virtual_mode:
+ blr
diff --git a/arch/powerpc/platforms/powernv/subcore.c b/arch/powerpc/platforms/powernv/subcore.c
new file mode 100644
index 000000000..191424468
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/subcore.c
@@ -0,0 +1,449 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2013, Michael (Ellerman|Neuling), IBM Corporation.
+ */
+
+#define pr_fmt(fmt) "powernv: " fmt
+
+#include <linux/kernel.h>
+#include <linux/cpu.h>
+#include <linux/cpumask.h>
+#include <linux/device.h>
+#include <linux/gfp.h>
+#include <linux/smp.h>
+#include <linux/stop_machine.h>
+
+#include <asm/cputhreads.h>
+#include <asm/cpuidle.h>
+#include <asm/kvm_ppc.h>
+#include <asm/machdep.h>
+#include <asm/opal.h>
+#include <asm/smp.h>
+
+#include <trace/events/ipi.h>
+
+#include "subcore.h"
+#include "powernv.h"
+
+
+/*
+ * Split/unsplit procedure:
+ *
+ * A core can be in one of three states, unsplit, 2-way split, and 4-way split.
+ *
+ * The mapping to subcores_per_core is simple:
+ *
+ * State | subcores_per_core
+ * ------------|------------------
+ * Unsplit | 1
+ * 2-way split | 2
+ * 4-way split | 4
+ *
+ * The core is split along thread boundaries, the mapping between subcores and
+ * threads is as follows:
+ *
+ * Unsplit:
+ * ----------------------------
+ * Subcore | 0 |
+ * ----------------------------
+ * Thread | 0 1 2 3 4 5 6 7 |
+ * ----------------------------
+ *
+ * 2-way split:
+ * -------------------------------------
+ * Subcore | 0 | 1 |
+ * -------------------------------------
+ * Thread | 0 1 2 3 | 4 5 6 7 |
+ * -------------------------------------
+ *
+ * 4-way split:
+ * -----------------------------------------
+ * Subcore | 0 | 1 | 2 | 3 |
+ * -----------------------------------------
+ * Thread | 0 1 | 2 3 | 4 5 | 6 7 |
+ * -----------------------------------------
+ *
+ *
+ * Transitions
+ * -----------
+ *
+ * It is not possible to transition between either of the split states, the
+ * core must first be unsplit. The legal transitions are:
+ *
+ * ----------- ---------------
+ * | | <----> | 2-way split |
+ * | | ---------------
+ * | Unsplit |
+ * | | ---------------
+ * | | <----> | 4-way split |
+ * ----------- ---------------
+ *
+ * Unsplitting
+ * -----------
+ *
+ * Unsplitting is the simpler procedure. It requires thread 0 to request the
+ * unsplit while all other threads NAP.
+ *
+ * Thread 0 clears HID0_POWER8_DYNLPARDIS (Dynamic LPAR Disable). This tells
+ * the hardware that if all threads except 0 are napping, the hardware should
+ * unsplit the core.
+ *
+ * Non-zero threads are sent to a NAP loop, they don't exit the loop until they
+ * see the core unsplit.
+ *
+ * Thread 0 spins waiting for the hardware to see all the other threads napping
+ * and perform the unsplit.
+ *
+ * Once thread 0 sees the unsplit, it IPIs the secondary threads to wake them
+ * out of NAP. They will then see the core unsplit and exit the NAP loop.
+ *
+ * Splitting
+ * ---------
+ *
+ * The basic splitting procedure is fairly straightforward. However it is
+ * complicated by the fact that after the split occurs, the newly created
+ * subcores are not in a fully initialised state.
+ *
+ * Most notably the subcores do not have the correct value for SDR1, which
+ * means they must not be running in virtual mode when the split occurs. The
+ * subcores have separate timebase SPRs but these are pre-synchronised by
+ * opal.
+ *
+ * To begin with secondary threads are sent to an assembly routine. There they
+ * switch to real mode, so they are immune to the uninitialised SDR1 value.
+ * Once in real mode they indicate that they are in real mode, and spin waiting
+ * to see the core split.
+ *
+ * Thread 0 waits to see that all secondaries are in real mode, and then begins
+ * the splitting procedure. It firstly sets HID0_POWER8_DYNLPARDIS, which
+ * prevents the hardware from unsplitting. Then it sets the appropriate HID bit
+ * to request the split, and spins waiting to see that the split has happened.
+ *
+ * Concurrently the secondaries will notice the split. When they do they set up
+ * their SPRs, notably SDR1, and then they can return to virtual mode and exit
+ * the procedure.
+ */
+
+/* Initialised at boot by subcore_init() */
+static int subcores_per_core;
+
+/*
+ * Used to communicate to offline cpus that we want them to pop out of the
+ * offline loop and do a split or unsplit.
+ *
+ * 0 - no split happening
+ * 1 - unsplit in progress
+ * 2 - split to 2 in progress
+ * 4 - split to 4 in progress
+ */
+static int new_split_mode;
+
+static cpumask_var_t cpu_offline_mask;
+
+struct split_state {
+ u8 step; /* current SYNC_STEP_* value reached by this cpu */
+ u8 master; /* 1 on the cpu that updates the globals after the change */
+};
+
+static DEFINE_PER_CPU(struct split_state, split_state);
+
+/*
+ * Spin until every other thread of this core has reached at least @step.
+ *
+ * Looks at the split_state of the threads_per_core - 1 cpus numbered
+ * directly after the calling cpu, so the caller is expected to be thread 0
+ * of its core.
+ */
+static void wait_for_sync_step(int step)
+{
+	int self = smp_processor_id();
+	int sibling;
+
+	for (sibling = self + 1; sibling < self + threads_per_core; sibling++) {
+		/* barrier() makes the compiler re-read ->step on every pass */
+		while (per_cpu(split_state, sibling).step < step)
+			barrier();
+	}
+
+	/* Order the wait loop vs any subsequent loads/stores. */
+	mb();
+}
+
+/*
+ * Hand the new HID0 value to OPAL for this cpu via opal_slw_set_reg().
+ * Only done when the platform reports OPAL_PM_WINKLE_ENABLED.
+ */
+static void update_hid_in_slw(u64 hid0)
+{
+	u64 supported = pnv_get_supported_cpuidle_states();
+	u64 pir;
+
+	if (!(supported & OPAL_PM_WINKLE_ENABLED))
+		return;
+
+	/* OPAL call to patch slw with the new HID0 value */
+	pir = hard_smp_processor_id();
+	opal_slw_set_reg(pir, SPRN_HID0, hid0);
+}
+
+static inline void update_power8_hid0(unsigned long hid0)
+{
+ /*
+ * Write @hid0 to HID0 as the complete new register value.
+ *
+ * The HID0 update on Power8 should at the very least be
+ * preceded by a SYNC instruction followed by an ISYNC
+ * instruction
+ */
+ asm volatile("sync; mtspr %0,%1; isync":: "i"(SPRN_HID0), "r"(hid0));
+}
+
+static void unsplit_core(void)
+{
+ u64 hid0, mask;
+ int i, cpu;
+
+ mask = HID0_POWER8_2LPARMODE | HID0_POWER8_4LPARMODE; /* either bit set: core is split */
+
+ cpu = smp_processor_id();
+ if (cpu_thread_in_core(cpu) != 0) { /* secondary thread */
+ while (mfspr(SPRN_HID0) & mask) /* nap until HID0 shows the core unsplit */
+ power7_idle_type(PNV_THREAD_NAP);
+
+ per_cpu(split_state, cpu).step = SYNC_STEP_UNSPLIT; /* report to thread 0 */
+ return;
+ }
+
+ hid0 = mfspr(SPRN_HID0);
+ hid0 &= ~HID0_POWER8_DYNLPARDIS; /* let the hardware unsplit the core */
+ update_power8_hid0(hid0);
+ update_hid_in_slw(hid0);
+
+ while (mfspr(SPRN_HID0) & mask)
+ cpu_relax();
+
+ /*
+ * Wake secondaries out of NAP so they re-check HID0, then wait until
+ * each of them has reported SYNC_STEP_UNSPLIT.
+ */
+ for (i = cpu + 1; i < cpu + threads_per_core; i++)
+ smp_send_reschedule(i);
+
+ wait_for_sync_step(SYNC_STEP_UNSPLIT);
+}
+
+static void split_core(int new_mode)
+{
+ struct { u64 value; u64 mask; } split_parms[2] = {
+ { HID0_POWER8_1TO2LPAR, HID0_POWER8_2LPARMODE }, /* 2-way: bit to request, bit to poll */
+ { HID0_POWER8_1TO4LPAR, HID0_POWER8_4LPARMODE } /* 4-way: bit to request, bit to poll */
+ };
+ int i, cpu;
+ u64 hid0;
+
+ /*
+ * Convert new_mode (2 or 4) into an index into our parms array:
+ * 2 -> 0 and 4 -> 1. An index outside 0..1 trips the BUG_ON below.
+ */
+ i = (new_mode >> 1) - 1;
+ BUG_ON(i < 0 || i > 1);
+
+ cpu = smp_processor_id();
+ if (cpu_thread_in_core(cpu) != 0) { /* secondary thread: handled in assembly */
+ split_core_secondary_loop(&per_cpu(split_state, cpu).step);
+ return;
+ }
+
+ wait_for_sync_step(SYNC_STEP_REAL_MODE); /* all secondaries must be in real mode first */
+
+ /* Write new mode, and set DYNLPARDIS in the same HID0 update */
+ hid0 = mfspr(SPRN_HID0);
+ hid0 |= HID0_POWER8_DYNLPARDIS | split_parms[i].value;
+ update_power8_hid0(hid0);
+ update_hid_in_slw(hid0);
+
+ /* Wait for it to happen: the mode bit for the requested split reads back set */
+ while (!(mfspr(SPRN_HID0) & split_parms[i].mask))
+ cpu_relax();
+}
+
+static void cpu_do_split(int new_mode)
+{
+ /*
+ * Run this cpu's share of a mode change: unsplit first if needed, then
+ * split to new_mode unless the target is 1, then publish
+ * SYNC_STEP_FINISHED in this cpu's split_state.
+ *
+ * At boot subcores_per_core will be 0, so we will always unsplit at
+ * boot. In the usual case where the core is already unsplit it's a
+ * nop, and this just ensures the kernel's notion of the mode is
+ * consistent with the hardware.
+ */
+ if (subcores_per_core != 1)
+ unsplit_core();
+
+ if (new_mode != 1)
+ split_core(new_mode);
+
+ mb(); /* order everything above before the FINISHED store */
+ per_cpu(split_state, smp_processor_id()).step = SYNC_STEP_FINISHED;
+}
+
+/*
+ * If a split/unsplit has been requested through new_split_mode, carry out
+ * this cpu's part of it and return true; otherwise return false.
+ */
+bool cpu_core_split_required(void)
+{
+	smp_rmb();
+
+	if (new_split_mode) {
+		cpu_do_split(new_split_mode);
+		return true;
+	}
+
+	return false;
+}
+
+/*
+ * Recompute paca->subcore_sibling_mask for every possible cpu from the
+ * current threads_per_subcore: each cpu gets the bits of the threads that
+ * share its subcore.
+ */
+void update_subcore_sibling_mask(void)
+{
+	/*
+	 * Mask covering the threads of the first subcore; shifted left below
+	 * to obtain the mask of any other subcore.
+	 */
+	int first_subcore_mask = (1 << threads_per_subcore) - 1;
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		int thread = cpu_thread_in_core(cpu);
+		/* index of the first thread in this cpu's subcore */
+		int subcore_base = thread - (thread % threads_per_subcore);
+		int siblings = first_subcore_mask << subcore_base;
+
+		paca_ptrs[cpu]->subcore_sibling_mask = siblings;
+	}
+}
+
+static int cpu_update_split_mode(void *data)
+{
+ int cpu, new_mode = *(int *)data; /* data points at the requested mode */
+
+ if (this_cpu_ptr(&split_state)->master) {
+ new_split_mode = new_mode;
+ smp_wmb(); /* presumably pairs with smp_rmb() in cpu_core_split_required() */
+
+ cpumask_andnot(cpu_offline_mask, cpu_present_mask,
+ cpu_online_mask);
+
+ /*
+ * Kick every present-but-offline cpu so it notices new_split_mode.
+ * This should work even though the cpu is offline.
+ */
+ for_each_cpu(cpu, cpu_offline_mask)
+ smp_send_reschedule(cpu);
+ }
+
+ cpu_do_split(new_mode);
+
+ if (this_cpu_ptr(&split_state)->master) {
+ /* Wait for all cpus to finish before we touch subcores_per_core */
+ for_each_present_cpu(cpu) {
+ if (cpu >= setup_max_cpus)
+ break;
+
+ while(per_cpu(split_state, cpu).step < SYNC_STEP_FINISHED)
+ barrier();
+ }
+
+ new_split_mode = 0; /* 0: no split happening */
+
+ /* Make the new mode public */
+ subcores_per_core = new_mode;
+ threads_per_subcore = threads_per_core / subcores_per_core;
+ update_subcore_sibling_mask();
+
+ /* Make sure the new mode is written before we exit */
+ mb();
+ }
+
+ return 0;
+}
+
+static int set_subcores_per_core(int new_mode)
+{
+ struct split_state *state;
+ int cpu;
+
+ if (kvm_hv_mode_active()) {
+ pr_err("Unable to change split core mode while KVM active.\n");
+ return -EBUSY;
+ }
+
+ /*
+ * We are only called at boot, or from the sysfs write. If that ever
+ * changes we'll need a lock here.
+ *
+ * new_mode must be 1, 2 or 4; anything else is a caller bug.
+ */
+ BUG_ON(new_mode < 1 || new_mode > 4 || new_mode == 3);
+
+ for_each_present_cpu(cpu) { /* reset the per-cpu handshake state */
+ state = &per_cpu(split_state, cpu);
+ state->step = SYNC_STEP_INITIAL;
+ state->master = 0;
+ }
+
+ cpus_read_lock();
+
+ /* This cpu will update the globals before exiting stop machine */
+ this_cpu_ptr(&split_state)->master = 1;
+
+ /* Ensure state is consistent before we call the other cpus */
+ mb();
+
+ stop_machine_cpuslocked(cpu_update_split_mode, &new_mode,
+ cpu_online_mask);
+
+ cpus_read_unlock();
+
+ return 0;
+}
+
+/*
+ * sysfs store handler for subcores_per_core. The value is parsed as hex and
+ * only 1, 2 and 4 are accepted. Writing the mode that is already in effect
+ * succeeds without doing anything.
+ */
+static ssize_t __used store_subcores_per_core(struct device *dev,
+		struct device_attribute *attr, const char *buf,
+		size_t count)
+{
+	unsigned long requested;
+	int err;
+
+	/* We are serialised by the attribute lock */
+
+	if (sscanf(buf, "%lx", &requested) != 1)
+		return -EINVAL;
+
+	if (requested != 1 && requested != 2 && requested != 4)
+		return -EINVAL;
+
+	/* Nothing to do if we are already in the requested mode */
+	if (subcores_per_core == requested)
+		return count;
+
+	err = set_subcores_per_core(requested);
+	if (err)
+		return err;
+
+	return count;
+}
+
+/*
+ * sysfs show handler: report the current subcores_per_core value in hex,
+ * followed by a newline.
+ */
+static ssize_t show_subcores_per_core(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	/* sysfs_emit() is the bounds-checked helper intended for show() callbacks */
+	return sysfs_emit(buf, "%x\n", subcores_per_core);
+}
+
+static DEVICE_ATTR(subcores_per_core, 0644,
+ show_subcores_per_core, store_subcores_per_core);
+
+/*
+ * Boot-time setup of split-core support.
+ *
+ * Does nothing (returns 0) unless running on a POWER8 family cpu with
+ * setup_max_cpus aligned to threads_per_core. Otherwise forces the core
+ * into the unsplit state and creates the subcores_per_core sysfs attribute
+ * on the cpu subsystem root device.
+ *
+ * Returns 0 on success, -ENOMEM if the offline-cpu mask cannot be
+ * allocated, or the error from device_create_file().
+ */
+static int subcore_init(void)
+{
+	struct device *dev_root;
+	unsigned pvr_ver;
+	int rc = 0;
+
+	pvr_ver = PVR_VER(mfspr(SPRN_PVR));
+
+	if (pvr_ver != PVR_POWER8 &&
+	    pvr_ver != PVR_POWER8E &&
+	    pvr_ver != PVR_POWER8NVL)
+		return 0;
+
+	/*
+	 * We need all threads in a core to be present to split/unsplit so
+	 * continue only if max_cpus are aligned to threads_per_core.
+	 */
+	if (setup_max_cpus % threads_per_core)
+		return 0;
+
+	/*
+	 * An allocation failure only means the feature is unavailable; fail
+	 * the initcall instead of taking the whole machine down with BUG().
+	 */
+	if (!alloc_cpumask_var(&cpu_offline_mask, GFP_KERNEL))
+		return -ENOMEM;
+
+	set_subcores_per_core(1);
+
+	dev_root = bus_get_dev_root(&cpu_subsys);
+	if (dev_root) {
+		rc = device_create_file(dev_root, &dev_attr_subcores_per_core);
+		put_device(dev_root);
+	}
+	return rc;
+}
+machine_device_initcall(powernv, subcore_init);
diff --git a/arch/powerpc/platforms/powernv/subcore.h b/arch/powerpc/platforms/powernv/subcore.h
new file mode 100644
index 000000000..77feee843
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/subcore.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright 2013, Michael Ellerman, IBM Corporation.
+ */
+
+/* These are ordered and tested with <= */
+#define SYNC_STEP_INITIAL 0
+#define SYNC_STEP_UNSPLIT 1 /* Set by secondary when it sees unsplit */
+#define SYNC_STEP_REAL_MODE 2 /* Set by secondary when in real mode */
+#define SYNC_STEP_FINISHED 3 /* Set by secondary when split/unsplit is done */
+
+#ifndef __ASSEMBLY__
+
+#ifdef CONFIG_SMP
+void split_core_secondary_loop(u8 *state);
+extern void update_subcore_sibling_mask(void);
+#else
+static inline void update_subcore_sibling_mask(void) { }
+#endif /* CONFIG_SMP */
+
+#endif /* __ASSEMBLY__ */
diff --git a/arch/powerpc/platforms/powernv/ultravisor.c b/arch/powerpc/platforms/powernv/ultravisor.c
new file mode 100644
index 000000000..67c8c4b2d
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/ultravisor.c
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Ultravisor high level interfaces
+ *
+ * Copyright 2019, IBM Corporation.
+ *
+ */
+#include <linux/init.h>
+#include <linux/printk.h>
+#include <linux/of_fdt.h>
+#include <linux/of.h>
+
+#include <asm/ultravisor.h>
+#include <asm/firmware.h>
+#include <asm/machdep.h>
+
+#include "powernv.h"
+
+static struct kobject *ultravisor_kobj;
+
+/*
+ * Flat device-tree scan callback. When @node is compatible with
+ * "ibm,ultravisor", sets FW_FEATURE_ULTRAVISOR in powerpc_firmware_features
+ * and returns 1; returns 0 for any other node.
+ */
+int __init early_init_dt_scan_ultravisor(unsigned long node, const char *uname,
+		int depth, void *data)
+{
+	if (of_flat_dt_is_compatible(node, "ibm,ultravisor")) {
+		powerpc_firmware_features |= FW_FEATURE_ULTRAVISOR;
+		pr_debug("Ultravisor detected!\n");
+		return 1;
+	}
+
+	return 0;
+}
+
+static struct memcons *uv_memcons;
+
+/*
+ * Read handler for the ultravisor "msglog" binary attribute: forwards the
+ * request to memcons_copy() on uv_memcons and returns its result.
+ */
+static ssize_t uv_msglog_read(struct file *file, struct kobject *kobj,
+		struct bin_attribute *bin_attr, char *to,
+		loff_t pos, size_t count)
+{
+	ssize_t copied = memcons_copy(uv_memcons, to, pos, count);
+
+	return copied;
+}
+
+static struct bin_attribute uv_msglog_attr = {
+ .attr = {.name = "msglog", .mode = 0400},
+ .read = uv_msglog_read
+};
+
+/*
+ * Expose the ultravisor message log under the "ultravisor" kobject created
+ * beneath firmware_kobj, as the binary attribute "msglog".
+ *
+ * Returns 0 when no ultravisor is present or on success, -ENODEV if the
+ * "ibm,uv-firmware" node is missing, -ENOENT if the memcons cannot be
+ * initialised, -ENOMEM if the kobject cannot be created, or the error from
+ * sysfs_create_bin_file().
+ */
+static int __init uv_init(void)
+{
+	struct device_node *node;
+	int rc;
+
+	if (!firmware_has_feature(FW_FEATURE_ULTRAVISOR))
+		return 0;
+
+	node = of_find_compatible_node(NULL, NULL, "ibm,uv-firmware");
+	if (!node)
+		return -ENODEV;
+
+	uv_memcons = memcons_init(node, "memcons");
+	of_node_put(node);
+	if (!uv_memcons)
+		return -ENOENT;
+
+	uv_msglog_attr.size = memcons_get_size(uv_memcons);
+
+	ultravisor_kobj = kobject_create_and_add("ultravisor", firmware_kobj);
+	if (!ultravisor_kobj)
+		return -ENOMEM;
+
+	rc = sysfs_create_bin_file(ultravisor_kobj, &uv_msglog_attr);
+	if (rc) {
+		/* Drop our reference so no empty directory is left behind */
+		kobject_put(ultravisor_kobj);
+		ultravisor_kobj = NULL;
+	}
+
+	return rc;
+}
+machine_subsys_initcall(powernv, uv_init);
diff --git a/arch/powerpc/platforms/powernv/vas-debug.c b/arch/powerpc/platforms/powernv/vas-debug.c
new file mode 100644
index 000000000..3ce89a4b5
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/vas-debug.c
@@ -0,0 +1,168 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2016-17 IBM Corp.
+ */
+
+#define pr_fmt(fmt) "vas: " fmt
+
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include <asm/vas.h>
+#include "vas.h"
+
+static struct dentry *vas_debugfs;
+
+/*
+ * Map a VAS_COP_TYPE_* value to a human readable name; anything not listed
+ * is reported as "Unknown".
+ */
+static char *cop_to_str(int cop)
+{
+	char *name = "Unknown";
+
+	switch (cop) {
+	case VAS_COP_TYPE_FAULT:
+		name = "Fault";
+		break;
+	case VAS_COP_TYPE_842:
+		name = "NX-842 Normal Priority";
+		break;
+	case VAS_COP_TYPE_842_HIPRI:
+		name = "NX-842 High Priority";
+		break;
+	case VAS_COP_TYPE_GZIP:
+		name = "NX-GZIP Normal Priority";
+		break;
+	case VAS_COP_TYPE_GZIP_HIPRI:
+		name = "NX-GZIP High Priority";
+		break;
+	case VAS_COP_TYPE_FTW:
+		name = "Fast Thread-wakeup";
+		break;
+	default:
+		break;
+	}
+
+	return name;
+}
+
+/*
+ * seq_file show routine for a window's "info" file: prints the coprocessor
+ * type, the direction (Send/Receive) and the owning pid. Prints nothing if
+ * the window has already been unmapped. Always returns 0.
+ */
+static int info_show(struct seq_file *s, void *private)
+{
+	struct pnv_vas_window *win = s->private;
+
+	mutex_lock(&vas_mutex);
+
+	/* ensure window is not unmapped */
+	if (win->hvwc_map) {
+		seq_printf(s, "Type: %s, %s\n", cop_to_str(win->vas_win.cop),
+			   win->tx_win ? "Send" : "Receive");
+		seq_printf(s, "Pid : %d\n", vas_window_pid(&win->vas_win));
+	}
+
+	mutex_unlock(&vas_mutex);
+	return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(info);
+
+/*
+ * Print one hypervisor window context register as "0x<16 hex digits> <name>".
+ * @name and @reg are normally supplied together by the VREG() macro.
+ */
+static inline void print_reg(struct seq_file *s, struct pnv_vas_window *win,
+		char *name, u32 reg)
+{
+	u64 val = read_hvwc_reg(win, name, reg);
+
+	seq_printf(s, "0x%016llx %s\n", val, name);
+}
+
+/*
+ * seq_file show routine for a window's "hvwc" file: dumps the hypervisor
+ * window context registers, one per line, under vas_mutex. Prints nothing
+ * if the window has already been unmapped. Always returns 0.
+ */
+static int hvwc_show(struct seq_file *s, void *private)
+{
+	struct pnv_vas_window *win = s->private;
+
+	mutex_lock(&vas_mutex);
+
+	/* ensure window is not unmapped */
+	if (win->hvwc_map) {
+		print_reg(s, win, VREG(LPID));
+		print_reg(s, win, VREG(PID));
+		print_reg(s, win, VREG(XLATE_MSR));
+		print_reg(s, win, VREG(XLATE_LPCR));
+		print_reg(s, win, VREG(XLATE_CTL));
+		print_reg(s, win, VREG(AMR));
+		print_reg(s, win, VREG(SEIDR));
+		print_reg(s, win, VREG(FAULT_TX_WIN));
+		print_reg(s, win, VREG(OSU_INTR_SRC_RA));
+		print_reg(s, win, VREG(HV_INTR_SRC_RA));
+		print_reg(s, win, VREG(PSWID));
+		print_reg(s, win, VREG(LFIFO_BAR));
+		print_reg(s, win, VREG(LDATA_STAMP_CTL));
+		print_reg(s, win, VREG(LDMA_CACHE_CTL));
+		print_reg(s, win, VREG(LRFIFO_PUSH));
+		print_reg(s, win, VREG(CURR_MSG_COUNT));
+		print_reg(s, win, VREG(LNOTIFY_AFTER_COUNT));
+		print_reg(s, win, VREG(LRX_WCRED));
+		print_reg(s, win, VREG(LRX_WCRED_ADDER));
+		print_reg(s, win, VREG(TX_WCRED));
+		print_reg(s, win, VREG(TX_WCRED_ADDER));
+		print_reg(s, win, VREG(LFIFO_SIZE));
+		print_reg(s, win, VREG(WINCTL));
+		print_reg(s, win, VREG(WIN_STATUS));
+		print_reg(s, win, VREG(WIN_CTX_CACHING_CTL));
+		print_reg(s, win, VREG(TX_RSVD_BUF_COUNT));
+		print_reg(s, win, VREG(LRFIFO_WIN_PTR));
+		print_reg(s, win, VREG(LNOTIFY_CTL));
+		print_reg(s, win, VREG(LNOTIFY_PID));
+		print_reg(s, win, VREG(LNOTIFY_LPID));
+		print_reg(s, win, VREG(LNOTIFY_TID));
+		print_reg(s, win, VREG(LNOTIFY_SCOPE));
+		print_reg(s, win, VREG(NX_UTIL_ADDER));
+	}
+
+	mutex_unlock(&vas_mutex);
+	return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(hvwc);
+
+/*
+ * Remove a window's debugfs directory (recursively) and free its name.
+ * Does nothing if the window has no debugfs directory.
+ */
+void vas_window_free_dbgdir(struct pnv_vas_window *pnv_win)
+{
+	struct vas_window *win = &pnv_win->vas_win;
+
+	if (!win->dbgdir)
+		return;
+
+	debugfs_remove_recursive(win->dbgdir);
+	kfree(win->dbgname);
+
+	/* Clear both so a repeated call is a no-op */
+	win->dbgdir = NULL;
+	win->dbgname = NULL;
+}
+
+/*
+ * Create the per-window debugfs directory "w<winid>" under the owning VAS
+ * instance's directory and add the "info" and "hvwc" files to it. Does
+ * nothing if the instance has no debugfs directory or the name buffer
+ * cannot be allocated.
+ */
+void vas_window_init_dbgdir(struct pnv_vas_window *window)
+{
+	struct vas_window *win = &window->vas_win;
+	struct dentry *dir;
+
+	if (!window->vinst->dbgdir)
+		return;
+
+	win->dbgname = kzalloc(16, GFP_KERNEL);
+	if (!win->dbgname)
+		return;
+
+	snprintf(win->dbgname, 16, "w%d", win->winid);
+
+	dir = debugfs_create_dir(win->dbgname, window->vinst->dbgdir);
+	win->dbgdir = dir;
+
+	debugfs_create_file("info", 0444, dir, window, &info_fops);
+	debugfs_create_file("hvwc", 0444, dir, window, &hvwc_fops);
+}
+
+/*
+ * Create the per-instance debugfs directory "v<vas_id>" under the VAS root
+ * directory, setting the root up first if that has not been done yet.
+ * Leaves the instance without a directory if the name cannot be allocated.
+ */
+void vas_instance_init_dbgdir(struct vas_instance *vinst)
+{
+	vas_init_dbgdir();
+
+	vinst->dbgname = kzalloc(16, GFP_KERNEL);
+	if (vinst->dbgname) {
+		snprintf(vinst->dbgname, 16, "v%d", vinst->vas_id);
+		vinst->dbgdir = debugfs_create_dir(vinst->dbgname, vas_debugfs);
+	}
+}
+
+/*
+ * Create the top-level "vas" debugfs directory on the first call only.
+ * Later calls return immediately, whether or not that first attempt
+ * succeeded.
+ */
+void vas_init_dbgdir(void)
+{
+	static bool first_time = true;
+
+	if (first_time) {
+		first_time = false;
+		vas_debugfs = debugfs_create_dir("vas", NULL);
+	}
+}
diff --git a/arch/powerpc/platforms/powernv/vas-fault.c b/arch/powerpc/platforms/powernv/vas-fault.c
new file mode 100644
index 000000000..2b47d5a86
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/vas-fault.c
@@ -0,0 +1,245 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * VAS Fault handling.
+ * Copyright 2019, IBM Corporation
+ */
+
+#define pr_fmt(fmt) "vas: " fmt
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/kthread.h>
+#include <linux/sched/signal.h>
+#include <linux/mmu_context.h>
+#include <asm/icswx.h>
+
+#include "vas.h"
+
+/*
+ * The maximum FIFO size for fault window can be 8MB
+ * (VAS_RX_FIFO_SIZE_MAX). Using 4MB FIFO since each VAS
+ * instance will be having fault window.
+ * 8MB FIFO can be used if expects more faults for each VAS
+ * instance.
+ */
+#define VAS_FAULT_WIN_FIFO_SIZE (4 << 20)
+
+/*
+ * Log the fault FIFO size and then up to 10 CRBs worth of 64-bit words
+ * starting at @entry, four words per line, stopping early at the end of
+ * the FIFO.
+ */
+static void dump_fifo(struct vas_instance *vinst, void *entry)
+{
+	unsigned long *limit = vinst->fault_fifo + vinst->fault_fifo_size;
+	unsigned long *word = entry;
+	int idx = 0;
+
+	pr_err("Fault fifo size %d, Max crbs %d\n", vinst->fault_fifo_size,
+	       vinst->fault_fifo_size / CRB_SIZE);
+
+	/* Dump 10 CRB entries or until end of FIFO */
+	pr_err("Fault FIFO Dump:\n");
+	while (idx < 10*(CRB_SIZE/8) && word < limit) {
+		pr_err("[%.3d, %p]: 0x%.16lx 0x%.16lx 0x%.16lx 0x%.16lx\n",
+		       idx, word, word[0], word[1], word[2], word[3]);
+		idx += 4;
+		word += 4;
+	}
+}
+
+/*
+ * Process valid CRBs in fault FIFO.
+ * NX processes user space requests, returns credit and updates the status
+ * in the CRB. If it encounters a translation error when accessing the CRB
+ * or request buffers, it raises an interrupt on the CPU to handle the fault.
+ * It takes credit on fault window, updates nx_fault_stamp in CRB with
+ * the following information and pastes CRB in fault FIFO.
+ *
+ * pswid - window ID of the window on which the request is sent.
+ * fault_storage_addr - fault address
+ *
+ * It can raise a single interrupt for multiple faults. Expects OS to
+ * process all valid faults and return credit for each fault on user
+ * space and fault windows. This fault FIFO control will be done with
+ * credit mechanism. NX can continuously paste CRBs until credits are not
+ * available on fault window. Otherwise, returns with RMA_reject.
+ *
+ * Total credits available on fault window: FIFO_SIZE(4MB)/CRBS_SIZE(128)
+ *
+ */
+irqreturn_t vas_fault_thread_fn(int irq, void *data)
+{
+ struct vas_instance *vinst = data;
+ struct coprocessor_request_block *crb, *entry;
+ struct coprocessor_request_block buf;
+ struct pnv_vas_window *window;
+ unsigned long flags;
+ void *fifo;
+
+ crb = &buf; /* local copy of the FIFO entry currently being processed */
+
+ /*
+ * VAS can interrupt with multiple page faults. So process all
+ * valid CRBs within the fault FIFO until it reaches an invalid CRB.
+ * We use CCW[0] and pswid to validate CRBs:
+ *
+ * CCW[0] Reserved bit. When NX pastes CRB, CCW[0]=0
+ * OS sets this bit to 1 after reading CRB.
+ * pswid NX assigns window ID. Set pswid to -1 after
+ * reading CRB from fault FIFO.
+ *
+ * We exit this function if no valid CRBs are available to process.
+ * So acquire fault_lock and reset fifo_in_progress to 0 before
+ * exit.
+ * In case kernel receives another interrupt with different page
+ * fault, interrupt handler returns with IRQ_HANDLED if
+ * fifo_in_progress is set. Means these new faults will be
+ * handled by the current thread. Otherwise set fifo_in_progress
+ * and return IRQ_WAKE_THREAD to wake up thread.
+ */
+ while (true) {
+ spin_lock_irqsave(&vinst->fault_lock, flags);
+ /*
+ * Advance the fault fifo pointer to next CRB.
+ * Use CRB_SIZE rather than sizeof(*crb) since the latter is
+ * aligned to CRB_ALIGN (256) but the CRB written to by VAS is
+ * only CRB_SIZE in len.
+ */
+ fifo = vinst->fault_fifo + (vinst->fault_crbs * CRB_SIZE);
+ entry = fifo;
+
+ if ((entry->stamp.nx.pswid == cpu_to_be32(FIFO_INVALID_ENTRY))
+ || (entry->ccw & cpu_to_be32(CCW0_INVALID))) {
+ vinst->fifo_in_progress = 0;
+ spin_unlock_irqrestore(&vinst->fault_lock, flags);
+ return IRQ_HANDLED;
+ }
+
+ spin_unlock_irqrestore(&vinst->fault_lock, flags);
+ vinst->fault_crbs++;
+ if (vinst->fault_crbs == (vinst->fault_fifo_size / CRB_SIZE))
+ vinst->fault_crbs = 0; /* wrap around to the start of the FIFO */
+
+ memcpy(crb, fifo, CRB_SIZE);
+ entry->stamp.nx.pswid = cpu_to_be32(FIFO_INVALID_ENTRY);
+ entry->ccw |= cpu_to_be32(CCW0_INVALID);
+ /*
+ * Return credit for the fault window.
+ */
+ vas_return_credit(vinst->fault_win, false);
+
+ pr_devel("VAS[%d] fault_fifo %p, fifo %p, fault_crbs %d\n",
+ vinst->vas_id, vinst->fault_fifo, fifo,
+ vinst->fault_crbs);
+
+ vas_dump_crb(crb);
+ window = vas_pswid_to_window(vinst,
+ be32_to_cpu(crb->stamp.nx.pswid));
+
+ if (IS_ERR(window)) {
+ /*
+ * We got an interrupt about a specific send
+ * window but we can't find that window and we can't
+ * even clean it up (return credit on user space
+ * window).
+ * But we should not get here.
+ * TODO: Disable IRQ.
+ */
+ dump_fifo(vinst, (void *)entry);
+ pr_err("VAS[%d] fault_fifo %p, fifo %p, pswid 0x%x, fault_crbs %d bad CRB?\n",
+ vinst->vas_id, vinst->fault_fifo, fifo,
+ be32_to_cpu(crb->stamp.nx.pswid),
+ vinst->fault_crbs);
+
+ WARN_ON_ONCE(1);
+ } else {
+ /*
+ * NX sees faults only with user space windows.
+ */
+ if (window->user_win)
+ vas_update_csb(crb, &window->vas_win.task_ref);
+ else
+ WARN_ON_ONCE(!window->user_win);
+
+ /*
+ * Return credit for send window after processing
+ * fault CRB.
+ */
+ vas_return_credit(window, true);
+ }
+ }
+}
+
+/*
+ * Hard-irq half of fault handling. Claims the FIFO for the handler thread
+ * by setting fifo_in_progress under fault_lock and returns IRQ_WAKE_THREAD;
+ * if the flag is already set it returns IRQ_HANDLED instead.
+ */
+irqreturn_t vas_fault_handler(int irq, void *dev_id)
+{
+	struct vas_instance *vinst = dev_id;
+	unsigned long flags;
+	irqreturn_t ret;
+
+	/*
+	 * NX can generate an interrupt for multiple faults, so the fault
+	 * handler thread processes all CRBs until it finds an invalid entry.
+	 * If NX sees continuous faults, the thread function entered for the
+	 * first interrupt may well process all valid CRBs.
+	 * So wake up the thread only if it is not already in progress.
+	 */
+	spin_lock_irqsave(&vinst->fault_lock, flags);
+
+	if (!vinst->fifo_in_progress) {
+		vinst->fifo_in_progress = 1;
+		ret = IRQ_WAKE_THREAD;
+	} else {
+		ret = IRQ_HANDLED;
+	}
+
+	spin_unlock_irqrestore(&vinst->fault_lock, flags);
+
+	return ret;
+}
+
+/*
+ * Fault window is opened per VAS instance. NX pastes fault CRB in fault
+ * FIFO upon page faults.
+ *
+ * Allocates the fault FIFO (VAS_FAULT_WIN_FIFO_SIZE bytes), marks every
+ * entry invalid and opens a receive window of type VAS_COP_TYPE_FAULT on
+ * it. Returns 0 on success, -ENOMEM if the FIFO cannot be allocated, or the
+ * error from vas_rx_win_open(); on that error the FIFO is freed again and
+ * vinst->fault_fifo is reset to NULL.
+ */
+int vas_setup_fault_window(struct vas_instance *vinst)
+{
+	struct vas_rx_win_attr attr;
+	struct vas_window *win;
+
+	vinst->fault_fifo_size = VAS_FAULT_WIN_FIFO_SIZE;
+	vinst->fault_fifo = kzalloc(vinst->fault_fifo_size, GFP_KERNEL);
+	if (!vinst->fault_fifo) {
+		pr_err("Unable to alloc %d bytes for fault_fifo\n",
+		       vinst->fault_fifo_size);
+		return -ENOMEM;
+	}
+
+	/*
+	 * Invalidate all CRB entries. NX pastes valid entry for each fault.
+	 */
+	memset(vinst->fault_fifo, FIFO_INVALID_ENTRY, vinst->fault_fifo_size);
+	vas_init_rx_win_attr(&attr, VAS_COP_TYPE_FAULT);
+
+	attr.rx_fifo_size = vinst->fault_fifo_size;
+	attr.rx_fifo = __pa(vinst->fault_fifo);
+
+	/*
+	 * Max creds is based on number of CRBs can fit in the FIFO.
+	 * (fault_fifo_size/CRB_SIZE). If 8MB FIFO is used, max creds
+	 * will be 0xffff since the receive creds field is 16bits wide.
+	 */
+	attr.wcreds_max = vinst->fault_fifo_size / CRB_SIZE;
+	attr.lnotify_lpid = 0;
+	attr.lnotify_pid = mfspr(SPRN_PID);
+	attr.lnotify_tid = mfspr(SPRN_PID);
+
+	win = vas_rx_win_open(vinst->vas_id, VAS_COP_TYPE_FAULT, &attr);
+	if (IS_ERR(win)) {
+		pr_err("VAS: Error %ld opening FaultWin\n", PTR_ERR(win));
+		kfree(vinst->fault_fifo);
+		/* The instance outlives this failure; don't leave a stale pointer */
+		vinst->fault_fifo = NULL;
+		return PTR_ERR(win);
+	}
+
+	vinst->fault_win = container_of(win, struct pnv_vas_window, vas_win);
+
+	pr_devel("VAS: Created FaultWin %d, LPID/PID/TID [%d/%d/%d]\n",
+		 vinst->fault_win->vas_win.winid, attr.lnotify_lpid,
+		 attr.lnotify_pid, attr.lnotify_tid);
+
+	return 0;
+}
diff --git a/arch/powerpc/platforms/powernv/vas-trace.h b/arch/powerpc/platforms/powernv/vas-trace.h
new file mode 100644
index 000000000..ca2e08f2d
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/vas-trace.h
@@ -0,0 +1,113 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM vas
+
+#if !defined(_VAS_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
+
+#define _VAS_TRACE_H
+#include <linux/tracepoint.h>
+#include <linux/sched.h>
+#include <asm/vas.h>
+
+TRACE_EVENT( vas_rx_win_open, /* records opener pid, VAS id, cop type and notify lpid/pid/tid */
+
+ TP_PROTO(struct task_struct *tsk,
+ int vasid,
+ int cop,
+ struct vas_rx_win_attr *rxattr),
+
+ TP_ARGS(tsk, vasid, cop, rxattr),
+
+ TP_STRUCT__entry(
+ __field(struct task_struct *, tsk) /* declared but not filled in by TP_fast_assign */
+ __field(int, pid)
+ __field(int, cop)
+ __field(int, vasid)
+ __field(struct vas_rx_win_attr *, rxattr) /* declared but not filled in by TP_fast_assign */
+ __field(int, lnotify_lpid)
+ __field(int, lnotify_pid)
+ __field(int, lnotify_tid)
+ ),
+
+ TP_fast_assign(
+ __entry->pid = tsk->pid;
+ __entry->vasid = vasid;
+ __entry->cop = cop;
+ __entry->lnotify_lpid = rxattr->lnotify_lpid;
+ __entry->lnotify_pid = rxattr->lnotify_pid;
+ __entry->lnotify_tid = rxattr->lnotify_tid;
+ ),
+
+ TP_printk("pid=%d, vasid=%d, cop=%d, lpid=%d, pid=%d, tid=%d",
+ __entry->pid, __entry->vasid, __entry->cop,
+ __entry->lnotify_lpid, __entry->lnotify_pid,
+ __entry->lnotify_tid)
+);
+
+TRACE_EVENT( vas_tx_win_open, /* records opener pid, VAS id, cop type and txattr lpid/pidr */
+
+ TP_PROTO(struct task_struct *tsk,
+ int vasid,
+ int cop,
+ struct vas_tx_win_attr *txattr),
+
+ TP_ARGS(tsk, vasid, cop, txattr),
+
+ TP_STRUCT__entry(
+ __field(struct task_struct *, tsk) /* declared but not filled in by TP_fast_assign */
+ __field(int, pid)
+ __field(int, cop)
+ __field(int, vasid)
+ __field(struct vas_tx_win_attr *, txattr) /* declared but not filled in by TP_fast_assign */
+ __field(int, lpid)
+ __field(int, pidr)
+ ),
+
+ TP_fast_assign(
+ __entry->pid = tsk->pid;
+ __entry->vasid = vasid;
+ __entry->cop = cop;
+ __entry->lpid = txattr->lpid;
+ __entry->pidr = txattr->pidr;
+ ),
+
+ TP_printk("pid=%d, vasid=%d, cop=%d, lpid=%d, pidr=%d",
+ __entry->pid, __entry->vasid, __entry->cop,
+ __entry->lpid, __entry->pidr)
+);
+
+/*
+ * Records the pasting task's pid, the VAS instance id, the window id and
+ * the kernel paste address of the window. The tsk and win fields are
+ * declared in the entry but are not filled in by TP_fast_assign.
+ */
+TRACE_EVENT( vas_paste_crb,
+
+	TP_PROTO(struct task_struct *tsk,
+		struct pnv_vas_window *win),
+
+	TP_ARGS(tsk, win),
+
+	TP_STRUCT__entry(
+		__field(struct task_struct *, tsk)
+		__field(struct vas_window *, win)
+		__field(int, pid)
+		__field(int, vasid)
+		__field(int, winid)
+		__field(unsigned long, paste_kaddr)
+	),
+
+	TP_fast_assign(
+		__entry->pid = tsk->pid;
+		__entry->vasid = win->vinst->vas_id;
+		__entry->winid = win->vas_win.winid;
+		__entry->paste_kaddr = (unsigned long)win->paste_kaddr;
+	),
+
+	/* No trailing "\n": the trace output code appends the newline itself */
+	TP_printk("pid=%d, vasid=%d, winid=%d, paste_kaddr=0x%016lx",
+		__entry->pid, __entry->vasid, __entry->winid,
+		__entry->paste_kaddr)
+);
+
+#endif /* _VAS_TRACE_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH ../../arch/powerpc/platforms/powernv
+#define TRACE_INCLUDE_FILE vas-trace
+#include <trace/define_trace.h>
diff --git a/arch/powerpc/platforms/powernv/vas-window.c b/arch/powerpc/platforms/powernv/vas-window.c
new file mode 100644
index 000000000..b66483800
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/vas-window.c
@@ -0,0 +1,1471 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2016-17 IBM Corp.
+ */
+
+#define pr_fmt(fmt) "vas: " fmt
+
+#include <linux/types.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/io.h>
+#include <linux/log2.h>
+#include <linux/rcupdate.h>
+#include <linux/cred.h>
+#include <linux/sched/mm.h>
+#include <linux/mmu_context.h>
+#include <asm/switch_to.h>
+#include <asm/ppc-opcode.h>
+#include <asm/vas.h>
+#include "vas.h"
+#include "copy-paste.h"
+
+#define CREATE_TRACE_POINTS
+#include "vas-trace.h"
+
+/*
+ * Compute the paste address region for the window @window using the
+ * ->paste_base_addr and ->paste_win_id_shift we got from device tree.
+ *
+ * @addr receives the start of the region. @len is optional; when non-NULL
+ * it receives the region length (PAGE_SIZE).
+ */
+void vas_win_paste_addr(struct pnv_vas_window *window, u64 *addr, int *len)
+{
+	int winid;
+	u64 base, shift;
+
+	base = window->vinst->paste_base_addr;
+	shift = window->vinst->paste_win_id_shift;
+	winid = window->vas_win.winid;
+
+	/*
+	 * Widen the window id before shifting. Shifting the int directly is
+	 * evaluated in int width and can overflow before the result is added
+	 * to the 64-bit base address.
+	 */
+	*addr = base + ((u64)winid << shift);
+	if (len)
+		*len = PAGE_SIZE;
+
+	pr_debug("Txwin #%d: Paste addr 0x%llx\n", winid, *addr);
+}
+
+/*
+ * Report the bus address range of the Hypervisor Window Context (HVWC)
+ * for @window: VAS_HVWC_SIZE bytes, located winid * VAS_HVWC_SIZE past the
+ * instance's ->hvwc_bar_start.
+ */
+static inline void get_hvwc_mmio_bar(struct pnv_vas_window *window,
+				     u64 *start, int *len)
+{
+	u64 offset = window->vas_win.winid * VAS_HVWC_SIZE;
+
+	*start = window->vinst->hvwc_bar_start + offset;
+	*len = VAS_HVWC_SIZE;
+}
+
+/*
+ * Report the bus address range of the OS/User Window Context (UWC) for
+ * @window: VAS_UWC_SIZE bytes, located winid * VAS_UWC_SIZE past the
+ * instance's ->uwc_bar_start.
+ */
+static inline void get_uwc_mmio_bar(struct pnv_vas_window *window,
+				    u64 *start, int *len)
+{
+	u64 offset = window->vas_win.winid * VAS_UWC_SIZE;
+
+	*start = window->vinst->uwc_bar_start + offset;
+	*len = VAS_UWC_SIZE;
+}
+
+/*
+ * Map the paste bus address of the given send window into kernel address
+ * space. Unlike MMIO regions (map_mmio_region() below), paste region must
+ * be mapped cache-able and is only applicable to send windows.
+ *
+ * On success, returns the kernel mapping and stores the region name in
+ * ->paste_addr_name (it is freed by unmap_paste_region()). On any failure,
+ * returns ERR_PTR(-ENOMEM) and leaves nothing reserved or allocated and
+ * ->paste_addr_name untouched.
+ */
+static void *map_paste_region(struct pnv_vas_window *txwin)
+{
+	int len;
+	void *map;
+	char *name;
+	u64 start;
+
+	name = kasprintf(GFP_KERNEL, "window-v%d-w%d", txwin->vinst->vas_id,
+			 txwin->vas_win.winid);
+	if (!name)
+		return ERR_PTR(-ENOMEM);
+
+	vas_win_paste_addr(txwin, &start, &len);
+
+	if (!request_mem_region(start, len, name)) {
+		pr_devel("%s(): request_mem_region(0x%llx, %d) failed\n",
+			 __func__, start, len);
+		goto free_name;
+	}
+
+	map = ioremap_cache(start, len);
+	if (!map) {
+		pr_devel("%s(): ioremap_cache(0x%llx, %d) failed\n", __func__,
+			 start, len);
+		goto release_region;
+	}
+
+	/*
+	 * Publish the name only once nothing can fail any more, so that the
+	 * window never holds a pointer to a freed string.
+	 */
+	txwin->paste_addr_name = name;
+
+	pr_devel("Mapped paste addr 0x%llx to kaddr 0x%p\n", start, map);
+	return map;
+
+release_region:
+	/* The region was reserved above; give it back before bailing out. */
+	release_mem_region((phys_addr_t)start, len);
+free_name:
+	kfree(name);
+	return ERR_PTR(-ENOMEM);
+}
+
+/*
+ * Reserve the bus address range of @len bytes starting at @start under
+ * @name and map it into the kernel address space with ioremap().
+ *
+ * Returns the kernel mapping, or NULL on failure. On failure the range is
+ * not left reserved.
+ */
+static void *map_mmio_region(char *name, u64 start, int len)
+{
+	void *map;
+
+	if (!request_mem_region(start, len, name)) {
+		pr_devel("%s(): request_mem_region(0x%llx, %d) failed\n",
+			 __func__, start, len);
+		return NULL;
+	}
+
+	map = ioremap(start, len);
+	if (!map) {
+		pr_devel("%s(): ioremap(0x%llx, %d) failed\n", __func__, start,
+			 len);
+		/*
+		 * Callers only undo non-NULL mappings, so the reservation
+		 * must be dropped here or it would be leaked.
+		 */
+		release_mem_region((phys_addr_t)start, len);
+		return NULL;
+	}
+
+	return map;
+}
+
+/*
+ * Undo a mapping of a reserved bus address range: drop the kernel mapping
+ * @addr, then release the @len bytes reserved at bus address @start.
+ */
+static void unmap_region(void *addr, u64 start, int len)
+{
+ iounmap(addr);
+ release_mem_region((phys_addr_t)start, len);
+}
+
+/*
+ * Tear down the kernel mapping of a window's paste region, if it has one,
+ * and free the name the region was reserved under. Both pointers are
+ * cleared afterwards.
+ */
+static void unmap_paste_region(struct pnv_vas_window *window)
+{
+	u64 paste_busaddr;
+	int paste_len;
+
+	/* Nothing to do for a window that was never mapped. */
+	if (!window->paste_kaddr)
+		return;
+
+	vas_win_paste_addr(window, &paste_busaddr, &paste_len);
+	unmap_region(window->paste_kaddr, paste_busaddr, paste_len);
+	window->paste_kaddr = NULL;
+
+	kfree(window->paste_addr_name);
+	window->paste_addr_name = NULL;
+}
+
+/*
+ * Unmap the MMIO regions for a window. Hold the vas_mutex so we don't
+ * unmap when the window's debugfs dir is in use. This serializes close
+ * of a window even on another VAS instance but since it's not a critical
+ * path, just minimize the time we hold the mutex for now. We can add
+ * a per-instance mutex later if necessary.
+ *
+ * Only the pointers are detached under the mutex; the actual unmapping
+ * is done after the mutex is dropped, and only for mappings that were
+ * non-NULL.
+ */
+static void unmap_winctx_mmio_bars(struct pnv_vas_window *window)
+{
+ int len;
+ void *uwc_map;
+ void *hvwc_map;
+ u64 busaddr_start;
+
+ mutex_lock(&vas_mutex);
+
+ /* Take private copies and clear the window's pointers. */
+ hvwc_map = window->hvwc_map;
+ window->hvwc_map = NULL;
+
+ uwc_map = window->uwc_map;
+ window->uwc_map = NULL;
+
+ mutex_unlock(&vas_mutex);
+
+ if (hvwc_map) {
+ get_hvwc_mmio_bar(window, &busaddr_start, &len);
+ unmap_region(hvwc_map, busaddr_start, len);
+ }
+
+ if (uwc_map) {
+ get_uwc_mmio_bar(window, &busaddr_start, &len);
+ unmap_region(uwc_map, busaddr_start, len);
+ }
+}
+
+/*
+ * Map both window context regions of @window - the Hypervisor Window
+ * Context (HVWC) and the OS/User Window Context (UWC) - and record the
+ * resulting kernel addresses in ->hvwc_map and ->uwc_map.
+ *
+ * Returns 0 when both mappings succeed. If either fails, whatever was
+ * mapped is unmapped again and -1 is returned.
+ */
+static int map_winctx_mmio_bars(struct pnv_vas_window *window)
+{
+	u64 bar_start;
+	int bar_len;
+
+	get_hvwc_mmio_bar(window, &bar_start, &bar_len);
+	window->hvwc_map = map_mmio_region("HVWCM_Window", bar_start, bar_len);
+
+	get_uwc_mmio_bar(window, &bar_start, &bar_len);
+	window->uwc_map = map_mmio_region("UWCM_Window", bar_start, bar_len);
+
+	if (window->hvwc_map && window->uwc_map)
+		return 0;
+
+	/* Undo whichever of the two mappings did succeed. */
+	unmap_winctx_mmio_bars(window);
+	return -1;
+}
+
+/*
+ * Reset all valid registers in the HV and OS/User Window Contexts for
+ * the window identified by @window. Every register listed below is
+ * written with zero through the HV window context.
+ *
+ * NOTE: We cannot really use a for loop to reset window context. Not all
+ * offsets in a window context are valid registers and the valid
+ * registers are not sequential. And, we can only write to offsets
+ * with valid registers.
+ */
+static void reset_window_regs(struct pnv_vas_window *window)
+{
+ write_hvwc_reg(window, VREG(LPID), 0ULL);
+ write_hvwc_reg(window, VREG(PID), 0ULL);
+ write_hvwc_reg(window, VREG(XLATE_MSR), 0ULL);
+ write_hvwc_reg(window, VREG(XLATE_LPCR), 0ULL);
+ write_hvwc_reg(window, VREG(XLATE_CTL), 0ULL);
+ write_hvwc_reg(window, VREG(AMR), 0ULL);
+ write_hvwc_reg(window, VREG(SEIDR), 0ULL);
+ write_hvwc_reg(window, VREG(FAULT_TX_WIN), 0ULL);
+ write_hvwc_reg(window, VREG(OSU_INTR_SRC_RA), 0ULL);
+ write_hvwc_reg(window, VREG(HV_INTR_SRC_RA), 0ULL);
+ write_hvwc_reg(window, VREG(PSWID), 0ULL);
+ write_hvwc_reg(window, VREG(LFIFO_BAR), 0ULL);
+ write_hvwc_reg(window, VREG(LDATA_STAMP_CTL), 0ULL);
+ write_hvwc_reg(window, VREG(LDMA_CACHE_CTL), 0ULL);
+ write_hvwc_reg(window, VREG(LRFIFO_PUSH), 0ULL);
+ write_hvwc_reg(window, VREG(CURR_MSG_COUNT), 0ULL);
+ write_hvwc_reg(window, VREG(LNOTIFY_AFTER_COUNT), 0ULL);
+ write_hvwc_reg(window, VREG(LRX_WCRED), 0ULL);
+ write_hvwc_reg(window, VREG(LRX_WCRED_ADDER), 0ULL);
+ write_hvwc_reg(window, VREG(TX_WCRED), 0ULL);
+ write_hvwc_reg(window, VREG(TX_WCRED_ADDER), 0ULL);
+ write_hvwc_reg(window, VREG(LFIFO_SIZE), 0ULL);
+ write_hvwc_reg(window, VREG(WINCTL), 0ULL);
+ write_hvwc_reg(window, VREG(WIN_STATUS), 0ULL);
+ write_hvwc_reg(window, VREG(WIN_CTX_CACHING_CTL), 0ULL);
+ write_hvwc_reg(window, VREG(TX_RSVD_BUF_COUNT), 0ULL);
+ write_hvwc_reg(window, VREG(LRFIFO_WIN_PTR), 0ULL);
+ write_hvwc_reg(window, VREG(LNOTIFY_CTL), 0ULL);
+ write_hvwc_reg(window, VREG(LNOTIFY_PID), 0ULL);
+ write_hvwc_reg(window, VREG(LNOTIFY_LPID), 0ULL);
+ write_hvwc_reg(window, VREG(LNOTIFY_TID), 0ULL);
+ write_hvwc_reg(window, VREG(LNOTIFY_SCOPE), 0ULL);
+ write_hvwc_reg(window, VREG(NX_UTIL_ADDER), 0ULL);
+
+ /* Skip read-only registers: NX_UTIL and NX_UTIL_SE */
+
+ /*
+ * The send and receive window credit adder registers are also
+ * accessible from HVWC and have been initialized above. We don't
+ * need to initialize from the OS/User Window Context, so skip
+ * following calls:
+ *
+ * write_uwc_reg(window, VREG(TX_WCRED_ADDER), 0ULL);
+ * write_uwc_reg(window, VREG(LRX_WCRED_ADDER), 0ULL);
+ */
+}
+
+/*
+ * Initialize window context registers related to Address Translation.
+ * These registers are common to send/receive windows although they
+ * differ for user/kernel windows. As we resolve the TODOs we may
+ * want to add fields to vas_winctx and move the initialization to
+ * init_vas_winctx_regs().
+ *
+ * @user_win selects the user-window variant: it only affects the
+ * XLATE_MSR value (the DR and PR fields are set for user windows).
+ * The registers written are XLATE_MSR, XLATE_LPCR, XLATE_CTL, AMR and
+ * SEIDR, in that order.
+ */
+static void init_xlate_regs(struct pnv_vas_window *window, bool user_win)
+{
+ u64 lpcr, val;
+
+ /*
+ * MSR_TA, MSR_US are false for both kernel and user.
+ * MSR_DR and MSR_PR are false for kernel.
+ */
+ val = 0ULL;
+ val = SET_FIELD(VAS_XLATE_MSR_HV, val, 1);
+ val = SET_FIELD(VAS_XLATE_MSR_SF, val, 1);
+ if (user_win) {
+ val = SET_FIELD(VAS_XLATE_MSR_DR, val, 1);
+ val = SET_FIELD(VAS_XLATE_MSR_PR, val, 1);
+ }
+ write_hvwc_reg(window, VREG(XLATE_MSR), val);
+
+ /* ISL and TC below are derived from the current LPCR value. */
+ lpcr = mfspr(SPRN_LPCR);
+ val = 0ULL;
+ /*
+ * NOTE: From Section 5.7.8.1 Segment Lookaside Buffer of the
+ * Power ISA, v3.0B, Page size encoding is 0 = 4KB, 5 = 64KB.
+ *
+ * NOTE: From Section 1.3.1, Address Translation Context of the
+ * Nest MMU Workbook, LPCR_SC should be 0 for Power9.
+ */
+ val = SET_FIELD(VAS_XLATE_LPCR_PAGE_SIZE, val, 5);
+ val = SET_FIELD(VAS_XLATE_LPCR_ISL, val, lpcr & LPCR_ISL);
+ val = SET_FIELD(VAS_XLATE_LPCR_TC, val, lpcr & LPCR_TC);
+ val = SET_FIELD(VAS_XLATE_LPCR_SC, val, 0);
+ write_hvwc_reg(window, VREG(XLATE_LPCR), val);
+
+ /*
+ * Section 1.3.1 (Address translation Context) of NMMU workbook.
+ * 0b00 Hashed Page Table mode
+ * 0b01 Reserved
+ * 0b10 Radix on HPT
+ * 0b11 Radix on Radix
+ */
+ val = 0ULL;
+ val = SET_FIELD(VAS_XLATE_MODE, val, radix_enabled() ? 3 : 2);
+ write_hvwc_reg(window, VREG(XLATE_CTL), val);
+
+ /*
+ * TODO: Can we mfspr(AMR) even for user windows?
+ */
+ val = 0ULL;
+ val = SET_FIELD(VAS_AMR, val, mfspr(SPRN_AMR));
+ write_hvwc_reg(window, VREG(AMR), val);
+
+ val = 0ULL;
+ val = SET_FIELD(VAS_SEIDR, val, 0);
+ write_hvwc_reg(window, VREG(SEIDR), val);
+}
+
+/*
+ * Initialize Reserved Send Buffer Count for the send window. It involves
+ * writing to the register, reading it back to confirm that the hardware
+ * has enough buffers to reserve. See section 1.3.1.2.1 of VAS workbook.
+ *
+ * Since we can only make a best-effort attempt to fulfill the request,
+ * we don't return any errors if we cannot.
+ *
+ * TODO: Reserved (aka dedicated) send buffers are not supported yet.
+ *
+ * As implemented today this only writes zero to TX_RSVD_BUF_COUNT;
+ * @winctx is currently unused.
+ */
+static void init_rsvd_tx_buf_count(struct pnv_vas_window *txwin,
+ struct vas_winctx *winctx)
+{
+ write_hvwc_reg(txwin, VREG(TX_RSVD_BUF_COUNT), 0ULL);
+}
+
+/*
+ * init_winctx_regs()
+ * Initialize window context registers for a window from the values
+ * in @winctx. This is used for both receive and send windows.
+ * Except for caching control and marking window open, the registers
+ * are initialized in the order listed in Section 3.1.4 (Window Context
+ * Cache Register Details) of the VAS workbook although they don't need
+ * to be.
+ *
+ * All registers are first reset via reset_window_regs(). The WINCTL
+ * register, which carries the OPEN bit, is written last.
+ *
+ * Design note: For NX receive windows, NX allocates the FIFO buffer in OPAL
+ * (so that it can get a large contiguous area) and passes that buffer
+ * to kernel via device tree. We now write that buffer address to the
+ * FIFO BAR. Would it make sense to do this all in OPAL? i.e have OPAL
+ * write the per-chip RX FIFO addresses to the windows during boot-up
+ * as a one-time task? That could work for NX but what about other
+ * receivers? Let the receivers tell us the rx-fifo buffers for now.
+ */
+static void init_winctx_regs(struct pnv_vas_window *window,
+ struct vas_winctx *winctx)
+{
+ u64 val;
+ int fifo_size;
+
+ reset_window_regs(window);
+
+ val = 0ULL;
+ val = SET_FIELD(VAS_LPID, val, winctx->lpid);
+ write_hvwc_reg(window, VREG(LPID), val);
+
+ val = 0ULL;
+ val = SET_FIELD(VAS_PID_ID, val, winctx->pidr);
+ write_hvwc_reg(window, VREG(PID), val);
+
+ init_xlate_regs(window, winctx->user_win);
+
+ val = 0ULL;
+ val = SET_FIELD(VAS_FAULT_TX_WIN, val, winctx->fault_win_id);
+ write_hvwc_reg(window, VREG(FAULT_TX_WIN), val);
+
+ /* In PowerNV, interrupts go to HV. */
+ write_hvwc_reg(window, VREG(OSU_INTR_SRC_RA), 0ULL);
+
+ val = 0ULL;
+ val = SET_FIELD(VAS_HV_INTR_SRC_RA, val, winctx->irq_port);
+ write_hvwc_reg(window, VREG(HV_INTR_SRC_RA), val);
+
+ val = 0ULL;
+ val = SET_FIELD(VAS_PSWID_EA_HANDLE, val, winctx->pswid);
+ write_hvwc_reg(window, VREG(PSWID), val);
+
+ write_hvwc_reg(window, VREG(SPARE1), 0ULL);
+ write_hvwc_reg(window, VREG(SPARE2), 0ULL);
+ write_hvwc_reg(window, VREG(SPARE3), 0ULL);
+
+ /*
+ * NOTE: VAS expects the FIFO address to be copied into the LFIFO_BAR
+ * register as is - do NOT shift the address into VAS_LFIFO_BAR
+ * bit fields! Ok to set the page migration select fields -
+ * VAS ignores the lower 10+ bits in the address anyway, because
+ * the minimum FIFO size is 1K?
+ *
+ * See also: Design note in function header.
+ */
+ val = winctx->rx_fifo;
+ val = SET_FIELD(VAS_PAGE_MIGRATION_SELECT, val, 0);
+ write_hvwc_reg(window, VREG(LFIFO_BAR), val);
+
+ val = 0ULL;
+ val = SET_FIELD(VAS_LDATA_STAMP, val, winctx->data_stamp);
+ write_hvwc_reg(window, VREG(LDATA_STAMP_CTL), val);
+
+ val = 0ULL;
+ val = SET_FIELD(VAS_LDMA_TYPE, val, winctx->dma_type);
+ val = SET_FIELD(VAS_LDMA_FIFO_DISABLE, val, winctx->fifo_disable);
+ write_hvwc_reg(window, VREG(LDMA_CACHE_CTL), val);
+
+ write_hvwc_reg(window, VREG(LRFIFO_PUSH), 0ULL);
+ write_hvwc_reg(window, VREG(CURR_MSG_COUNT), 0ULL);
+ write_hvwc_reg(window, VREG(LNOTIFY_AFTER_COUNT), 0ULL);
+
+ /* Both credit registers are seeded from the same ->wcreds_max. */
+ val = 0ULL;
+ val = SET_FIELD(VAS_LRX_WCRED, val, winctx->wcreds_max);
+ write_hvwc_reg(window, VREG(LRX_WCRED), val);
+
+ val = 0ULL;
+ val = SET_FIELD(VAS_TX_WCRED, val, winctx->wcreds_max);
+ write_hvwc_reg(window, VREG(TX_WCRED), val);
+
+ write_hvwc_reg(window, VREG(LRX_WCRED_ADDER), 0ULL);
+ write_hvwc_reg(window, VREG(TX_WCRED_ADDER), 0ULL);
+
+ /* The FIFO size is programmed as log2 of its size in KB. */
+ fifo_size = winctx->rx_fifo_size / 1024;
+
+ val = 0ULL;
+ val = SET_FIELD(VAS_LFIFO_SIZE, val, ilog2(fifo_size));
+ write_hvwc_reg(window, VREG(LFIFO_SIZE), val);
+
+ /* Update window control and caching control registers last so
+ * we mark the window open only after fully initializing it and
+ * pushing context to cache.
+ */
+
+ write_hvwc_reg(window, VREG(WIN_STATUS), 0ULL);
+
+ init_rsvd_tx_buf_count(window, winctx);
+
+ /* for a send window, point to the matching receive window */
+ val = 0ULL;
+ val = SET_FIELD(VAS_LRX_WIN_ID, val, winctx->rx_win_id);
+ write_hvwc_reg(window, VREG(LRFIFO_WIN_PTR), val);
+
+ write_hvwc_reg(window, VREG(SPARE4), 0ULL);
+
+ val = 0ULL;
+ val = SET_FIELD(VAS_NOTIFY_DISABLE, val, winctx->notify_disable);
+ val = SET_FIELD(VAS_INTR_DISABLE, val, winctx->intr_disable);
+ val = SET_FIELD(VAS_NOTIFY_EARLY, val, winctx->notify_early);
+ val = SET_FIELD(VAS_NOTIFY_OSU_INTR, val, winctx->notify_os_intr_reg);
+ write_hvwc_reg(window, VREG(LNOTIFY_CTL), val);
+
+ val = 0ULL;
+ val = SET_FIELD(VAS_LNOTIFY_PID, val, winctx->lnotify_pid);
+ write_hvwc_reg(window, VREG(LNOTIFY_PID), val);
+
+ val = 0ULL;
+ val = SET_FIELD(VAS_LNOTIFY_LPID, val, winctx->lnotify_lpid);
+ write_hvwc_reg(window, VREG(LNOTIFY_LPID), val);
+
+ val = 0ULL;
+ val = SET_FIELD(VAS_LNOTIFY_TID, val, winctx->lnotify_tid);
+ write_hvwc_reg(window, VREG(LNOTIFY_TID), val);
+
+ val = 0ULL;
+ val = SET_FIELD(VAS_LNOTIFY_MIN_SCOPE, val, winctx->min_scope);
+ val = SET_FIELD(VAS_LNOTIFY_MAX_SCOPE, val, winctx->max_scope);
+ write_hvwc_reg(window, VREG(LNOTIFY_SCOPE), val);
+
+ /* Skip read-only registers NX_UTIL and NX_UTIL_SE */
+
+ write_hvwc_reg(window, VREG(SPARE5), 0ULL);
+ write_hvwc_reg(window, VREG(NX_UTIL_ADDER), 0ULL);
+ write_hvwc_reg(window, VREG(SPARE6), 0ULL);
+
+ /* Finally, push window context to memory and... */
+ val = 0ULL;
+ val = SET_FIELD(VAS_PUSH_TO_MEM, val, 1);
+ write_hvwc_reg(window, VREG(WIN_CTX_CACHING_CTL), val);
+
+ /* ... mark the window open for business */
+ val = 0ULL;
+ val = SET_FIELD(VAS_WINCTL_REJ_NO_CREDIT, val, winctx->rej_no_credit);
+ val = SET_FIELD(VAS_WINCTL_PIN, val, winctx->pin_win);
+ val = SET_FIELD(VAS_WINCTL_TX_WCRED_MODE, val, winctx->tx_wcred_mode);
+ val = SET_FIELD(VAS_WINCTL_RX_WCRED_MODE, val, winctx->rx_wcred_mode);
+ val = SET_FIELD(VAS_WINCTL_TX_WORD_MODE, val, winctx->tx_word_mode);
+ val = SET_FIELD(VAS_WINCTL_RX_WORD_MODE, val, winctx->rx_word_mode);
+ val = SET_FIELD(VAS_WINCTL_FAULT_WIN, val, winctx->fault_win);
+ val = SET_FIELD(VAS_WINCTL_NX_WIN, val, winctx->nx_win);
+ val = SET_FIELD(VAS_WINCTL_OPEN, val, 1);
+ write_hvwc_reg(window, VREG(WINCTL), val);
+}
+
+/* Return window id @winid to the allocator @ida. */
+static void vas_release_window_id(struct ida *ida, int winid)
+{
+ ida_free(ida, winid);
+}
+
+/*
+ * Allocate a window id no larger than VAS_WINDOWS_PER_CHIP - 1 from @ida.
+ *
+ * Returns the id on success. When the allocator reports -ENOSPC an error
+ * is logged and -EAGAIN is returned instead; any other negative value
+ * from ida_alloc_max() is passed through unchanged.
+ */
+static int vas_assign_window_id(struct ida *ida)
+{
+	int id;
+
+	id = ida_alloc_max(ida, VAS_WINDOWS_PER_CHIP - 1, GFP_KERNEL);
+	if (id != -ENOSPC)
+		return id;
+
+	pr_err("Too many (%d) open windows\n", VAS_WINDOWS_PER_CHIP);
+	return -EAGAIN;
+}
+
+/*
+ * Release everything vas_window_alloc() set up for @window: unmap the
+ * window context MMIO regions, call vas_window_free_dbgdir(), free the
+ * structure and finally release the window id.
+ */
+static void vas_window_free(struct pnv_vas_window *window)
+{
+ /* Saved up front: @window is freed before the id is released. */
+ struct vas_instance *vinst = window->vinst;
+ int winid = window->vas_win.winid;
+
+ unmap_winctx_mmio_bars(window);
+
+ vas_window_free_dbgdir(window);
+
+ kfree(window);
+
+ vas_release_window_id(&vinst->ida, winid);
+}
+
+/*
+ * Allocate a window on @vinst: reserve a window id, allocate the zeroed
+ * window structure, map its window context MMIO regions and call
+ * vas_window_init_dbgdir().
+ *
+ * Returns the new window. If no id can be assigned, the error from
+ * vas_assign_window_id() is returned as an ERR_PTR; any later failure
+ * releases the id and returns ERR_PTR(-ENOMEM).
+ */
+static struct pnv_vas_window *vas_window_alloc(struct vas_instance *vinst)
+{
+	struct pnv_vas_window *win;
+	int id;
+
+	id = vas_assign_window_id(&vinst->ida);
+	if (id < 0)
+		return ERR_PTR(id);
+
+	win = kzalloc(sizeof(*win), GFP_KERNEL);
+	if (win) {
+		win->vinst = vinst;
+		win->vas_win.winid = id;
+
+		if (!map_winctx_mmio_bars(win)) {
+			vas_window_init_dbgdir(win);
+			return win;
+		}
+	}
+
+	/* kfree(NULL) is a no-op, so both failure paths share this cleanup. */
+	kfree(win);
+	vas_release_window_id(&vinst->ida, id);
+	return ERR_PTR(-ENOMEM);
+}
+
+/*
+ * Drop one send-window reference on the receive window @rxwin, i.e. undo
+ * the ->num_txwins increment done by get_vinst_rxwin().
+ */
+static void put_rx_win(struct pnv_vas_window *rxwin)
+{
+ /* Better not be a send window! */
+ WARN_ON_ONCE(rxwin->tx_win);
+
+ atomic_dec(&rxwin->num_txwins);
+}
+
+/*
+ * Look up the user space receive window named by @pswid.
+ * - The vasid encoded in @pswid must be this instance's id (so both
+ *   send and receive windows are on the same VAS instance).
+ * - The window id encoded in @pswid must refer to an existing window
+ *   that is a receive window of coprocessor type FTW.
+ *
+ * Returns the window, or ERR_PTR(-EINVAL) if any check fails.
+ *
+ * NOTE: We access ->windows[] table and assume that vinst->mutex is held.
+ */
+static struct pnv_vas_window *get_user_rxwin(struct vas_instance *vinst,
+					     u32 pswid)
+{
+	struct pnv_vas_window *win;
+	int vasid, winid;
+
+	decode_pswid(pswid, &vasid, &winid);
+	if (vasid != vinst->vas_id)
+		return ERR_PTR(-EINVAL);
+
+	win = vinst->windows[winid];
+	if (!win)
+		return ERR_PTR(-EINVAL);
+
+	/* Must be a receive window, and of the FTW type. */
+	if (win->tx_win || win->vas_win.cop != VAS_COP_TYPE_FTW)
+		return ERR_PTR(-EINVAL);
+
+	return win;
+}
+
+/*
+ * Find the receive window a new send window should be paired with and
+ * take a reference on it (->num_txwins). For FTW the window is looked up
+ * by @pswid; for every other @cop it comes from the ->rxwin[] table.
+ *
+ * Returns the window, or ERR_PTR(-EINVAL) if there is none.
+ *
+ * See also function header of set_vinst_win().
+ */
+static struct pnv_vas_window *get_vinst_rxwin(struct vas_instance *vinst,
+			enum vas_cop_type cop, u32 pswid)
+{
+	struct pnv_vas_window *rxwin;
+
+	mutex_lock(&vinst->mutex);
+
+	if (cop == VAS_COP_TYPE_FTW) {
+		rxwin = get_user_rxwin(vinst, pswid);
+	} else {
+		rxwin = vinst->rxwin[cop];
+		if (!rxwin)
+			rxwin = ERR_PTR(-EINVAL);
+	}
+
+	if (!IS_ERR(rxwin))
+		atomic_inc(&rxwin->num_txwins);
+
+	mutex_unlock(&vinst->mutex);
+
+	return rxwin;
+}
+
+/*
+ * We have two tables of windows in a VAS instance. The first one,
+ * ->windows[], contains all the windows in the instance and allows
+ * looking up a window by its id. It is used to look up send windows
+ * during fault handling and receive windows when pairing user space
+ * send/receive windows.
+ *
+ * The second table, ->rxwin[], contains receive windows that are
+ * associated with NX engines. This table has VAS_COP_TYPE_MAX
+ * entries and is used to look up a receive window by its
+ * coprocessor type.
+ *
+ * Here, we save @window in the ->windows[] table. If it is a
+ * non-user receive window, we also save the window in the ->rxwin[]
+ * table. Both updates are done under vinst->mutex.
+ */
+static void set_vinst_win(struct vas_instance *vinst,
+ struct pnv_vas_window *window)
+{
+ int id = window->vas_win.winid;
+
+ mutex_lock(&vinst->mutex);
+
+ /*
+ * There should only be one receive window for a coprocessor type
+ * unless it's a user (FTW) window.
+ */
+ if (!window->user_win && !window->tx_win) {
+ WARN_ON_ONCE(vinst->rxwin[window->vas_win.cop]);
+ vinst->rxwin[window->vas_win.cop] = window;
+ }
+
+ /* The slot for this id is expected to be empty. */
+ WARN_ON_ONCE(vinst->windows[id] != NULL);
+ vinst->windows[id] = window;
+
+ mutex_unlock(&vinst->mutex);
+}
+
+/*
+ * Remove @window from the window table(s) of its VAS instance, under
+ * vinst->mutex. This is the inverse of set_vinst_win(); see its function
+ * header for a description of the two tables.
+ */
+static void clear_vinst_win(struct pnv_vas_window *window)
+{
+	struct vas_instance *inst = window->vinst;
+	int slot = window->vas_win.winid;
+
+	mutex_lock(&inst->mutex);
+
+	/* Only non-user receive windows live in the ->rxwin[] table. */
+	if (!(window->user_win || window->tx_win)) {
+		WARN_ON_ONCE(!inst->rxwin[window->vas_win.cop]);
+		inst->rxwin[window->vas_win.cop] = NULL;
+	}
+
+	WARN_ON_ONCE(inst->windows[slot] != window);
+	inst->windows[slot] = NULL;
+
+	mutex_unlock(&inst->mutex);
+}
+
+/*
+ * Fill in @winctx for the receive window @rxwin from the caller supplied
+ * attributes @rxattr. @winctx is zeroed first, so any field not set here
+ * stays 0/false.
+ */
+static void init_winctx_for_rxwin(struct pnv_vas_window *rxwin,
+ struct vas_rx_win_attr *rxattr,
+ struct vas_winctx *winctx)
+{
+ /*
+ * We first zero (memset()) all fields and only set non-zero fields.
+ * Following fields are 0/false but maybe deserve a comment:
+ *
+ * ->notify_os_intr_reg In powerNV, send intrs to HV
+ * ->notify_disable False for NX windows
+ * ->intr_disable False for Fault Windows
+ * ->xtra_write False for NX windows
+ * ->notify_early NA for NX windows
+ * ->rsvd_txbuf_count NA for Rx windows
+ * ->lpid, ->pid, ->tid NA for Rx windows
+ */
+
+ memset(winctx, 0, sizeof(struct vas_winctx));
+
+ winctx->rx_fifo = rxattr->rx_fifo;
+ winctx->rx_fifo_size = rxattr->rx_fifo_size;
+ winctx->wcreds_max = rxwin->vas_win.wcreds_max;
+ winctx->pin_win = rxattr->pin_win;
+
+ winctx->nx_win = rxattr->nx_win;
+ winctx->fault_win = rxattr->fault_win;
+ winctx->user_win = rxattr->user_win;
+ winctx->rej_no_credit = rxattr->rej_no_credit;
+ winctx->rx_word_mode = rxattr->rx_win_ord_mode;
+ winctx->tx_word_mode = rxattr->tx_win_ord_mode;
+ winctx->rx_wcred_mode = rxattr->rx_wcred_mode;
+ winctx->tx_wcred_mode = rxattr->tx_wcred_mode;
+ winctx->notify_early = rxattr->notify_early;
+
+ /* Per window type overrides: NX, fault, or user (FTW) window. */
+ if (winctx->nx_win) {
+ winctx->data_stamp = true;
+ winctx->intr_disable = true;
+ winctx->pin_win = true;
+
+ WARN_ON_ONCE(winctx->fault_win);
+ WARN_ON_ONCE(!winctx->rx_word_mode);
+ WARN_ON_ONCE(!winctx->tx_word_mode);
+ WARN_ON_ONCE(winctx->notify_after_count);
+ } else if (winctx->fault_win) {
+ winctx->notify_disable = true;
+ } else if (winctx->user_win) {
+ /*
+ * Section 1.8.1 Low Latency Core-Core Wake up of
+ * the VAS workbook:
+ *
+ * - disable credit checks ([tr]x_wcred_mode = false)
+ * - disable FIFO writes
+ * - enable ASB_Notify, disable interrupt
+ */
+ winctx->fifo_disable = true;
+ winctx->intr_disable = true;
+ winctx->rx_fifo = 0;
+ }
+
+ winctx->lnotify_lpid = rxattr->lnotify_lpid;
+ winctx->lnotify_pid = rxattr->lnotify_pid;
+ winctx->lnotify_tid = rxattr->lnotify_tid;
+ winctx->pswid = rxattr->pswid;
+ winctx->dma_type = VAS_DMA_TYPE_INJECT;
+ winctx->tc_mode = rxattr->tc_mode;
+
+ winctx->min_scope = VAS_SCOPE_LOCAL;
+ winctx->max_scope = VAS_SCOPE_VECTORED_GROUP;
+ /* ->irq_port is only copied when the instance has a virq set. */
+ if (rxwin->vinst->virq)
+ winctx->irq_port = rxwin->vinst->irq_port;
+}
+
+/*
+ * Validate the attributes @attr for opening a receive window of
+ * coprocessor type @cop. Returns true if the combination is acceptable,
+ * false otherwise. The window must be exactly one of an NX, fault or
+ * user window, and each kind has its own notification/interrupt rules.
+ */
+static bool rx_win_args_valid(enum vas_cop_type cop,
+ struct vas_rx_win_attr *attr)
+{
+ pr_debug("Rxattr: fault %d, notify %d, intr %d, early %d, fifo %d\n",
+ attr->fault_win, attr->notify_disable,
+ attr->intr_disable, attr->notify_early,
+ attr->rx_fifo_size);
+
+ if (cop >= VAS_COP_TYPE_MAX)
+ return false;
+
+ /* FTW windows are exempt from the minimum FIFO size. */
+ if (cop != VAS_COP_TYPE_FTW &&
+ attr->rx_fifo_size < VAS_RX_FIFO_SIZE_MIN)
+ return false;
+
+ if (attr->rx_fifo_size > VAS_RX_FIFO_SIZE_MAX)
+ return false;
+
+ if (!attr->wcreds_max)
+ return false;
+
+ if (attr->nx_win) {
+ /* cannot be fault or user window if it is nx */
+ if (attr->fault_win || attr->user_win)
+ return false;
+ /*
+ * Section 3.1.4.32: NX Windows must not disable notification,
+ * and must not enable interrupts or early notification.
+ */
+ if (attr->notify_disable || !attr->intr_disable ||
+ attr->notify_early)
+ return false;
+ } else if (attr->fault_win) {
+ /* cannot be both fault and user window */
+ if (attr->user_win)
+ return false;
+
+ /*
+ * Section 3.1.4.32: Fault windows must disable notification
+ * but not interrupts.
+ */
+ if (!attr->notify_disable || attr->intr_disable)
+ return false;
+
+ } else if (attr->user_win) {
+ /*
+ * User receive windows are only for fast-thread-wakeup
+ * (FTW). They don't need a FIFO and must disable interrupts
+ */
+ if (attr->rx_fifo || attr->rx_fifo_size || !attr->intr_disable)
+ return false;
+ } else {
+ /* Rx window must be one of NX or Fault or User window. */
+ return false;
+ }
+
+ return true;
+}
+
+/*
+ * Fill @rxattr with the default receive window attributes for the
+ * coprocessor type @cop. @rxattr is zeroed first; a @cop with no case
+ * below leaves it all zeroes.
+ */
+void vas_init_rx_win_attr(struct vas_rx_win_attr *rxattr, enum vas_cop_type cop)
+{
+	memset(rxattr, 0, sizeof(*rxattr));
+
+	switch (cop) {
+	case VAS_COP_TYPE_842:
+	case VAS_COP_TYPE_842_HIPRI:
+	case VAS_COP_TYPE_GZIP:
+	case VAS_COP_TYPE_GZIP_HIPRI:
+		rxattr->pin_win = true;
+		rxattr->nx_win = true;
+		rxattr->fault_win = false;
+		rxattr->intr_disable = true;
+		rxattr->rx_wcred_mode = true;
+		rxattr->tx_wcred_mode = true;
+		rxattr->rx_win_ord_mode = true;
+		rxattr->tx_win_ord_mode = true;
+		break;
+
+	case VAS_COP_TYPE_FAULT:
+		rxattr->pin_win = true;
+		rxattr->fault_win = true;
+		rxattr->notify_disable = true;
+		rxattr->rx_wcred_mode = true;
+		rxattr->rx_win_ord_mode = true;
+		rxattr->rej_no_credit = true;
+		rxattr->tc_mode = VAS_THRESH_DISABLED;
+		break;
+
+	case VAS_COP_TYPE_FTW:
+		rxattr->user_win = true;
+		rxattr->intr_disable = true;
+
+		/*
+		 * As noted in the VAS Workbook we disable credit checks.
+		 * If we enable credit checks in the future, we must also
+		 * implement a mechanism to return the user credits or new
+		 * paste operations will fail.
+		 */
+		break;
+
+	default:
+		break;
+	}
+}
+EXPORT_SYMBOL_GPL(vas_init_rx_win_attr);
+
+/*
+ * Open a receive window of coprocessor type @cop on VAS instance @vasid,
+ * configured from @rxattr.
+ *
+ * Returns a pointer to the new window's embedded struct vas_window.
+ * Returns ERR_PTR(-EINVAL) if the attributes are invalid or the instance
+ * is not found, or the error from vas_window_alloc() if the window cannot
+ * be allocated.
+ */
+struct vas_window *vas_rx_win_open(int vasid, enum vas_cop_type cop,
+ struct vas_rx_win_attr *rxattr)
+{
+ struct pnv_vas_window *rxwin;
+ struct vas_winctx winctx;
+ struct vas_instance *vinst;
+
+ trace_vas_rx_win_open(current, vasid, cop, rxattr);
+
+ if (!rx_win_args_valid(cop, rxattr))
+ return ERR_PTR(-EINVAL);
+
+ vinst = find_vas_instance(vasid);
+ if (!vinst) {
+ pr_devel("vasid %d not found!\n", vasid);
+ return ERR_PTR(-EINVAL);
+ }
+ pr_devel("Found instance %d\n", vasid);
+
+ rxwin = vas_window_alloc(vinst);
+ if (IS_ERR(rxwin)) {
+ pr_devel("Unable to allocate memory for Rx window\n");
+ return (struct vas_window *)rxwin;
+ }
+
+ rxwin->tx_win = false;
+ rxwin->nx_win = rxattr->nx_win;
+ rxwin->user_win = rxattr->user_win;
+ rxwin->vas_win.cop = cop;
+ rxwin->vas_win.wcreds_max = rxattr->wcreds_max;
+
+ /* Build the window context, then program it into the hardware. */
+ init_winctx_for_rxwin(rxwin, rxattr, &winctx);
+ init_winctx_regs(rxwin, &winctx);
+
+ /* Only now make the window visible in the instance's tables. */
+ set_vinst_win(vinst, rxwin);
+
+ return &rxwin->vas_win;
+}
+EXPORT_SYMBOL_GPL(vas_rx_win_open);
+
+/*
+ * Fill @txattr with the default send window attributes for the
+ * coprocessor type @cop. @txattr is zeroed first; a @cop with no case
+ * below leaves it all zeroes.
+ */
+void vas_init_tx_win_attr(struct vas_tx_win_attr *txattr, enum vas_cop_type cop)
+{
+	memset(txattr, 0, sizeof(*txattr));
+
+	switch (cop) {
+	case VAS_COP_TYPE_842:
+	case VAS_COP_TYPE_842_HIPRI:
+	case VAS_COP_TYPE_GZIP:
+	case VAS_COP_TYPE_GZIP_HIPRI:
+		txattr->rej_no_credit = false;
+		txattr->rx_wcred_mode = true;
+		txattr->tx_wcred_mode = true;
+		txattr->rx_win_ord_mode = true;
+		txattr->tx_win_ord_mode = true;
+		break;
+
+	case VAS_COP_TYPE_FTW:
+		txattr->user_win = true;
+		break;
+
+	default:
+		break;
+	}
+}
+EXPORT_SYMBOL_GPL(vas_init_tx_win_attr);
+
+/*
+ * Fill in @winctx for the send window @txwin from the caller supplied
+ * attributes @txattr. @txwin->rxwin must already point at the paired
+ * receive window: its ->nx_win flag and window id are copied from it.
+ */
+static void init_winctx_for_txwin(struct pnv_vas_window *txwin,
+ struct vas_tx_win_attr *txattr,
+ struct vas_winctx *winctx)
+{
+ /*
+ * We first zero all fields and only set non-zero ones. Following
+ * are some fields set to 0/false for the stated reason:
+ *
+ * ->notify_os_intr_reg In powernv, send intrs to HV
+ * ->rsvd_txbuf_count Not supported yet.
+ * ->notify_disable False for NX windows
+ * ->xtra_write False for NX windows
+ * ->notify_early NA for NX windows
+ * ->lnotify_lpid NA for Tx windows
+ * ->lnotify_pid NA for Tx windows
+ * ->lnotify_tid NA for Tx windows
+ * ->tx_win_cred_mode Ignore for now for NX windows
+ * ->rx_win_cred_mode Ignore for now for NX windows
+ */
+ memset(winctx, 0, sizeof(struct vas_winctx));
+
+ winctx->wcreds_max = txwin->vas_win.wcreds_max;
+
+ winctx->user_win = txattr->user_win;
+ winctx->nx_win = txwin->rxwin->nx_win;
+ winctx->pin_win = txattr->pin_win;
+ winctx->rej_no_credit = txattr->rej_no_credit;
+ winctx->rsvd_txbuf_enable = txattr->rsvd_txbuf_enable;
+
+ winctx->rx_wcred_mode = txattr->rx_wcred_mode;
+ winctx->tx_wcred_mode = txattr->tx_wcred_mode;
+ winctx->rx_word_mode = txattr->rx_win_ord_mode;
+ winctx->tx_word_mode = txattr->tx_win_ord_mode;
+ winctx->rsvd_txbuf_count = txattr->rsvd_txbuf_count;
+
+ winctx->intr_disable = true;
+ if (winctx->nx_win)
+ winctx->data_stamp = true;
+
+ winctx->lpid = txattr->lpid;
+ winctx->pidr = txattr->pidr;
+ winctx->rx_win_id = txwin->rxwin->vas_win.winid;
+ /*
+ * IRQ and fault window setup is successful. Set fault window
+ * for the send window so that ready to handle faults.
+ */
+ if (txwin->vinst->virq)
+ winctx->fault_win_id = txwin->vinst->fault_win->vas_win.winid;
+
+ winctx->dma_type = VAS_DMA_TYPE_INJECT;
+ winctx->tc_mode = txattr->tc_mode;
+ winctx->min_scope = VAS_SCOPE_LOCAL;
+ winctx->max_scope = VAS_SCOPE_VECTORED_GROUP;
+ if (txwin->vinst->virq)
+ winctx->irq_port = txwin->vinst->irq_port;
+
+ /* Use the caller's pswid if given, else derive one from our ids. */
+ winctx->pswid = txattr->pswid ? txattr->pswid :
+ encode_pswid(txwin->vinst->vas_id,
+ txwin->vas_win.winid);
+}
+
+/*
+ * Validate the attributes @attr for opening a send window of coprocessor
+ * type @cop. Returns true if the combination is acceptable, false
+ * otherwise.
+ */
+static bool tx_win_args_valid(enum vas_cop_type cop,
+			      struct vas_tx_win_attr *attr)
+{
+	if (attr->tc_mode != VAS_THRESH_DISABLED)
+		return false;
+
+	/*
+	 * @cop is later used to index the ->rxwin[] table, which has
+	 * VAS_COP_TYPE_MAX entries, so VAS_COP_TYPE_MAX itself must be
+	 * rejected too (as rx_win_args_valid() already does).
+	 */
+	if (cop >= VAS_COP_TYPE_MAX)
+		return false;
+
+	if (attr->wcreds_max > VAS_TX_WCREDS_MAX)
+		return false;
+
+	if (attr->user_win) {
+		/* Reserved send buffers are not allowed for user windows. */
+		if (attr->rsvd_txbuf_count)
+			return false;
+
+		/* User send windows are limited to FTW and GZIP types. */
+		if (cop != VAS_COP_TYPE_FTW && cop != VAS_COP_TYPE_GZIP &&
+		    cop != VAS_COP_TYPE_GZIP_HIPRI)
+			return false;
+	}
+
+	return true;
+}
+
+/*
+ * Open a send window of coprocessor type @cop on VAS instance @vasid,
+ * configured from @attr, and pair it with the matching receive window.
+ *
+ * Returns a pointer to the new window's embedded struct vas_window, or an
+ * ERR_PTR on failure. On failure after the receive window was found, the
+ * reference taken on it is dropped again.
+ */
+struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop,
+ struct vas_tx_win_attr *attr)
+{
+ int rc;
+ struct pnv_vas_window *txwin;
+ struct pnv_vas_window *rxwin;
+ struct vas_winctx winctx;
+ struct vas_instance *vinst;
+
+ trace_vas_tx_win_open(current, vasid, cop, attr);
+
+ if (!tx_win_args_valid(cop, attr))
+ return ERR_PTR(-EINVAL);
+
+ /*
+ * If caller did not specify a vasid but specified the PSWID of a
+ * receive window (applicable only to FTW windows), use the vasid
+ * from that receive window.
+ */
+ if (vasid == -1 && attr->pswid)
+ decode_pswid(attr->pswid, &vasid, NULL);
+
+ vinst = find_vas_instance(vasid);
+ if (!vinst) {
+ pr_devel("vasid %d not found!\n", vasid);
+ return ERR_PTR(-EINVAL);
+ }
+
+ /* Takes a reference on the receive window; see put_rxwin below. */
+ rxwin = get_vinst_rxwin(vinst, cop, attr->pswid);
+ if (IS_ERR(rxwin)) {
+ pr_devel("No RxWin for vasid %d, cop %d\n", vasid, cop);
+ return (struct vas_window *)rxwin;
+ }
+
+ txwin = vas_window_alloc(vinst);
+ if (IS_ERR(txwin)) {
+ rc = PTR_ERR(txwin);
+ goto put_rxwin;
+ }
+
+ txwin->vas_win.cop = cop;
+ txwin->tx_win = 1;
+ txwin->rxwin = rxwin;
+ txwin->nx_win = txwin->rxwin->nx_win;
+ txwin->user_win = attr->user_win;
+ txwin->vas_win.wcreds_max = attr->wcreds_max ?: VAS_WCREDS_DEFAULT;
+
+ init_winctx_for_txwin(txwin, attr, &winctx);
+
+ init_winctx_regs(txwin, &winctx);
+
+ /*
+ * If it's a kernel send window, map the window address into the
+ * kernel's address space. For user windows, user must issue an
+ * mmap() to map the window into their address space.
+ *
+ * NOTE: If kernel ever resubmits a user CRB after handling a page
+ * fault, we will need to map this into kernel as well.
+ */
+ if (!txwin->user_win) {
+ txwin->paste_kaddr = map_paste_region(txwin);
+ if (IS_ERR(txwin->paste_kaddr)) {
+ rc = PTR_ERR(txwin->paste_kaddr);
+ goto free_window;
+ }
+ } else {
+ /*
+ * Interrupt handler or fault window setup failed. Means
+ * NX can not generate fault for page fault. So not
+ * opening for user space tx window.
+ */
+ if (!vinst->virq) {
+ rc = -ENODEV;
+ goto free_window;
+ }
+ rc = get_vas_user_win_ref(&txwin->vas_win.task_ref);
+ if (rc)
+ goto free_window;
+
+ vas_user_win_add_mm_context(&txwin->vas_win.task_ref);
+ }
+
+ set_vinst_win(vinst, txwin);
+
+ return &txwin->vas_win;
+
+free_window:
+ vas_window_free(txwin);
+
+put_rxwin:
+ put_rx_win(rxwin);
+ return ERR_PTR(rc);
+
+}
+EXPORT_SYMBOL_GPL(vas_tx_win_open);
+
+/*
+ * Exported wrapper around vas_copy(): passes @crb and @offset through
+ * unchanged and returns whatever vas_copy() returns.
+ */
+int vas_copy_crb(void *crb, int offset)
+{
+ return vas_copy(crb, offset);
+}
+EXPORT_SYMBOL_GPL(vas_copy_crb);
+
+/* Bit added to the paste address when the report-enable flag is set. */
+#define RMA_LSMP_REPORT_ENABLE PPC_BIT(53)
+/*
+ * Issue a paste on the send window @vwin at @offset, using the window's
+ * kernel paste address. When @re is true the REPORT_ENABLE bit is added
+ * to the paste address first.
+ *
+ * Returns 0 if vas_paste() returns 2, and -EINVAL for any other value.
+ */
+int vas_paste_crb(struct vas_window *vwin, int offset, bool re)
+{
+ struct pnv_vas_window *txwin;
+ int rc;
+ void *addr;
+ uint64_t val;
+
+ txwin = container_of(vwin, struct pnv_vas_window, vas_win);
+ trace_vas_paste_crb(current, txwin);
+
+ /*
+ * Only NX windows are supported for now and hardware assumes
+ * report-enable flag is set for NX windows. Ensure software
+ * complies too.
+ */
+ WARN_ON_ONCE(txwin->nx_win && !re);
+
+ addr = txwin->paste_kaddr;
+ if (re) {
+ /*
+ * Set the REPORT_ENABLE bit (equivalent to writing
+ * to 1K offset of the paste address)
+ */
+ val = SET_FIELD(RMA_LSMP_REPORT_ENABLE, 0ULL, 1);
+ addr += val;
+ }
+
+ /*
+ * Map the raw CR value from vas_paste() to an error code (there
+ * is just pass or fail for now though).
+ */
+ rc = vas_paste(addr, offset);
+ if (rc == 2)
+ rc = 0;
+ else
+ rc = -EINVAL;
+
+ pr_debug("Txwin #%d: Msg count %llu\n", txwin->vas_win.winid,
+ read_hvwc_reg(txwin, VREG(LRFIFO_PUSH)));
+
+ return rc;
+}
+EXPORT_SYMBOL_GPL(vas_paste_crb);
+
+/*
+ * If credit checking is enabled for this window, poll for the return
+ * of window credits (i.e for NX engines to process any outstanding CRBs).
+ * Since NX-842 waits for the CRBs to be processed before closing the
+ * window, we should not have to wait for too long.
+ *
+ * Returns immediately when the window's credit mode bit in WINCTL is
+ * clear. Otherwise sleeps uninterruptibly between reads of the credit
+ * register until the credit count is back at wcreds_max.
+ *
+ * TODO: We retry in 10ms intervals now. We could/should probably peek at
+ * the VAS_LRFIFO_PUSH_OFFSET register to get an estimate of pending
+ * CRBs on the FIFO and compute the delay dynamically on each retry.
+ * But that is not really needed until we support NX-GZIP access from
+ * user space. (NX-842 driver waits for CSB and Fast thread-wakeup
+ * doesn't use credit checking).
+ */
+static void poll_window_credits(struct pnv_vas_window *window)
+{
+	int credits, cred_mode;
+	int retries = 0;
+	u64 reg;
+
+	reg = read_hvwc_reg(window, VREG(WINCTL));
+	if (window->tx_win)
+		cred_mode = GET_FIELD(VAS_WINCTL_TX_WCRED_MODE, reg);
+	else
+		cred_mode = GET_FIELD(VAS_WINCTL_RX_WCRED_MODE, reg);
+
+	if (!cred_mode)
+		return;
+
+	for (;;) {
+		if (window->tx_win) {
+			reg = read_hvwc_reg(window, VREG(TX_WCRED));
+			credits = GET_FIELD(VAS_TX_WCRED, reg);
+		} else {
+			reg = read_hvwc_reg(window, VREG(LRX_WCRED));
+			credits = GET_FIELD(VAS_LRX_WCRED, reg);
+		}
+
+		/* All credits are back: nothing is outstanding any more. */
+		if (credits >= window->vas_win.wcreds_max)
+			return;
+
+		/*
+		 * Takes around few milliseconds to complete all pending
+		 * requests and return credits.
+		 * TODO: Scan fault FIFO and invalidate CRBs points to this
+		 * window and issue CRB Kill to stop all pending requests.
+		 * Need only if there is a bug in NX or fault handling in
+		 * kernel.
+		 */
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		schedule_timeout(msecs_to_jiffies(10));
+		retries++;
+
+		/*
+		 * Process can not close send window until all credits are
+		 * returned, so keep waiting but complain periodically.
+		 */
+		if (retries % 1000 == 0)
+			pr_warn_ratelimited("VAS: pid %d stuck. Waiting for credits returned for Window(%d). creds %d, Retries %d\n",
+				vas_window_pid(&window->vas_win),
+				window->vas_win.winid,
+				credits, retries);
+	}
+}
+
+/*
+ * Wait for the window to go to "not-busy" state. It should only take a
+ * short time to queue a CRB, so window should not be busy for too long.
+ * The status register is re-read after a 10ms uninterruptible sleep, and
+ * a rate-limited warning is printed every 1000 retries.
+ */
+static void poll_window_busy_state(struct pnv_vas_window *window)
+{
+	int retries = 0;
+	u64 status;
+
+	for (;;) {
+		status = read_hvwc_reg(window, VREG(WIN_STATUS));
+		if (!GET_FIELD(VAS_WIN_BUSY, status))
+			return;
+
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		schedule_timeout(msecs_to_jiffies(10));
+		retries++;
+
+		/*
+		 * Takes around few milliseconds to process all pending
+		 * requests.
+		 */
+		if (retries % 1000 == 0)
+			pr_warn_ratelimited("VAS: pid %d stuck. Window (ID=%d) is in busy state. Retries %d\n",
+				vas_window_pid(&window->vas_win),
+				window->vas_win.winid, retries);
+	}
+}
+
+/*
+ * Have the hardware cast a window out of cache and wait for it to
+ * be completed.
+ *
+ * NOTE: It can take a relatively long time to cast the window context
+ * out of the cache. It is not strictly necessary to cast out if:
+ *
+ * - we clear the "Pin Window" bit (so hardware is free to evict)
+ *
+ * - we re-initialize the window context when it is reassigned.
+ *
+ * We do the former in vas_win_close() and latter in vas_win_open().
+ * So, ignoring the cast-out for now. We can add it as needed. If
+ * casting out becomes necessary we should consider offloading the
+ * job to a worker thread, so the window close can proceed quickly.
+ */
+static void poll_window_castout(struct pnv_vas_window *window)
+{
+ /* Intentionally empty for now: the cast-out is skipped, see the NOTE above. */
+}
+
+/*
+ * Unpin and close a window so no new requests are accepted and the
+ * hardware can evict this window from cache if necessary.
+ *
+ * Both the Open and the Pin bit are cleared in a single read-modify-write
+ * of the WINCTL register.
+ */
+static void unpin_close_window(struct pnv_vas_window *window)
+{
+	u64 winctl = read_hvwc_reg(window, VREG(WINCTL));
+
+	winctl = SET_FIELD(VAS_WINCTL_OPEN, winctl, 0);
+	winctl = SET_FIELD(VAS_WINCTL_PIN, winctl, 0);
+
+	write_hvwc_reg(window, VREG(WINCTL), winctl);
+}
+
+/*
+ * Close a window.
+ *
+ * See Section 1.12.1 of VAS workbook v1.05 for details on closing window:
+ * - Disable new paste operations (unmap paste address)
+ * - Poll for the "Window Busy" bit to be cleared
+ * - Clear the Open/Enable bit for the Window.
+ * - Poll for return of window Credits (implies FIFO empty for Rx win?)
+ * - Unpin and cast window context out of cache
+ *
+ * Besides the hardware, kernel has some bookkeeping of course.
+ *
+ * Returns 0 once the window is closed (a NULL @vwin is a no-op that also
+ * returns 0), or -EBUSY when asked to close a receive window that still
+ * has send windows attached (num_txwins != 0).
+ */
+int vas_win_close(struct vas_window *vwin)
+{
+ struct pnv_vas_window *window;
+
+ if (!vwin)
+ return 0;
+
+ window = container_of(vwin, struct pnv_vas_window, vas_win);
+
+ if (!window->tx_win && atomic_read(&window->num_txwins) != 0) {
+ pr_devel("Attempting to close an active Rx window!\n");
+ WARN_ON_ONCE(1);
+ return -EBUSY;
+ }
+
+ unmap_paste_region(window); /* disable new paste operations */
+
+ poll_window_busy_state(window); /* wait for the "Window Busy" bit to clear */
+
+ unpin_close_window(window); /* clears both the Open and the Pin bit in WINCTL */
+
+ poll_window_credits(window); /* only waits if credit mode is enabled for the window */
+
+ clear_vinst_win(window);
+
+ poll_window_castout(window); /* currently an empty stub */
+
+ /* if send window, drop reference to matching receive window */
+ if (window->tx_win) {
+ if (window->user_win) {
+ mm_context_remove_vas_window(vwin->task_ref.mm);
+ put_vas_user_win_ref(&vwin->task_ref);
+ }
+ put_rx_win(window->rxwin);
+ }
+
+ vas_window_free(window);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(vas_win_close);
+
+/*
+ * Return credit for the given window.
+ * Send windows and fault window uses credit mechanism as follows:
+ *
+ * Send windows:
+ * - The default number of credits available for each send window is
+ * 1024. It means 1024 requests can be issued asynchronously at the
+ * same time. If the credit is not available, that request will be
+ * returned with RMA_Busy.
+ * - One credit is taken when NX request is issued.
+ * - This credit is returned after NX processed that request.
+ * - If NX encounters translation error, kernel will return the
+ * credit on the specific send window after processing the fault CRB.
+ *
+ * Fault window:
+ * - The total number credits available is FIFO_SIZE/CRB_SIZE.
+ * Means 4MB/128 in the current implementation. If credit is not
+ * available, RMA_Reject is returned.
+ * - A credit is taken when NX pastes CRB in fault FIFO.
+ * - The kernel will return credit on fault window after reading entry
+ * from fault FIFO.
+ *
+ * @tx selects which adder register receives the single credit:
+ * TX_WCRED_ADDER when true, LRX_WCRED_ADDER otherwise.
+ */
+void vas_return_credit(struct pnv_vas_window *window, bool tx)
+{
+	uint64_t one_credit = 0ULL;
+
+	if (!tx) {
+		one_credit = SET_FIELD(VAS_LRX_WCRED, one_credit, 1);
+		write_hvwc_reg(window, VREG(LRX_WCRED_ADDER), one_credit);
+		return;
+	}
+
+	/* send window */
+	one_credit = SET_FIELD(VAS_TX_WCRED, one_credit, 1);
+	write_hvwc_reg(window, VREG(TX_WCRED_ADDER), one_credit);
+}
+
+/*
+ * Look up the window on @vinst that @pswid refers to; the window id is
+ * extracted with decode_pswid().
+ *
+ * Returns the window, or ERR_PTR(-ESRCH) if @pswid is 0, decodes to an
+ * out-of-range window id, or names a slot in vinst->windows[] that holds
+ * no window. NULL is never returned, so an IS_ERR() check is sufficient
+ * for callers.
+ *
+ * A window that fails the sanity checks below is still returned, after
+ * logging the mismatch and raising a WARN.
+ */
+struct pnv_vas_window *vas_pswid_to_window(struct vas_instance *vinst,
+		uint32_t pswid)
+{
+	struct pnv_vas_window *window;
+	int winid;
+
+	if (!pswid) {
+		pr_devel("%s: called for pswid 0!\n", __func__);
+		return ERR_PTR(-ESRCH);
+	}
+
+	decode_pswid(pswid, NULL, &winid);
+
+	if (winid >= VAS_WINDOWS_PER_CHIP)
+		return ERR_PTR(-ESRCH);
+
+	/*
+	 * If application closes the window before the hardware
+	 * returns the fault CRB, we should wait in vas_win_close()
+	 * for the pending requests. so the window must be active
+	 * and the process alive.
+	 *
+	 * If it's a kernel process, we should not get any faults and
+	 * should not get here.
+	 */
+	window = vinst->windows[winid];
+
+	if (!window) {
+		pr_err("PSWID decode: Could not find window for winid %d pswid %d vinst 0x%p\n",
+			winid, pswid, vinst);
+		/*
+		 * Report the miss the same way as the other lookup
+		 * failures above instead of handing back NULL, which an
+		 * IS_ERR()-only caller would go on to dereference.
+		 */
+		return ERR_PTR(-ESRCH);
+	}
+
+	/*
+	 * Do some sanity checks on the decoded window. Window should be
+	 * NX GZIP user send window. FTW windows should not incur faults
+	 * since their CRBs are ignored (not queued on FIFO or processed
+	 * by NX).
+	 */
+	if (!window->tx_win || !window->user_win || !window->nx_win ||
+			window->vas_win.cop == VAS_COP_TYPE_FAULT ||
+			window->vas_win.cop == VAS_COP_TYPE_FTW) {
+		pr_err("PSWID decode: id %d, tx %d, user %d, nx %d, cop %d\n",
+			winid, window->tx_win, window->user_win,
+			window->nx_win, window->vas_win.cop);
+		WARN_ON(1);
+	}
+
+	return window;
+}
+
+/*
+ * open_win callback of vops: open a user space send window on VAS
+ * instance @vas_id for coprocessor type @cop_type, stamped with the
+ * current LPID and PID SPR values. @flags is not used here.
+ *
+ * Returns whatever vas_tx_win_open() returns.
+ */
+static struct vas_window *vas_user_win_open(int vas_id, u64 flags,
+		enum vas_cop_type cop_type)
+{
+	struct vas_tx_win_attr attr = {};
+
+	vas_init_tx_win_attr(&attr, cop_type);
+
+	/* Override the defaults for a user space window. */
+	attr.user_win = true;
+	attr.rsvd_txbuf_count = 0;
+	attr.pswid = 0;
+	attr.lpid = mfspr(SPRN_LPID);
+	attr.pidr = mfspr(SPRN_PID);
+
+	pr_devel("Pid %d: Opening txwin, PIDR %ld\n", attr.pidr,
+				mfspr(SPRN_PID));
+
+	return vas_tx_win_open(vas_id, cop_type, &attr);
+}
+
+/*
+ * paste_addr callback of vops: return the paste address that
+ * vas_win_paste_addr() reports for @txwin. The length output of that
+ * helper is not requested.
+ */
+static u64 vas_user_win_paste_addr(struct vas_window *txwin)
+{
+	u64 addr;
+
+	vas_win_paste_addr(container_of(txwin, struct pnv_vas_window, vas_win),
+			   &addr, NULL);
+
+	return addr;
+}
+
+/*
+ * close_win callback of vops: close the send window @txwin.
+ *
+ * Returns the status of vas_win_close() (0 on success, -EBUSY if the
+ * close was refused) instead of unconditionally reporting success.
+ */
+static int vas_user_win_close(struct vas_window *txwin)
+{
+	return vas_win_close(txwin);
+}
+
+static const struct vas_user_win_ops vops = { /* PowerNV callbacks passed to vas_register_coproc_api() */
+ .open_win = vas_user_win_open,
+ .paste_addr = vas_user_win_paste_addr,
+ .close_win = vas_user_win_close,
+};
+
+/*
+ * Only the nx-gzip coprocessor type is supported for now, but this API
+ * code can be extended to other coprocessor types later.
+ *
+ * Registers the PowerNV window callbacks (vops) through
+ * vas_register_coproc_api() and returns its result.
+ */
+int vas_register_api_powernv(struct module *mod, enum vas_cop_type cop_type,
+ const char *name)
+{
+
+ return vas_register_coproc_api(mod, cop_type, name, &vops);
+}
+EXPORT_SYMBOL_GPL(vas_register_api_powernv);
+
+void vas_unregister_api_powernv(void)
+{
+ vas_unregister_coproc_api(); /* thin exported wrapper, nothing PowerNV-specific to undo here */
+}
+EXPORT_SYMBOL_GPL(vas_unregister_api_powernv);
diff --git a/arch/powerpc/platforms/powernv/vas.c b/arch/powerpc/platforms/powernv/vas.c
new file mode 100644
index 000000000..b65256a63
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/vas.c
@@ -0,0 +1,253 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2016-17 IBM Corp.
+ */
+
+#define pr_fmt(fmt) "vas: " fmt
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/platform_device.h>
+#include <linux/of_platform.h>
+#include <linux/of_address.h>
+#include <linux/of.h>
+#include <linux/irqdomain.h>
+#include <linux/interrupt.h>
+#include <asm/prom.h>
+#include <asm/xive.h>
+
+#include "vas.h"
+
+DEFINE_MUTEX(vas_mutex);
+static LIST_HEAD(vas_instances);
+
+static DEFINE_PER_CPU(int, cpu_vas_id);
+
+/*
+ * Install the threaded fault interrupt handler for @vinst and then set
+ * up its fault window. If the window setup fails the IRQ is released
+ * again.
+ *
+ * Returns 0 on success, otherwise the error from the step that failed.
+ */
+static int vas_irq_fault_window_setup(struct vas_instance *vinst)
+{
+	int err;
+
+	err = request_threaded_irq(vinst->virq, vas_fault_handler,
+				   vas_fault_thread_fn, 0, vinst->name, vinst);
+	if (err) {
+		pr_err("VAS[%d]: Request IRQ(%d) failed with %d\n",
+				vinst->vas_id, vinst->virq, err);
+		return err;
+	}
+
+	err = vas_setup_fault_window(vinst);
+	if (err)
+		free_irq(vinst->virq, vinst);
+
+	return err;
+}
+
+/*
+ * Create and register the vas_instance for one "ibm,vas" platform device.
+ *
+ * Reads "ibm,vas-id" and "ibm,chip-id" from the device node, records the
+ * HVWC/UWC BAR starts, the paste base address and the paste window-id
+ * shift from the device's four resources, allocates and maps the fault
+ * interrupt, tags every possible CPU on the same chip with this VAS id
+ * and adds the instance to the vas_instances list.
+ *
+ * A failure to set up the fault IRQ/window is not fatal: vinst->virq is
+ * reset to 0, which makes vas_tx_win_open() refuse user space send
+ * windows with -ENODEV.
+ *
+ * Returns 0 on success or a negative errno. Every failure after the
+ * instance and its name have been allocated leaves through free_vinst,
+ * so neither of them is leaked.
+ */
+static int init_vas_instance(struct platform_device *pdev)
+{
+	struct device_node *dn = pdev->dev.of_node;
+	struct vas_instance *vinst;
+	struct xive_irq_data *xd;
+	uint32_t chipid, hwirq;
+	struct resource *res;
+	int rc, cpu, vasid;
+
+	rc = of_property_read_u32(dn, "ibm,vas-id", &vasid);
+	if (rc) {
+		pr_err("No ibm,vas-id property for %s?\n", pdev->name);
+		return -ENODEV;
+	}
+
+	rc = of_property_read_u32(dn, "ibm,chip-id", &chipid);
+	if (rc) {
+		pr_err("No ibm,chip-id property for %s?\n", pdev->name);
+		return -ENODEV;
+	}
+
+	if (pdev->num_resources != 4) {
+		pr_err("Unexpected DT configuration for [%s, %d]\n",
+				pdev->name, vasid);
+		return -ENODEV;
+	}
+
+	vinst = kzalloc(sizeof(*vinst), GFP_KERNEL);
+	if (!vinst)
+		return -ENOMEM;
+
+	vinst->name = kasprintf(GFP_KERNEL, "vas-%d", vasid);
+	if (!vinst->name) {
+		kfree(vinst);
+		return -ENOMEM;
+	}
+
+	INIT_LIST_HEAD(&vinst->node);
+	ida_init(&vinst->ida);
+	mutex_init(&vinst->mutex);
+	vinst->vas_id = vasid;
+	vinst->pdev = pdev;
+
+	res = &pdev->resource[0];
+	vinst->hvwc_bar_start = res->start;
+
+	res = &pdev->resource[1];
+	vinst->uwc_bar_start = res->start;
+
+	res = &pdev->resource[2];
+	vinst->paste_base_addr = res->start;
+
+	/* The fourth resource carries the window-id bit position in ->end. */
+	res = &pdev->resource[3];
+	if (res->end > 62) {
+		pr_err("Bad 'paste_win_id_shift' in DT, %llx\n", res->end);
+		rc = -ENODEV;
+		goto free_vinst;
+	}
+
+	vinst->paste_win_id_shift = 63 - res->end;
+
+	/*
+	 * NOTE(review): the error paths below release the instance memory
+	 * but not the hwirq / virq mapping obtained here; confirm whether
+	 * those need an explicit release as well.
+	 */
+	hwirq = xive_native_alloc_irq_on_chip(chipid);
+	if (!hwirq) {
+		pr_err("Inst%d: Unable to allocate global irq for chip %d\n",
+				vinst->vas_id, chipid);
+		rc = -ENOENT;
+		goto free_vinst;
+	}
+
+	vinst->virq = irq_create_mapping(NULL, hwirq);
+	if (!vinst->virq) {
+		pr_err("Inst%d: Unable to map global irq %d\n",
+				vinst->vas_id, hwirq);
+		rc = -EINVAL;
+		goto free_vinst;
+	}
+
+	xd = irq_get_handler_data(vinst->virq);
+	if (!xd) {
+		pr_err("Inst%d: Invalid virq %d\n",
+				vinst->vas_id, vinst->virq);
+		rc = -EINVAL;
+		goto free_vinst;
+	}
+
+	vinst->irq_port = xd->trig_page;
+	pr_devel("Initialized instance [%s, %d] paste_base 0x%llx paste_win_id_shift 0x%llx IRQ %d Port 0x%llx\n",
+			pdev->name, vasid, vinst->paste_base_addr,
+			vinst->paste_win_id_shift, vinst->virq,
+			vinst->irq_port);
+
+	for_each_possible_cpu(cpu) {
+		if (cpu_to_chip_id(cpu) == of_get_ibm_chip_id(dn))
+			per_cpu(cpu_vas_id, cpu) = vasid;
+	}
+
+	mutex_lock(&vas_mutex);
+	list_add(&vinst->node, &vas_instances);
+	mutex_unlock(&vas_mutex);
+
+	spin_lock_init(&vinst->fault_lock);
+	/*
+	 * IRQ and fault handling setup is needed only for user space
+	 * send windows.
+	 */
+	if (vinst->virq) {
+		rc = vas_irq_fault_window_setup(vinst);
+		/*
+		 * Fault window is used only for user space send windows.
+		 * So if vinst->virq is 0, tx_win_open returns -ENODEV
+		 * for user space.
+		 */
+		if (rc)
+			vinst->virq = 0;
+	}
+
+	vas_instance_init_dbgdir(vinst);
+
+	dev_set_drvdata(&pdev->dev, vinst);
+
+	return 0;
+
+free_vinst:
+	kfree(vinst->name);
+	kfree(vinst);
+	return rc;
+}
+
+/*
+ * Find the VAS instance with id @vasid; a @vasid of -1 selects the
+ * instance recorded for the CPU this is running on. Returns NULL (and
+ * logs at devel level) if there is no such instance.
+ *
+ * Although the instance list is read/used multiple times, it is written
+ * to only during initialization.
+ */
+struct vas_instance *find_vas_instance(int vasid)
+{
+	struct vas_instance *cur, *match = NULL;
+	struct list_head *pos;
+
+	mutex_lock(&vas_mutex);
+
+	if (vasid == -1)
+		vasid = per_cpu(cpu_vas_id, smp_processor_id());
+
+	list_for_each(pos, &vas_instances) {
+		cur = list_entry(pos, struct vas_instance, node);
+		if (cur->vas_id == vasid) {
+			match = cur;
+			break;
+		}
+	}
+
+	mutex_unlock(&vas_mutex);
+
+	if (!match)
+		pr_devel("Instance %d not found\n", vasid);
+
+	return match;
+}
+
+/*
+ * Translate a chip id into the VAS id recorded for the first possible
+ * CPU that sits on that chip. Returns -1 if no possible CPU reports
+ * @chipid.
+ */
+int chip_to_vas_id(int chipid)
+{
+	int vasid = -1;
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		if (cpu_to_chip_id(cpu) != chipid)
+			continue;
+
+		vasid = per_cpu(cpu_vas_id, cpu);
+		break;
+	}
+
+	return vasid;
+}
+EXPORT_SYMBOL(chip_to_vas_id);
+
+static int vas_probe(struct platform_device *pdev)
+{
+ return init_vas_instance(pdev); /* all per-instance setup is done in init_vas_instance() */
+}
+
+static const struct of_device_id powernv_vas_match[] = {
+ { .compatible = "ibm,vas",}, /* same compatible string that vas_init() scans for */
+ {}, /* empty terminating entry */
+};
+
+static struct platform_driver vas_driver = {
+ .driver = {
+ .name = "vas",
+ .of_match_table = powernv_vas_match, /* matches "ibm,vas" device-tree nodes */
+ },
+ .probe = vas_probe, /* only .probe is provided; there is no remove callback */
+};
+
+/*
+ * Register the VAS platform driver and create a platform device for
+ * every "ibm,vas" node in the device tree.
+ *
+ * Returns the error from platform_driver_register() if the driver cannot
+ * be registered, -ENODEV (after unregistering the driver again) if the
+ * device tree has no "ibm,vas" node, and 0 otherwise.
+ */
+static int __init vas_init(void)
+{
+	struct device_node *dn;
+	int found = 0;
+	int rc;
+
+	/* Without the driver the devices created below could never probe. */
+	rc = platform_driver_register(&vas_driver);
+	if (rc)
+		return rc;
+
+	for_each_compatible_node(dn, NULL, "ibm,vas") {
+		of_platform_device_create(dn, NULL, NULL);
+		found++;
+	}
+
+	if (!found) {
+		platform_driver_unregister(&vas_driver);
+		return -ENODEV;
+	}
+
+	pr_devel("Found %d instances\n", found);
+
+	return 0;
+}
+device_initcall(vas_init);
diff --git a/arch/powerpc/platforms/powernv/vas.h b/arch/powerpc/platforms/powernv/vas.h
new file mode 100644
index 000000000..08d9d3d5a
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/vas.h
@@ -0,0 +1,501 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright 2016-17 IBM Corp.
+ */
+
+#ifndef _VAS_H
+#define _VAS_H
+#include <linux/atomic.h>
+#include <linux/idr.h>
+#include <asm/vas.h>
+#include <linux/io.h>
+#include <linux/dcache.h>
+#include <linux/mutex.h>
+#include <linux/stringify.h>
+
+/*
+ * Overview of Virtual Accelerator Switchboard (VAS).
+ *
+ * VAS is a hardware "switchboard" that allows senders and receivers to
+ * exchange messages with _minimal_ kernel involvement. The receivers are
+ * typically NX coprocessor engines that perform compression or encryption
+ * in hardware, but receivers can also be other software threads.
+ *
+ * Senders are user/kernel threads that submit compression/encryption or
+ * other requests to the receivers. Senders must format their messages as
+ * Coprocessor Request Blocks (CRB)s and submit them using the "copy" and
+ * "paste" instructions which were introduced in Power9.
+ *
+ * A Power node can have (up to?) 8 Power chips. There is one instance of
+ * VAS in each Power9 chip. Each instance of VAS has 64K windows or ports,
+ * Senders and receivers must each connect to a separate window before they
+ * can exchange messages through the switchboard.
+ *
+ * Each window is described by two types of window contexts:
+ *
+ * Hypervisor Window Context (HVWC) of size VAS_HVWC_SIZE bytes
+ *
+ * OS/User Window Context (UWC) of size VAS_UWC_SIZE bytes.
+ *
+ * A window context can be viewed as a set of 64-bit registers. The settings
+ * in these registers configure/control/determine the behavior of the VAS
+ * hardware when messages are sent/received through the window. The registers
+ * in the HVWC are configured by the kernel while the registers in the UWC can
+ * be configured by the kernel or by the user space application that is using
+ * the window.
+ *
+ * The HVWCs for all windows on a specific instance of VAS are in a contiguous
+ * range of hardware addresses or Base address region (BAR) referred to as the
+ * HVWC BAR for the instance. Similarly the UWCs for all windows on an instance
+ * are referred to as the UWC BAR for the instance.
+ *
+ * The two BARs for each instance are defined in the Power9 MMIO Ranges
+ * spreadsheet and are available to the kernel in the VAS node's "reg"
+ * property in the device tree:
+ *
+ * /proc/device-tree/vasm@.../reg
+ *
+ * (see vas_probe() for details on the reg property).
+ *
+ * The kernel maps the HVWC and UWC BAR regions into the kernel address
+ * space (hvwc_map and uwc_map). The kernel can then access the window
+ * contexts of a specific window using:
+ *
+ * hvwc = hvwc_map + winid * VAS_HVWC_SIZE.
+ * uwc = uwc_map + winid * VAS_UWC_SIZE.
+ *
+ * where winid is the window index (0..64K).
+ *
+ * As mentioned, a window context is used to "configure" a window. Besides
+ * this configuration address, each _send_ window also has a unique hardware
+ * "paste" address that is used to submit requests/CRBs (see vas_paste_crb()).
+ *
+ * The hardware paste address for a window is computed using the "paste
+ * base address" and "paste win id shift" reg properties in the VAS device
+ * tree node using:
+ *
+ * paste_addr = paste_base + ((winid << paste_win_id_shift))
+ *
+ * (again, see vas_probe() for ->paste_base_addr and ->paste_win_id_shift).
+ *
+ * The kernel maps this hardware address into the sender's address space
+ * after which they can use the 'paste' instruction (new in Power9) to
+ * send a message (submit a request aka CRB) to the coprocessor.
+ *
+ * NOTE: In the initial version, senders can only be in-kernel drivers/threads.
+ * Support for user space threads will be added in follow-on patches.
+ *
+ * TODO: Do we need to map the UWC into user address space so they can return
+ * credits? Its NA for NX but may be needed for other receive windows.
+ *
+ */
+
+#define VAS_WINDOWS_PER_CHIP (64 << 10)
+
+/*
+ * Hypervisor and OS/User Window Context sizes
+ */
+#define VAS_HVWC_SIZE 512
+#define VAS_UWC_SIZE PAGE_SIZE
+
+/*
+ * Initial per-process credits.
+ * Max send window credits: 4K-1 (12-bits in VAS_TX_WCRED)
+ *
+ * TODO: Needs tuning for per-process credits
+ */
+#define VAS_TX_WCREDS_MAX ((4 << 10) - 1)
+#define VAS_WCREDS_DEFAULT (1 << 10)
+
+/*
+ * VAS Window Context Register Offsets and bitmasks.
+ * See Section 3.1.4 of VAS Work book
+ */
+#define VAS_LPID_OFFSET 0x010
+#define VAS_LPID PPC_BITMASK(0, 11)
+
+#define VAS_PID_OFFSET 0x018
+#define VAS_PID_ID PPC_BITMASK(0, 19)
+
+#define VAS_XLATE_MSR_OFFSET 0x020
+#define VAS_XLATE_MSR_DR PPC_BIT(0)
+#define VAS_XLATE_MSR_TA PPC_BIT(1)
+#define VAS_XLATE_MSR_PR PPC_BIT(2)
+#define VAS_XLATE_MSR_US PPC_BIT(3)
+#define VAS_XLATE_MSR_HV PPC_BIT(4)
+#define VAS_XLATE_MSR_SF PPC_BIT(5)
+
+#define VAS_XLATE_LPCR_OFFSET 0x028
+#define VAS_XLATE_LPCR_PAGE_SIZE PPC_BITMASK(0, 2)
+#define VAS_XLATE_LPCR_ISL PPC_BIT(3)
+#define VAS_XLATE_LPCR_TC PPC_BIT(4)
+#define VAS_XLATE_LPCR_SC PPC_BIT(5)
+
+#define VAS_XLATE_CTL_OFFSET 0x030
+#define VAS_XLATE_MODE PPC_BITMASK(0, 1)
+
+#define VAS_AMR_OFFSET 0x040
+#define VAS_AMR PPC_BITMASK(0, 63)
+
+#define VAS_SEIDR_OFFSET 0x048
+#define VAS_SEIDR PPC_BITMASK(0, 63)
+
+#define VAS_FAULT_TX_WIN_OFFSET 0x050
+#define VAS_FAULT_TX_WIN PPC_BITMASK(48, 63)
+
+#define VAS_OSU_INTR_SRC_RA_OFFSET 0x060
+#define VAS_OSU_INTR_SRC_RA PPC_BITMASK(8, 63)
+
+#define VAS_HV_INTR_SRC_RA_OFFSET 0x070
+#define VAS_HV_INTR_SRC_RA PPC_BITMASK(8, 63)
+
+#define VAS_PSWID_OFFSET 0x078
+#define VAS_PSWID_EA_HANDLE PPC_BITMASK(0, 31)
+
+#define VAS_SPARE1_OFFSET 0x080
+#define VAS_SPARE2_OFFSET 0x088
+#define VAS_SPARE3_OFFSET 0x090
+#define VAS_SPARE4_OFFSET 0x130
+#define VAS_SPARE5_OFFSET 0x160
+#define VAS_SPARE6_OFFSET 0x188
+
+#define VAS_LFIFO_BAR_OFFSET 0x0A0
+#define VAS_LFIFO_BAR PPC_BITMASK(8, 53)
+#define VAS_PAGE_MIGRATION_SELECT PPC_BITMASK(54, 56)
+
+#define VAS_LDATA_STAMP_CTL_OFFSET 0x0A8
+#define VAS_LDATA_STAMP PPC_BITMASK(0, 1)
+#define VAS_XTRA_WRITE PPC_BIT(2)
+
+#define VAS_LDMA_CACHE_CTL_OFFSET 0x0B0
+#define VAS_LDMA_TYPE PPC_BITMASK(0, 1)
+#define VAS_LDMA_FIFO_DISABLE PPC_BIT(2)
+
+#define VAS_LRFIFO_PUSH_OFFSET 0x0B8
+#define VAS_LRFIFO_PUSH PPC_BITMASK(0, 15)
+
+#define VAS_CURR_MSG_COUNT_OFFSET 0x0C0
+#define VAS_CURR_MSG_COUNT PPC_BITMASK(0, 7)
+
+#define VAS_LNOTIFY_AFTER_COUNT_OFFSET 0x0C8
+#define VAS_LNOTIFY_AFTER_COUNT PPC_BITMASK(0, 7)
+
+#define VAS_LRX_WCRED_OFFSET 0x0E0
+#define VAS_LRX_WCRED PPC_BITMASK(0, 15)
+
+#define VAS_LRX_WCRED_ADDER_OFFSET 0x190
+#define VAS_LRX_WCRED_ADDER PPC_BITMASK(0, 15)
+
+#define VAS_TX_WCRED_OFFSET 0x0F0
+#define VAS_TX_WCRED PPC_BITMASK(4, 15)
+
+#define VAS_TX_WCRED_ADDER_OFFSET 0x1A0
+#define VAS_TX_WCRED_ADDER PPC_BITMASK(4, 15)
+
+#define VAS_LFIFO_SIZE_OFFSET 0x100
+#define VAS_LFIFO_SIZE PPC_BITMASK(0, 3)
+
+#define VAS_WINCTL_OFFSET 0x108
+#define VAS_WINCTL_OPEN PPC_BIT(0)
+#define VAS_WINCTL_REJ_NO_CREDIT PPC_BIT(1)
+#define VAS_WINCTL_PIN PPC_BIT(2)
+#define VAS_WINCTL_TX_WCRED_MODE PPC_BIT(3)
+#define VAS_WINCTL_RX_WCRED_MODE PPC_BIT(4)
+#define VAS_WINCTL_TX_WORD_MODE PPC_BIT(5)
+#define VAS_WINCTL_RX_WORD_MODE PPC_BIT(6)
+#define VAS_WINCTL_RSVD_TXBUF PPC_BIT(7)
+#define VAS_WINCTL_THRESH_CTL PPC_BITMASK(8, 9)
+#define VAS_WINCTL_FAULT_WIN PPC_BIT(10)
+#define VAS_WINCTL_NX_WIN PPC_BIT(11)
+
+#define VAS_WIN_STATUS_OFFSET 0x110
+#define VAS_WIN_BUSY PPC_BIT(1)
+
+#define VAS_WIN_CTX_CACHING_CTL_OFFSET 0x118
+#define VAS_CASTOUT_REQ PPC_BIT(0)
+#define VAS_PUSH_TO_MEM PPC_BIT(1)
+#define VAS_WIN_CACHE_STATUS PPC_BIT(4)
+
+#define VAS_TX_RSVD_BUF_COUNT_OFFSET 0x120
+#define VAS_RXVD_BUF_COUNT PPC_BITMASK(58, 63)
+
+#define VAS_LRFIFO_WIN_PTR_OFFSET 0x128
+#define VAS_LRX_WIN_ID PPC_BITMASK(0, 15)
+
+/*
+ * Local Notification Control Register controls what happens in _response_
+ * to a paste command and hence applies only to receive windows.
+ */
+#define VAS_LNOTIFY_CTL_OFFSET 0x138
+#define VAS_NOTIFY_DISABLE PPC_BIT(0)
+#define VAS_INTR_DISABLE PPC_BIT(1)
+#define VAS_NOTIFY_EARLY PPC_BIT(2)
+#define VAS_NOTIFY_OSU_INTR PPC_BIT(3)
+
+#define VAS_LNOTIFY_PID_OFFSET 0x140
+#define VAS_LNOTIFY_PID PPC_BITMASK(0, 19)
+
+#define VAS_LNOTIFY_LPID_OFFSET 0x148
+#define VAS_LNOTIFY_LPID PPC_BITMASK(0, 11)
+
+#define VAS_LNOTIFY_TID_OFFSET 0x150
+#define VAS_LNOTIFY_TID PPC_BITMASK(0, 15)
+
+#define VAS_LNOTIFY_SCOPE_OFFSET 0x158
+#define VAS_LNOTIFY_MIN_SCOPE PPC_BITMASK(0, 1)
+#define VAS_LNOTIFY_MAX_SCOPE PPC_BITMASK(2, 3)
+
+#define VAS_NX_UTIL_OFFSET 0x1B0
+#define VAS_NX_UTIL PPC_BITMASK(0, 63)
+
+/* SE: Side effects */
+#define VAS_NX_UTIL_SE_OFFSET 0x1B8
+#define VAS_NX_UTIL_SE PPC_BITMASK(0, 63)
+
+#define VAS_NX_UTIL_ADDER_OFFSET 0x180
+#define VAS_NX_UTIL_ADDER PPC_BITMASK(32, 63)
+
+/*
+ * VREG(x):
+ * Expand a register's short name (eg: LPID) into two parameters:
+ * - the register's short name in string form ("LPID"), and
+ * - the name of the macro (eg: VAS_LPID_OFFSET), defining the
+ * register's offset in the window context
+ */
+#define VREG_SFX(n, s) __stringify(n), VAS_##n##s
+#define VREG(r) VREG_SFX(r, _OFFSET)
+
+/*
+ * Local Notify Scope Control Register. (Receive windows only).
+ */
+enum vas_notify_scope {
+ VAS_SCOPE_LOCAL,
+ VAS_SCOPE_GROUP,
+ VAS_SCOPE_VECTORED_GROUP,
+ VAS_SCOPE_UNUSED,
+};
+
+/*
+ * Local DMA Cache Control Register (Receive windows only).
+ */
+enum vas_dma_type {
+ VAS_DMA_TYPE_INJECT,
+ VAS_DMA_TYPE_WRITE,
+};
+
+/*
+ * Local Notify After Count Register. (Receive windows only).
+ * Not applicable to NX receive windows.
+ */
+enum vas_notify_after_count {
+ VAS_NOTIFY_AFTER_256 = 0,
+ VAS_NOTIFY_NONE,
+ VAS_NOTIFY_AFTER_2
+};
+
+/*
+ * NX can generate an interrupt for multiple faults and expects the kernel
+ * to process all of them. So read all valid CRB entries until an invalid
+ * one is found. Use the pswid which is pasted by NX and ccw[0] (reserved
+ * bit in BE) to check for a valid CRB. CCW[0] will not be touched by user
+ * space. The application gets a CRB format error if it updates this bit.
+ *
+ * Invalidate FIFO during allocation and process all entries from last
+ * successful read until finds invalid pswid and ccw[0] values.
+ * After reading each CRB entry from fault FIFO, the kernel invalidate
+ * it by updating pswid with FIFO_INVALID_ENTRY and CCW[0] with
+ * CCW0_INVALID.
+ */
+#define FIFO_INVALID_ENTRY 0xffffffff
+#define CCW0_INVALID 1
+
+/*
+ * One per instance of VAS. Each instance will have a separate set of
+ * receive windows, one per coprocessor type.
+ *
+ * See also function header of set_vinst_win() for details on ->windows[]
+ * and ->rxwin[] tables.
+ */
+struct vas_instance {
+ int vas_id;
+ struct ida ida;
+ struct list_head node;
+ struct platform_device *pdev;
+
+ u64 hvwc_bar_start;
+ u64 uwc_bar_start;
+ u64 paste_base_addr;
+ u64 paste_win_id_shift;
+
+ u64 irq_port;
+ int virq;
+ int fault_crbs;
+ int fault_fifo_size;
+ int fifo_in_progress; /* To wake up thread or return IRQ_HANDLED */
+ spinlock_t fault_lock; /* Protects fifo_in_progress update */
+ void *fault_fifo;
+ struct pnv_vas_window *fault_win; /* Fault window */
+
+ struct mutex mutex;
+ struct pnv_vas_window *rxwin[VAS_COP_TYPE_MAX];
+ struct pnv_vas_window *windows[VAS_WINDOWS_PER_CHIP];
+
+ char *name;
+ char *dbgname;
+ struct dentry *dbgdir;
+};
+
+/*
+ * In-kernel state a VAS window on PowerNV. One per window.
+ */
+struct pnv_vas_window {
+ struct vas_window vas_win;
+ /* Fields common to send and receive windows */
+ struct vas_instance *vinst;
+ bool tx_win; /* True if send window */
+ bool nx_win; /* True if NX window */
+ bool user_win; /* True if user space window */
+ void *hvwc_map; /* HV window context */
+ void *uwc_map; /* OS/User window context */
+
+ /* Fields applicable only to send windows */
+ void *paste_kaddr;
+ char *paste_addr_name;
+ struct pnv_vas_window *rxwin;
+
+ /* Fields applicable only to receive windows */
+ atomic_t num_txwins;
+};
+
+/*
+ * Container for the hardware state of a window. One per-window.
+ *
+ * A VAS Window context is a 512-byte area in the hardware that contains
+ * a set of 64-bit registers. Individual bit-fields in these registers
+ * determine the configuration/operation of the hardware. struct vas_winctx
+ * is a container for the register fields in the window context.
+ */
+struct vas_winctx {
+ u64 rx_fifo;
+ int rx_fifo_size;
+ int wcreds_max;
+ int rsvd_txbuf_count;
+
+ bool user_win;
+ bool nx_win;
+ bool fault_win;
+ bool rsvd_txbuf_enable;
+ bool pin_win;
+ bool rej_no_credit;
+ bool tx_wcred_mode;
+ bool rx_wcred_mode;
+ bool tx_word_mode;
+ bool rx_word_mode;
+ bool data_stamp;
+ bool xtra_write;
+ bool notify_disable;
+ bool intr_disable;
+ bool fifo_disable;
+ bool notify_early;
+ bool notify_os_intr_reg;
+
+ int lpid;
+ int pidr; /* value from SPRN_PID, not linux pid */
+ int lnotify_lpid;
+ int lnotify_pid;
+ int lnotify_tid;
+ u32 pswid;
+ int rx_win_id;
+ int fault_win_id;
+ int tc_mode;
+
+ u64 irq_port;
+
+ enum vas_dma_type dma_type;
+ enum vas_notify_scope min_scope;
+ enum vas_notify_scope max_scope;
+ enum vas_notify_after_count notify_after_count;
+};
+
+extern struct mutex vas_mutex;
+
+extern struct vas_instance *find_vas_instance(int vasid);
+extern void vas_init_dbgdir(void);
+extern void vas_instance_init_dbgdir(struct vas_instance *vinst);
+extern void vas_window_init_dbgdir(struct pnv_vas_window *win);
+extern void vas_window_free_dbgdir(struct pnv_vas_window *win);
+extern int vas_setup_fault_window(struct vas_instance *vinst);
+extern irqreturn_t vas_fault_thread_fn(int irq, void *data);
+extern irqreturn_t vas_fault_handler(int irq, void *dev_id);
+extern void vas_return_credit(struct pnv_vas_window *window, bool tx);
+extern struct pnv_vas_window *vas_pswid_to_window(struct vas_instance *vinst,
+ uint32_t pswid);
+extern void vas_win_paste_addr(struct pnv_vas_window *window, u64 *addr,
+ int *len);
+
+static inline int vas_window_pid(struct vas_window *window)
+{
+ return pid_vnr(window->task_ref.pid); /* pid stored in the window's task_ref, as printed in the "stuck" warnings on close */
+}
+
+/*
+ * Debug-log a register write for @win: window direction and id, register
+ * @name, its address @regptr and the value. Writes of zero are not
+ * logged.
+ */
+static inline void vas_log_write(struct pnv_vas_window *win, char *name,
+		void *regptr, u64 val)
+{
+	if (!val)
+		return;
+
+	pr_debug("%swin #%d: %s reg %p, val 0x%016llx\n",
+			win->tx_win ? "Tx" : "Rx", win->vas_win.winid,
+			name, regptr, val);
+}
+
+/*
+ * Write @val with out_be64() to the register at byte offset @reg of the
+ * window's OS/User window context mapping (uwc_map). @name is only used
+ * for the debug log; VREG() supplies both @name and @reg.
+ */
+static inline void write_uwc_reg(struct pnv_vas_window *win, char *name,
+		s32 reg, u64 val)
+{
+	void *addr = win->uwc_map + reg;
+
+	vas_log_write(win, name, addr, val);
+	out_be64(addr, val);
+}
+
+/*
+ * Write @val with out_be64() to the register at byte offset @reg of the
+ * window's hypervisor window context mapping (hvwc_map). @name is only
+ * used for the debug log; VREG() supplies both @name and @reg.
+ */
+static inline void write_hvwc_reg(struct pnv_vas_window *win, char *name,
+		s32 reg, u64 val)
+{
+	void *addr = win->hvwc_map + reg;
+
+	vas_log_write(win, name, addr, val);
+	out_be64(addr, val);
+}
+
+static inline u64 read_hvwc_reg(struct pnv_vas_window *win,
+ char *name __maybe_unused, s32 reg)
+{
+ return in_be64(win->hvwc_map+reg); /* @name is unused; it is only here so VREG() can supply both arguments */
+}
+
+/*
+ * Encode/decode the Partition Send Window ID (PSWID) for a window in
+ * a way that we can uniquely identify any window in the system. i.e.
+ * we should be able to locate the 'struct vas_window' given the PSWID.
+ *
+ * Bits Usage
+ * 0:7 VAS id (8 bits)
+ * 8:15 Unused, 0 (8 bits)
+ * 16:31 Window id (16 bits)
+ *
+ * Bit 0 is the most significant bit, so the VAS id occupies the top byte
+ * of the 32-bit value and the window id the low 16 bits.
+ */
+static inline u32 encode_pswid(int vasid, int winid)
+{
+	/*
+	 * Shift as u32: shifting the signed @vasid left by 24 overflows
+	 * int for any id of 128 or above.
+	 */
+	return ((u32)winid | ((u32)vasid << (31 - 7)));
+}
+
+/*
+ * Split @pswid back into its VAS id and window id. Either output pointer
+ * may be NULL if the caller does not need that part.
+ */
+static inline void decode_pswid(u32 pswid, int *vasid, int *winid)
+{
+	if (vasid)
+		*vasid = (pswid >> (31 - 7)) & 0xFF;
+
+	if (winid)
+		*winid = pswid & 0xFFFF;
+}
+#endif /* _VAS_H */
diff --git a/arch/powerpc/platforms/ps3/Kconfig b/arch/powerpc/platforms/ps3/Kconfig
new file mode 100644
index 000000000..a44869e5e
--- /dev/null
+++ b/arch/powerpc/platforms/ps3/Kconfig
@@ -0,0 +1,182 @@
+# SPDX-License-Identifier: GPL-2.0
+config PPC_PS3
+ bool "Sony PS3"
+ depends on PPC64 && PPC_BOOK3S && CPU_BIG_ENDIAN
+ select PPC_CELL
+ select USB_OHCI_LITTLE_ENDIAN
+ select USB_OHCI_BIG_ENDIAN_MMIO
+ select USB_EHCI_BIG_ENDIAN_MMIO
+ select HAVE_PCI
+ select IRQ_DOMAIN_NOMAP
+ help
+ This option enables support for the Sony PS3 game console
+ and other platforms using the PS3 hypervisor. Enabling this
+ option will allow building otheros.bld, a kernel image suitable
+ for programming into flash memory, and vmlinux, a kernel image
+ suitable for loading via kexec.
+
+menu "PS3 Platform Options"
+ depends on PPC_PS3
+
+config PS3_ADVANCED
+ depends on PPC_PS3
+ bool "PS3 Advanced configuration options"
+ help
+ This gives you access to some advanced options for the PS3. The
+ defaults should be fine for most users, but these options may make
+ it possible to better control the kernel configuration if you know
+ what you are doing.
+
+ Note that the answer to this question won't directly affect the
+ kernel: saying N will just cause the configurator to skip all
+ the questions about these options.
+
+ Most users should say N to this question.
+
+config PS3_HTAB_SIZE
+ depends on PPC_PS3
+ int "PS3 Platform pagetable size" if PS3_ADVANCED
+ range 18 20
+ default 20
+ help
+ This option is only for experts who may have the desire to fine
+ tune the pagetable size on their system. The value here is
+ expressed as the log2 of the page table size. Valid values are
+ 18, 19, and 20, corresponding to 256KB, 512KB and 1MB respectively.
+
+ If unsure, choose the default (20) with the confidence that your
+ system will have optimal runtime performance.
+
+config PS3_DYNAMIC_DMA
+ depends on PPC_PS3
+ bool "PS3 Platform dynamic DMA page table management"
+ help
+ This option will enable kernel support to take advantage of the
+ per device dynamic DMA page table management provided by the Cell
+ processor's IO Controller. This support incurs some runtime
+ overhead and also slightly increases kernel memory usage. The
+ current implementation should be considered experimental.
+
+ This support is mainly for Linux kernel development. If unsure,
+ say N.
+
+config PS3_VUART
+ depends on PPC_PS3
+ tristate
+
+config PS3_PS3AV
+ depends on PPC_PS3
+ tristate "PS3 AV settings driver" if PS3_ADVANCED
+ select PS3_VUART
+ default y
+ help
+ Include support for the PS3 AV Settings driver.
+
+ This support is required for PS3 graphics and sound. In
+ general, all users will say Y or M.
+
+config PS3_SYS_MANAGER
+ depends on PPC_PS3
+ tristate "PS3 System Manager driver" if PS3_ADVANCED
+ select PS3_VUART
+ default y
+ help
+ Include support for the PS3 System Manager.
+
+ This support is required for PS3 system control. In
+ general, all users will say Y or M.
+
+config PS3_VERBOSE_RESULT
+ bool "PS3 Verbose LV1 hypercall results" if PS3_ADVANCED
+ depends on PPC_PS3
+ help
+ Enables more verbose log messages for LV1 hypercall results.
+
+ If in doubt, say N here and reduce the size of the kernel by a
+ small amount.
+
+config PS3_REPOSITORY_WRITE
+ bool "PS3 Repository write support" if PS3_ADVANCED
+ depends on PPC_PS3
+ help
+ Enables support for writing to the PS3 System Repository.
+
+ This support is intended for bootloaders that need to store data
+ in the repository for later boot stages.
+
+ If in doubt, say N here and reduce the size of the kernel by a
+ small amount.
+
+config PS3_STORAGE
+ depends on PPC_PS3
+ tristate
+
+config PS3_DISK
+ tristate "PS3 Disk Storage Driver"
+ depends on PPC_PS3 && BLOCK
+ select PS3_STORAGE
+ help
+ Include support for the PS3 Disk Storage.
+
+ This support is required to access the PS3 hard disk.
+ In general, all users will say Y or M.
+
+config PS3_ROM
+ tristate "PS3 BD/DVD/CD-ROM Storage Driver"
+ depends on PPC_PS3 && SCSI
+ select PS3_STORAGE
+ help
+ Include support for the PS3 ROM Storage.
+
+ This support is required to access the PS3 BD/DVD/CD-ROM drive.
+ In general, all users will say Y or M.
+ Also make sure to say Y or M to "SCSI CDROM support" later.
+
+config PS3_FLASH
+ tristate "PS3 FLASH ROM Storage Driver"
+ depends on PPC_PS3
+ select PS3_STORAGE
+ help
+ Include support for the PS3 FLASH ROM Storage.
+
+ This support is required to access the PS3 FLASH ROM, which
+ contains the boot loader and some boot options.
+ In general, PS3 OtherOS users will say Y or M.
+
+ As this driver needs a fixed buffer of 256 KiB of memory, it can
+ be disabled on the kernel command line using "ps3flash=off", to
+ not allocate this fixed buffer.
+
+config PS3_VRAM
+ tristate "PS3 Video RAM Storage Driver"
+ depends on FB_PS3=y && BLOCK && m
+ help
+ This driver allows you to use excess PS3 video RAM as volatile
+ storage or system swap.
+
+config PS3_LPM
+ tristate "PS3 Logical Performance Monitor support"
+ depends on PPC_PS3
+ help
+ Include support for the PS3 Logical Performance Monitor.
+
+ This support is required to use the logical performance monitor
+ of the PS3's LV1 hypervisor.
+
+ If you intend to use the advanced performance monitoring and
+ profiling support of the Cell processor with programs like
+ perfmon2, then say Y or M, otherwise say N.
+
+config PS3GELIC_UDBG
+ bool "PS3 udbg output via UDP broadcasts on Ethernet"
+ depends on PPC_PS3
+ help
+ Enables udbg early debugging output by sending broadcast UDP
+ via the Ethernet port (UDP port number 18194).
+
+ This driver uses a trivial implementation and is independent
+ from the main PS3 gelic network driver.
+
+ If in doubt, say N here.
+
+endmenu
diff --git a/arch/powerpc/platforms/ps3/Makefile b/arch/powerpc/platforms/ps3/Makefile
new file mode 100644
index 000000000..86bf2967a
--- /dev/null
+++ b/arch/powerpc/platforms/ps3/Makefile
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-y += setup.o mm.o time.o hvcall.o htab.o repository.o
+obj-y += interrupt.o exports.o os-area.o
+obj-y += system-bus.o
+
+obj-$(CONFIG_PS3GELIC_UDBG) += gelic_udbg.o
+obj-$(CONFIG_SMP) += smp.o
+obj-$(CONFIG_SPU_BASE) += spu.o
+obj-y += device-init.o
diff --git a/arch/powerpc/platforms/ps3/device-init.c b/arch/powerpc/platforms/ps3/device-init.c
new file mode 100644
index 000000000..e87360a0f
--- /dev/null
+++ b/arch/powerpc/platforms/ps3/device-init.c
@@ -0,0 +1,975 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * PS3 device registration routines.
+ *
+ * Copyright (C) 2007 Sony Computer Entertainment Inc.
+ * Copyright 2007 Sony Corp.
+ */
+
+#include <linux/delay.h>
+#include <linux/freezer.h>
+#include <linux/kernel.h>
+#include <linux/kthread.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/reboot.h>
+#include <linux/rcuwait.h>
+
+#include <asm/firmware.h>
+#include <asm/lv1call.h>
+#include <asm/ps3stor.h>
+
+#include "platform.h"
+
+/**
+ * ps3_register_lpm_devices - Register the logical performance monitor device.
+ *
+ * Reads the BE node id, the lpm privileges and the pu id from the repository
+ * and registers a PS3_DEVICE_TYPE_LPM system bus device.  Registration is
+ * refused with -ENODEV when the lpar id returned with the privileges differs
+ * from this partition's id, and with -EPERM when PS3_LPM_RIGHTS_USE_LPM is
+ * not granted.
+ *
+ * Returns 0 on success, otherwise an error code.  The device allocation is
+ * freed on every failure path.
+ */
+static int __init ps3_register_lpm_devices(void)
+{
+	int result;
+	u64 tmp1;
+	u64 tmp2;
+	struct ps3_system_bus_device *dev;
+
+	pr_debug(" -> %s:%d\n", __func__, __LINE__);
+
+	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+	if (!dev)
+		return -ENOMEM;
+
+	dev->match_id = PS3_MATCH_ID_LPM;
+	dev->dev_type = PS3_DEVICE_TYPE_LPM;
+
+	/* The current lpm driver only supports a single BE processor. */
+
+	result = ps3_repository_read_be_node_id(0, &dev->lpm.node_id);
+
+	if (result) {
+		pr_debug("%s:%d: ps3_repository_read_be_node_id failed \n",
+			__func__, __LINE__);
+		goto fail;
+	}
+
+	result = ps3_repository_read_lpm_privileges(dev->lpm.node_id, &tmp1,
+		&dev->lpm.rights);
+
+	if (result) {
+		pr_debug("%s:%d: ps3_repository_read_lpm_privileges failed\n",
+			__func__, __LINE__);
+		goto fail;
+	}
+
+	lv1_get_logical_partition_id(&tmp2);
+
+	if (tmp1 != tmp2) {
+		pr_debug("%s:%d: wrong lpar\n",
+			__func__, __LINE__);
+		result = -ENODEV;
+		goto fail;
+	}
+
+	if (!(dev->lpm.rights & PS3_LPM_RIGHTS_USE_LPM)) {
+		pr_debug("%s:%d: don't have rights to use lpm\n",
+			__func__, __LINE__);
+		result = -EPERM;
+		goto fail;
+	}
+
+	result = ps3_repository_read_pu_id(0, &dev->lpm.pu_id);
+
+	if (result) {
+		pr_debug("%s:%d: ps3_repository_read_pu_id failed \n",
+			__func__, __LINE__);
+		goto fail;
+	}
+
+	/* Trace only after pu_id has been read, so the real value is shown. */
+	pr_debug("%s:%d: pu_id %llu, rights %llu(%llxh)\n",
+		__func__, __LINE__, dev->lpm.pu_id, dev->lpm.rights,
+		dev->lpm.rights);
+
+	result = ps3_system_bus_device_register(dev);
+
+	if (result) {
+		pr_debug("%s:%d ps3_system_bus_device_register failed\n",
+			__func__, __LINE__);
+		goto fail;
+	}
+
+	pr_debug(" <- %s:%d\n", __func__, __LINE__);
+	return 0;
+
+fail:
+	kfree(dev);
+	pr_debug(" <- %s:%d: failed\n", __func__, __LINE__);
+	return result;
+}
+
+/**
+ * ps3_setup_gelic_device - Create and register the gelic device instance.
+ * @repo: Repository entry; must be a gelic device on the system bus.
+ *
+ * One allocation holds the system bus device together with its DMA region.
+ * The interrupt is looked up in the repository, the DMA region is
+ * initialised and the device is registered with the system bus.  The
+ * allocation is released again if any of those steps fails.
+ *
+ * Returns 0 on success, otherwise the error of the failing step.
+ */
+
+static int __init ps3_setup_gelic_device(
+	const struct ps3_repository_device *repo)
+{
+	struct layout {
+		struct ps3_system_bus_device dev;
+		struct ps3_dma_region d_region;
+	} *gelic;
+	int result;
+
+	pr_debug(" -> %s:%d\n", __func__, __LINE__);
+
+	BUG_ON(repo->bus_type != PS3_BUS_TYPE_SB);
+	BUG_ON(repo->dev_type != PS3_DEV_TYPE_SB_GELIC);
+
+	gelic = kzalloc(sizeof(*gelic), GFP_KERNEL);
+	if (!gelic) {
+		result = -ENOMEM;
+		goto out_fail;
+	}
+
+	gelic->dev.match_id = PS3_MATCH_ID_GELIC;
+	gelic->dev.dev_type = PS3_DEVICE_TYPE_SB;
+	gelic->dev.bus_id = repo->bus_id;
+	gelic->dev.dev_id = repo->dev_id;
+	gelic->dev.d_region = &gelic->d_region;
+
+	result = ps3_repository_find_interrupt(repo,
+		PS3_INTERRUPT_TYPE_EVENT_PORT, &gelic->dev.interrupt_id);
+	if (result) {
+		pr_debug("%s:%d ps3_repository_find_interrupt failed\n",
+			__func__, __LINE__);
+		goto out_free;
+	}
+
+	BUG_ON(gelic->dev.interrupt_id != 0);
+
+	result = ps3_dma_region_init(&gelic->dev, gelic->dev.d_region,
+		PS3_DMA_64K, PS3_DMA_OTHER, NULL, 0);
+	if (result) {
+		pr_debug("%s:%d ps3_dma_region_init failed\n",
+			__func__, __LINE__);
+		goto out_free;
+	}
+
+	result = ps3_system_bus_device_register(&gelic->dev);
+	if (result) {
+		pr_debug("%s:%d ps3_system_bus_device_register failed\n",
+			__func__, __LINE__);
+		goto out_free;
+	}
+
+	pr_debug(" <- %s:%d\n", __func__, __LINE__);
+	return result;
+
+out_free:
+	kfree(gelic);
+out_fail:
+	pr_debug(" <- %s:%d: fail.\n", __func__, __LINE__);
+	return result;
+}
+
+/**
+ * ps3_setup_uhc_device - Create and register one USB host controller device.
+ * @repo: Repository entry; must be a USB device on the system bus.
+ * @match_id: Match id assigned to the new system bus device.
+ * @interrupt_type: Interrupt type to look up in the repository.
+ * @reg_type: Register type whose bus address and length are looked up.
+ *
+ * The system bus device, its DMA region and its MMIO region share a single
+ * allocation, which is freed again if any step fails.
+ *
+ * Returns 0 on success, otherwise the error of the failing step.
+ */
+static int __ref ps3_setup_uhc_device(
+	const struct ps3_repository_device *repo, enum ps3_match_id match_id,
+	enum ps3_interrupt_type interrupt_type, enum ps3_reg_type reg_type)
+{
+	struct layout {
+		struct ps3_system_bus_device dev;
+		struct ps3_dma_region d_region;
+		struct ps3_mmio_region m_region;
+	} *hc;
+	u64 reg_addr;
+	u64 reg_len;
+	int result;
+
+	pr_debug(" -> %s:%d\n", __func__, __LINE__);
+
+	BUG_ON(repo->bus_type != PS3_BUS_TYPE_SB);
+	BUG_ON(repo->dev_type != PS3_DEV_TYPE_SB_USB);
+
+	hc = kzalloc(sizeof(*hc), GFP_KERNEL);
+	if (!hc) {
+		result = -ENOMEM;
+		goto out_fail;
+	}
+
+	hc->dev.match_id = match_id;
+	hc->dev.dev_type = PS3_DEVICE_TYPE_SB;
+	hc->dev.bus_id = repo->bus_id;
+	hc->dev.dev_id = repo->dev_id;
+	hc->dev.d_region = &hc->d_region;
+	hc->dev.m_region = &hc->m_region;
+
+	result = ps3_repository_find_interrupt(repo,
+		interrupt_type, &hc->dev.interrupt_id);
+	if (result) {
+		pr_debug("%s:%d ps3_repository_find_interrupt failed\n",
+			__func__, __LINE__);
+		goto out_free;
+	}
+
+	result = ps3_repository_find_reg(repo, reg_type,
+		&reg_addr, &reg_len);
+	if (result) {
+		pr_debug("%s:%d ps3_repository_find_reg failed\n",
+			__func__, __LINE__);
+		goto out_free;
+	}
+
+	result = ps3_dma_region_init(&hc->dev, hc->dev.d_region, PS3_DMA_64K,
+		PS3_DMA_INTERNAL, NULL, 0);
+	if (result) {
+		pr_debug("%s:%d ps3_dma_region_init failed\n",
+			__func__, __LINE__);
+		goto out_free;
+	}
+
+	result = ps3_mmio_region_init(&hc->dev, hc->dev.m_region, reg_addr,
+		reg_len, PS3_MMIO_4K);
+	if (result) {
+		pr_debug("%s:%d ps3_mmio_region_init failed\n",
+			__func__, __LINE__);
+		goto out_free;
+	}
+
+	result = ps3_system_bus_device_register(&hc->dev);
+	if (result) {
+		pr_debug("%s:%d ps3_system_bus_device_register failed\n",
+			__func__, __LINE__);
+		goto out_free;
+	}
+
+	pr_debug(" <- %s:%d\n", __func__, __LINE__);
+	return result;
+
+out_free:
+	kfree(hc);
+out_fail:
+	pr_debug(" <- %s:%d: fail.\n", __func__, __LINE__);
+	return result;
+}
+
+/*
+ * Set up the EHCI host controller of the USB device described by @repo by
+ * calling ps3_setup_uhc_device() with the EHCI match id, interrupt type and
+ * register type.  Returns that function's result.
+ */
+static int __init ps3_setup_ehci_device(
+ const struct ps3_repository_device *repo)
+{
+ return ps3_setup_uhc_device(repo, PS3_MATCH_ID_EHCI,
+ PS3_INTERRUPT_TYPE_SB_EHCI, PS3_REG_TYPE_SB_EHCI);
+}
+
+/*
+ * Set up the OHCI host controller of the USB device described by @repo by
+ * calling ps3_setup_uhc_device() with the OHCI match id, interrupt type and
+ * register type.  Returns that function's result.
+ */
+static int __init ps3_setup_ohci_device(
+ const struct ps3_repository_device *repo)
+{
+ return ps3_setup_uhc_device(repo, PS3_MATCH_ID_OHCI,
+ PS3_INTERRUPT_TYPE_SB_OHCI, PS3_REG_TYPE_SB_OHCI);
+}
+
+/*
+ * Allocate a PS3_DEVICE_TYPE_VUART system bus device for @match_id on
+ * @port_number and register it.
+ *
+ * Returns 0 on success, -ENOMEM if the allocation fails, otherwise the
+ * registration error (the allocation is freed in that case).
+ */
+static int __init ps3_setup_vuart_device(enum ps3_match_id match_id,
+	unsigned int port_number)
+{
+	struct layout {
+		struct ps3_system_bus_device dev;
+	} *vuart;
+	int result;
+
+	pr_debug(" -> %s:%d: match_id %u, port %u\n", __func__, __LINE__,
+		match_id, port_number);
+
+	vuart = kzalloc(sizeof(*vuart), GFP_KERNEL);
+	if (!vuart)
+		return -ENOMEM;
+
+	vuart->dev.match_id = match_id;
+	vuart->dev.dev_type = PS3_DEVICE_TYPE_VUART;
+	vuart->dev.port_number = port_number;
+
+	result = ps3_system_bus_device_register(&vuart->dev);
+	if (!result) {
+		pr_debug(" <- %s:%d\n", __func__, __LINE__);
+		return 0;
+	}
+
+	pr_debug("%s:%d ps3_system_bus_device_register failed\n",
+		__func__, __LINE__);
+	kfree(vuart);
+	pr_debug(" <- %s:%d fail\n", __func__, __LINE__);
+	return result;
+}
+
+/*
+ * Create and register a storage device for the repository entry @repo.
+ *
+ * The device info (port, block size, block count, region count) is read
+ * from the repository first; the device is then allocated with room for
+ * all of its regions, its interrupt is looked up, every region is read
+ * and finally the system bus device is registered.
+ *
+ * Returns 0 on success, -ENODEV when a repository lookup fails, -ENOMEM
+ * when the allocation fails, otherwise the registration error.
+ */
+static int ps3_setup_storage_dev(const struct ps3_repository_device *repo,
+				 enum ps3_match_id match_id)
+{
+	struct ps3_storage_device *stor;
+	unsigned int num_regions, idx;
+	u64 port, blk_size, num_blocks;
+	int result;
+
+	pr_debug(" -> %s:%u: match_id %u\n", __func__, __LINE__, match_id);
+
+	result = ps3_repository_read_stor_dev_info(repo->bus_index,
+			repo->dev_index, &port, &blk_size, &num_blocks,
+			&num_regions);
+	if (result) {
+		printk(KERN_ERR "%s:%u: _read_stor_dev_info failed %d\n",
+		       __func__, __LINE__, result);
+		return -ENODEV;
+	}
+
+	pr_debug("%s:%u: (%u:%u:%u): port %llu blk_size %llu num_blocks %llu "
+		 "num_regions %u\n", __func__, __LINE__, repo->bus_index,
+		 repo->dev_index, repo->dev_type, port, blk_size, num_blocks,
+		 num_regions);
+
+	stor = kzalloc(struct_size(stor, regions, num_regions), GFP_KERNEL);
+	if (!stor) {
+		result = -ENOMEM;
+		goto out_fail;
+	}
+
+	stor->sbd.match_id = match_id;
+	stor->sbd.dev_type = PS3_DEVICE_TYPE_SB;
+	stor->sbd.bus_id = repo->bus_id;
+	stor->sbd.dev_id = repo->dev_id;
+	stor->sbd.d_region = &stor->dma_region;
+	stor->blk_size = blk_size;
+	stor->num_regions = num_regions;
+
+	result = ps3_repository_find_interrupt(repo,
+			PS3_INTERRUPT_TYPE_EVENT_PORT,
+			&stor->sbd.interrupt_id);
+	if (result) {
+		printk(KERN_ERR "%s:%u: find_interrupt failed %d\n", __func__,
+		       __LINE__, result);
+		result = -ENODEV;
+		goto out_free;
+	}
+
+	idx = 0;
+	while (idx < num_regions) {
+		unsigned int id;
+		u64 start, size;
+
+		result = ps3_repository_read_stor_dev_region(repo->bus_index,
+				repo->dev_index, idx, &id, &start, &size);
+		if (result) {
+			printk(KERN_ERR
+			       "%s:%u: read_stor_dev_region failed %d\n",
+			       __func__, __LINE__, result);
+			result = -ENODEV;
+			goto out_free;
+		}
+		pr_debug("%s:%u: region %u: id %u start %llu size %llu\n",
+			 __func__, __LINE__, idx, id, start, size);
+
+		stor->regions[idx].id = id;
+		stor->regions[idx].start = start;
+		stor->regions[idx].size = size;
+		idx++;
+	}
+
+	result = ps3_system_bus_device_register(&stor->sbd);
+	if (result) {
+		pr_debug("%s:%u ps3_system_bus_device_register failed\n",
+			 __func__, __LINE__);
+		goto out_free;
+	}
+
+	pr_debug(" <- %s:%u\n", __func__, __LINE__);
+	return 0;
+
+out_free:
+	kfree(stor);
+out_fail:
+	pr_debug(" <- %s:%u: fail.\n", __func__, __LINE__);
+	return result;
+}
+
+/*
+ * Register the AV settings and system manager vuart devices.
+ *
+ * Each port number is read from the repository; when that read fails a
+ * fixed default is used (0 for AV, 2 for the system manager).  A failed
+ * setup triggers WARN_ON().  The return value is the result of the
+ * system manager setup only.
+ */
+static int __init ps3_register_vuart_devices(void)
+{
+	unsigned int port_number;
+	int result;
+
+	pr_debug(" -> %s:%d\n", __func__, __LINE__);
+
+	if (ps3_repository_read_vuart_av_port(&port_number))
+		port_number = 0; /* av default */
+
+	result = ps3_setup_vuart_device(PS3_MATCH_ID_AV_SETTINGS, port_number);
+	WARN_ON(result);
+
+	if (ps3_repository_read_vuart_sysmgr_port(&port_number))
+		port_number = 2; /* sysmgr default */
+
+	result = ps3_setup_vuart_device(PS3_MATCH_ID_SYSTEM_MANAGER,
+		port_number);
+	WARN_ON(result);
+
+	pr_debug(" <- %s:%d\n", __func__, __LINE__);
+	return result;
+}
+
+/*
+ * Allocate and register the sound device: a PS3_DEVICE_TYPE_IOC0 system
+ * bus device that carries its own DMA and MMIO regions in the same
+ * allocation.
+ *
+ * Returns 0 on success, -ENOMEM if the allocation fails, otherwise the
+ * registration error (the allocation is freed in that case).
+ */
+static int __init ps3_register_sound_devices(void)
+{
+	struct layout {
+		struct ps3_system_bus_device dev;
+		struct ps3_dma_region d_region;
+		struct ps3_mmio_region m_region;
+	} *snd;
+	int result;
+
+	pr_debug(" -> %s:%d\n", __func__, __LINE__);
+
+	snd = kzalloc(sizeof(*snd), GFP_KERNEL);
+	if (!snd)
+		return -ENOMEM;
+
+	snd->dev.match_id = PS3_MATCH_ID_SOUND;
+	snd->dev.dev_type = PS3_DEVICE_TYPE_IOC0;
+	snd->dev.d_region = &snd->d_region;
+	snd->dev.m_region = &snd->m_region;
+
+	result = ps3_system_bus_device_register(&snd->dev);
+	if (!result) {
+		pr_debug(" <- %s:%d\n", __func__, __LINE__);
+		return 0;
+	}
+
+	pr_debug("%s:%d ps3_system_bus_device_register failed\n",
+		__func__, __LINE__);
+	kfree(snd);
+	pr_debug(" <- %s:%d failed\n", __func__, __LINE__);
+	return result;
+}
+
+/*
+ * Allocate and register the GPU frame buffer device (match id
+ * PS3_MATCH_ID_GPU, sub id PS3_MATCH_SUB_ID_GPU_FB) as a
+ * PS3_DEVICE_TYPE_IOC0 system bus device.
+ *
+ * Returns 0 on success, -ENOMEM if the allocation fails, otherwise the
+ * registration error (the allocation is freed in that case).
+ */
+static int __init ps3_register_graphics_devices(void)
+{
+	struct layout {
+		struct ps3_system_bus_device dev;
+	} *gpu;
+	int result;
+
+	pr_debug(" -> %s:%d\n", __func__, __LINE__);
+
+	gpu = kzalloc(sizeof(*gpu), GFP_KERNEL);
+	if (!gpu)
+		return -ENOMEM;
+
+	gpu->dev.match_id = PS3_MATCH_ID_GPU;
+	gpu->dev.match_sub_id = PS3_MATCH_SUB_ID_GPU_FB;
+	gpu->dev.dev_type = PS3_DEVICE_TYPE_IOC0;
+
+	result = ps3_system_bus_device_register(&gpu->dev);
+	if (!result) {
+		pr_debug(" <- %s:%d\n", __func__, __LINE__);
+		return 0;
+	}
+
+	pr_debug("%s:%d ps3_system_bus_device_register failed\n",
+		__func__, __LINE__);
+	kfree(gpu);
+	pr_debug(" <- %s:%d failed\n", __func__, __LINE__);
+	return result;
+}
+
+/*
+ * Allocate and register the GPU ramdisk device (match id
+ * PS3_MATCH_ID_GPU, sub id PS3_MATCH_SUB_ID_GPU_RAMDISK) as a
+ * PS3_DEVICE_TYPE_IOC0 system bus device.
+ *
+ * Returns 0 on success, -ENOMEM if the allocation fails, otherwise the
+ * registration error (the allocation is freed in that case).
+ */
+static int __init ps3_register_ramdisk_device(void)
+{
+	struct layout {
+		struct ps3_system_bus_device dev;
+	} *ramdisk;
+	int result;
+
+	pr_debug(" -> %s:%d\n", __func__, __LINE__);
+
+	ramdisk = kzalloc(sizeof(*ramdisk), GFP_KERNEL);
+	if (!ramdisk)
+		return -ENOMEM;
+
+	ramdisk->dev.match_id = PS3_MATCH_ID_GPU;
+	ramdisk->dev.match_sub_id = PS3_MATCH_SUB_ID_GPU_RAMDISK;
+	ramdisk->dev.dev_type = PS3_DEVICE_TYPE_IOC0;
+
+	result = ps3_system_bus_device_register(&ramdisk->dev);
+	if (!result) {
+		pr_debug(" <- %s:%d\n", __func__, __LINE__);
+		return 0;
+	}
+
+	pr_debug("%s:%d ps3_system_bus_device_register failed\n",
+		__func__, __LINE__);
+	kfree(ramdisk);
+	pr_debug(" <- %s:%d failed\n", __func__, __LINE__);
+	return result;
+}
+
+/**
+ * ps3_setup_dynamic_device - Setup a dynamic device from the repository
+ * @repo: Repository entry of the device.
+ *
+ * Disk, ROM and flash storage devices are handed to
+ * ps3_setup_storage_dev() with the matching match id; any other device
+ * type is only logged.  For disks, -ENODEV is treated as "not accessible"
+ * and reported as success.
+ *
+ * Returns 0 or the error from ps3_setup_storage_dev().
+ */
+
+static int ps3_setup_dynamic_device(const struct ps3_repository_device *repo)
+{
+	enum ps3_match_id match_id;
+	int result;
+
+	switch (repo->dev_type) {
+	case PS3_DEV_TYPE_STOR_DISK:
+		match_id = PS3_MATCH_ID_STOR_DISK;
+		break;
+	case PS3_DEV_TYPE_STOR_ROM:
+		match_id = PS3_MATCH_ID_STOR_ROM;
+		break;
+	case PS3_DEV_TYPE_STOR_FLASH:
+		match_id = PS3_MATCH_ID_STOR_FLASH;
+		break;
+	default:
+		pr_debug("%s:%u: unsupported dev_type %u\n", __func__, __LINE__,
+			repo->dev_type);
+		return 0;
+	}
+
+	result = ps3_setup_storage_dev(repo, match_id);
+
+	/* Some devices are not accessible from the Other OS lpar. */
+	if (result == -ENODEV && repo->dev_type == PS3_DEV_TYPE_STOR_DISK) {
+		pr_debug("%s:%u: not accessible\n", __func__,
+			__LINE__);
+		return 0;
+	}
+
+	if (result)
+		pr_debug("%s:%u ps3_setup_storage_dev failed\n",
+			__func__, __LINE__);
+
+	return result;
+}
+
+/**
+ * ps3_setup_static_device - Setup a static device from the repository
+ * @repo: Repository entry of the device.
+ *
+ * Gelic and USB devices are set up here; every other device type is passed
+ * on to ps3_setup_dynamic_device().  For USB both the EHCI and the OHCI
+ * controller are set up; an EHCI failure is only logged and the return
+ * value is that of the OHCI setup.
+ */
+
+static int __init ps3_setup_static_device(const struct ps3_repository_device *repo)
+{
+	int result;
+
+	if (repo->dev_type == PS3_DEV_TYPE_SB_GELIC) {
+		result = ps3_setup_gelic_device(repo);
+		if (result)
+			pr_debug("%s:%d ps3_setup_gelic_device failed\n",
+				__func__, __LINE__);
+		return result;
+	}
+
+	if (repo->dev_type != PS3_DEV_TYPE_SB_USB)
+		return ps3_setup_dynamic_device(repo);
+
+	/* Each USB device has both an EHCI and an OHCI HC */
+
+	if (ps3_setup_ehci_device(repo))
+		pr_debug("%s:%d ps3_setup_ehci_device failed\n",
+			__func__, __LINE__);
+
+	result = ps3_setup_ohci_device(repo);
+	if (result)
+		pr_debug("%s:%d ps3_setup_ohci_device failed\n",
+			__func__, __LINE__);
+
+	return result;
+}
+
+/*
+ * Look up device @bus_id:@dev_id in the repository and hand it to
+ * ps3_setup_dynamic_device().  The lookup is attempted up to ten times
+ * with a 100 ms interruptible sleep in between; a warning is logged when
+ * the device is not found.
+ */
+static void ps3_find_and_add_device(u64 bus_id, u64 dev_id)
+{
+	struct ps3_repository_device repo;
+	unsigned int retries = 0;
+
+	/*
+	 * On some firmware versions (e.g. 1.90), the device may not show up
+	 * in the repository immediately
+	 */
+	while (retries < 10) {
+		if (!ps3_repository_find_device_by_id(&repo, bus_id, dev_id)) {
+			if (retries)
+				pr_debug("%s:%u: device %llu:%llu found after %u retries\n",
+					__func__, __LINE__, bus_id, dev_id,
+					retries);
+
+			ps3_setup_dynamic_device(&repo);
+			return;
+		}
+
+		/* A non-zero return from the sleep ends the retries. */
+		if (msleep_interruptible(100))
+			break;
+
+		retries++;
+	}
+
+	pr_warn("%s:%u: device %llu:%llu not found\n",
+		__func__, __LINE__, bus_id, dev_id);
+}
+
+#define PS3_NOTIFICATION_DEV_ID ULONG_MAX
+#define PS3_NOTIFICATION_INTERRUPT_ID 0
+
+/*
+ * State shared between ps3_notification_read_write(), which issues a
+ * request and waits, and ps3_notification_interrupt(), which completes it.
+ */
+struct ps3_notification_device {
+ struct ps3_system_bus_device sbd;
+ spinlock_t lock; /* held while issuing a request and in the interrupt handler */
+ u64 tag; /* tag stored by lv1_storage_read/write for the request */
+ u64 lv1_status; /* status stored by the interrupt handler on completion */
+ struct rcuwait wait; /* the issuing thread waits here for done */
+ bool done; /* cleared when a request is issued, set by the interrupt handler */
+};
+
+/*
+ * Notification event types.  The values are used as bit positions in
+ * ps3_notify_cmd.event_mask and as ps3_notify_event.event_type.
+ */
+enum ps3_notify_type {
+ notify_device_ready = 0,
+ notify_region_probe = 1,
+ notify_region_update = 2,
+};
+
+/*
+ * Request written to the notification device to select which events are
+ * to be reported.
+ */
+struct ps3_notify_cmd {
+ u64 operation_code; /* must be zero */
+ u64 event_mask; /* OR of 1UL << enum ps3_notify_type */
+};
+
+/*
+ * Event record read back from the notification device; it identifies the
+ * device the event refers to.
+ */
+struct ps3_notify_event {
+ u64 event_type; /* enum ps3_notify_type */
+ u64 bus_id;
+ u64 dev_id;
+ u64 dev_type;
+ u64 dev_port;
+};
+
+/*
+ * Interrupt handler for the notification device.
+ *
+ * @data is the struct ps3_notification_device passed to request_irq().
+ * The async status of the outstanding request is fetched under dev->lock;
+ * on success the status is stored, the request is marked done and the
+ * waiting thread is woken.  Always returns IRQ_HANDLED.
+ */
+static irqreturn_t ps3_notification_interrupt(int irq, void *data)
+{
+ struct ps3_notification_device *dev = data;
+ int res;
+ u64 tag, status;
+
+ spin_lock(&dev->lock);
+ res = lv1_storage_get_async_status(PS3_NOTIFICATION_DEV_ID, &tag,
+ &status);
+ /*
+ * NOTE(review): tag and status are read here even when res reports a
+ * failure -- confirm the hypercall always writes both outputs.
+ */
+ if (tag != dev->tag)
+ pr_err("%s:%u: tag mismatch, got %llx, expected %llx\n",
+ __func__, __LINE__, tag, dev->tag);
+
+ if (res) {
+ /* Failure is only logged; the waiter is not woken. */
+ pr_err("%s:%u: res %d status 0x%llx\n", __func__, __LINE__, res,
+ status);
+ } else {
+ pr_debug("%s:%u: completed, status 0x%llx\n", __func__,
+ __LINE__, status);
+ dev->lv1_status = status;
+ dev->done = true;
+ rcuwait_wake_up(&dev->wait);
+ }
+ spin_unlock(&dev->lock);
+ return IRQ_HANDLED;
+}
+
+/**
+ * ps3_notification_read_write - Issue one notification request and wait.
+ * @dev: Notification device state shared with ps3_notification_interrupt().
+ * @lpar: Logical partition address of the transfer buffer.
+ * @write: Non-zero to issue lv1_storage_write(), zero for lv1_storage_read().
+ *
+ * The request is issued under dev->lock and the caller then sleeps until
+ * the interrupt handler marks it done or the kthread is asked to stop.
+ *
+ * Returns 0 when the request completed with a zero status, -EPERM when
+ * the hypervisor call could not be issued, -EINTR when the thread was
+ * asked to stop, and -EIO when the request completed with a non-zero
+ * status.
+ */
+static int ps3_notification_read_write(struct ps3_notification_device *dev,
+				       u64 lpar, int write)
+{
+	const char *op = write ? "write" : "read";
+	unsigned long flags;
+	int res;
+
+	spin_lock_irqsave(&dev->lock, flags);
+	res = write ? lv1_storage_write(dev->sbd.dev_id, 0, 0, 1, 0, lpar,
+					&dev->tag)
+		    : lv1_storage_read(dev->sbd.dev_id, 0, 0, 1, 0, lpar,
+				       &dev->tag);
+	dev->done = false;
+	spin_unlock_irqrestore(&dev->lock, flags);
+	if (res) {
+		pr_err("%s:%u: %s failed %d\n", __func__, __LINE__, op, res);
+		return -EPERM;
+	}
+	pr_debug("%s:%u: notification %s issued\n", __func__, __LINE__, op);
+
+	rcuwait_wait_event(&dev->wait, dev->done || kthread_should_stop(), TASK_IDLE);
+
+	/*
+	 * A stop request can end the wait before the interrupt handler has
+	 * stored a status, so report it rather than inspecting lv1_status.
+	 */
+	if (kthread_should_stop()) {
+		pr_debug("%s:%u: interrupted %s\n", __func__, __LINE__, op);
+		return -EINTR;
+	}
+
+	if (dev->lv1_status) {
+		pr_err("%s:%u: %s not completed, status 0x%llx\n", __func__,
+		       __LINE__, op, dev->lv1_status);
+		return -EIO;
+	}
+	pr_debug("%s:%u: notification %s completed\n", __func__, __LINE__, op);
+
+	return 0;
+}
+
+static struct task_struct *probe_task;
+
+/**
+ * ps3_probe_thread - Background repository probing at system startup.
+ *
+ * This implementation only supports background probing on a single bus.
+ * It uses the hypervisor's storage device notification mechanism to wait until
+ * a storage device is ready. The device notification mechanism uses a
+ * pseudo device to asynchronously notify the guest when storage devices become
+ * ready. The notification device has a block size of 512 bytes.
+ */
+
+static int ps3_probe_thread(void *data)
+{
+ /*
+ * NOTE(review): dev lives on this thread's stack and is not zeroed;
+ * only the fields assigned below are initialised here -- confirm the
+ * helpers it is passed to read nothing else.
+ */
+ struct ps3_notification_device dev;
+ int res;
+ unsigned int irq;
+ u64 lpar;
+ void *buf;
+ struct ps3_notify_cmd *notify_cmd;
+ struct ps3_notify_event *notify_event;
+
+ pr_debug(" -> %s:%u: kthread started\n", __func__, __LINE__);
+
+ /* One 512-byte buffer, used for both the command and the event. */
+ buf = kzalloc(512, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ lpar = ps3_mm_phys_to_lpar(__pa(buf));
+ notify_cmd = buf;
+ notify_event = buf;
+
+ /* dummy system bus device */
+ /* The bus id is passed in through the kthread's data pointer. */
+ dev.sbd.bus_id = (u64)data;
+ dev.sbd.dev_id = PS3_NOTIFICATION_DEV_ID;
+ dev.sbd.interrupt_id = PS3_NOTIFICATION_INTERRUPT_ID;
+
+ res = lv1_open_device(dev.sbd.bus_id, dev.sbd.dev_id, 0);
+ if (res) {
+ pr_err("%s:%u: lv1_open_device failed %s\n", __func__,
+ __LINE__, ps3_result(res));
+ goto fail_free;
+ }
+
+ res = ps3_sb_event_receive_port_setup(&dev.sbd, PS3_BINDING_CPU_ANY,
+ &irq);
+ if (res) {
+ pr_err("%s:%u: ps3_sb_event_receive_port_setup failed %d\n",
+ __func__, __LINE__, res);
+ goto fail_close_device;
+ }
+
+ /* The lock and wait object must be ready before the irq can fire. */
+ spin_lock_init(&dev.lock);
+ rcuwait_init(&dev.wait);
+
+ res = request_irq(irq, ps3_notification_interrupt, 0,
+ "ps3_notification", &dev);
+ if (res) {
+ pr_err("%s:%u: request_irq failed %d\n", __func__, __LINE__,
+ res);
+ goto fail_sb_event_receive_port_destroy;
+ }
+
+ /* Setup and write the request for device notification. */
+ notify_cmd->operation_code = 0; /* must be zero */
+ notify_cmd->event_mask = 1UL << notify_region_probe;
+
+ res = ps3_notification_read_write(&dev, lpar, 1);
+ if (res)
+ goto fail_free_irq;
+
+ /* Loop here processing the requested notification events. */
+ do {
+ try_to_freeze();
+
+ /* Clear the shared buffer before the next event is read into it. */
+ memset(notify_event, 0, sizeof(*notify_event));
+
+ res = ps3_notification_read_write(&dev, lpar, 0);
+ if (res)
+ break;
+
+ pr_debug("%s:%u: notify event type 0x%llx bus id %llu dev id %llu"
+ " type %llu port %llu\n", __func__, __LINE__,
+ notify_event->event_type, notify_event->bus_id,
+ notify_event->dev_id, notify_event->dev_type,
+ notify_event->dev_port);
+
+ /* Only region-probe events for our own bus are acted upon. */
+ if (notify_event->event_type != notify_region_probe ||
+ notify_event->bus_id != dev.sbd.bus_id) {
+ pr_warn("%s:%u: bad notify_event: event %llu, dev_id %llu, dev_type %llu\n",
+ __func__, __LINE__, notify_event->event_type,
+ notify_event->dev_id, notify_event->dev_type);
+ /* In a do/while, continue still evaluates the loop condition. */
+ continue;
+ }
+
+ ps3_find_and_add_device(dev.sbd.bus_id, notify_event->dev_id);
+
+ } while (!kthread_should_stop());
+
+ /* Normal loop exit falls through the same teardown as the failures. */
+fail_free_irq:
+ free_irq(irq, &dev);
+fail_sb_event_receive_port_destroy:
+ ps3_sb_event_receive_port_destroy(&dev.sbd, irq);
+fail_close_device:
+ lv1_close_device(dev.sbd.bus_id, dev.sbd.dev_id);
+fail_free:
+ kfree(buf);
+
+ /*
+ * Clear the handle that ps3_stop_probe_thread() checks.
+ * NOTE(review): the -ENOMEM return above skips this, and every path
+ * reaching here returns 0 regardless of res -- confirm intended.
+ */
+ probe_task = NULL;
+
+ pr_debug(" <- %s:%u: kthread finished\n", __func__, __LINE__);
+
+ return 0;
+}
+
+/**
+ * ps3_stop_probe_thread - Stops the background probe thread.
+ * @nb: Notifier block this callback was registered with (unused).
+ * @code: Notifier event code (unused).
+ * @data: Notifier payload (unused).
+ *
+ * Notifier callback registered via register_reboot_notifier().  Stops the
+ * probe thread if probe_task is still set.  Always returns 0.
+ */
+
+static int ps3_stop_probe_thread(struct notifier_block *nb, unsigned long code,
+ void *data)
+{
+ if (probe_task)
+ kthread_stop(probe_task);
+ return 0;
+}
+
+/* Reboot notifier that stops the probe thread; see ps3_start_probe_thread(). */
+static struct notifier_block nb = {
+ .notifier_call = ps3_stop_probe_thread
+};
+
+/**
+ * ps3_start_probe_thread - Starts the background probe thread.
+ * @bus_type: Type of the bus whose first instance is to be probed.
+ *
+ * Looks up the bus index and bus id in the repository, starts
+ * ps3_probe_thread() with the bus id as its argument, records the task in
+ * probe_task and registers the reboot notifier that stops it.
+ *
+ * Returns 0 on success, -ENODEV when a repository lookup fails, otherwise
+ * the kthread_run() error.
+ */
+
+static int __init ps3_start_probe_thread(enum ps3_bus_type bus_type)
+{
+	struct ps3_repository_device repo;
+	struct task_struct *task;
+	int result;
+
+	pr_debug(" -> %s:%d\n", __func__, __LINE__);
+
+	memset(&repo, 0, sizeof(repo));
+	repo.bus_type = bus_type;
+
+	result = ps3_repository_find_bus(repo.bus_type, 0, &repo.bus_index);
+	if (result) {
+		printk(KERN_ERR "%s: Cannot find bus (%d)\n", __func__, result);
+		return -ENODEV;
+	}
+
+	result = ps3_repository_read_bus_id(repo.bus_index, &repo.bus_id);
+	if (result) {
+		printk(KERN_ERR "%s: read_bus_id failed %d\n", __func__,
+			result);
+		return -ENODEV;
+	}
+
+	task = kthread_run(ps3_probe_thread, (void *)repo.bus_id,
+		"ps3-probe-%u", bus_type);
+	if (IS_ERR(task)) {
+		result = PTR_ERR(task);
+		printk(KERN_ERR "%s: kthread_run failed %d\n", __func__,
+			result);
+		return result;
+	}
+
+	probe_task = task;
+	register_reboot_notifier(&nb);
+
+	pr_debug(" <- %s:%d\n", __func__, __LINE__);
+	return 0;
+}
+
+/**
+ * ps3_register_devices - Probe the system and register devices found.
+ *
+ * A device_initcall() routine.
+ *
+ * Registration is best effort: the result of each individual step is
+ * ignored, so a failure to register one device does not prevent the others
+ * from being registered.
+ *
+ * Returns -ENODEV when the PS3 LV1 firmware feature is absent, otherwise 0.
+ */
+
+static int __init ps3_register_devices(void)
+{
+	if (!firmware_has_feature(FW_FEATURE_PS3_LV1))
+		return -ENODEV;
+
+	pr_debug(" -> %s:%d\n", __func__, __LINE__);
+
+	/* ps3_repository_dump_bus_info(); */
+
+	/* Failures are already reported by ps3_start_probe_thread() itself. */
+	ps3_start_probe_thread(PS3_BUS_TYPE_STORAGE);
+
+	ps3_register_vuart_devices();
+
+	ps3_register_graphics_devices();
+
+	ps3_repository_find_devices(PS3_BUS_TYPE_SB, ps3_setup_static_device);
+
+	ps3_register_sound_devices();
+
+	ps3_register_lpm_devices();
+
+	ps3_register_ramdisk_device();
+
+	pr_debug(" <- %s:%d\n", __func__, __LINE__);
+	return 0;
+}
+
+device_initcall(ps3_register_devices);
diff --git a/arch/powerpc/platforms/ps3/exports.c b/arch/powerpc/platforms/ps3/exports.c
new file mode 100644
index 000000000..1ac31abcf
--- /dev/null
+++ b/arch/powerpc/platforms/ps3/exports.c
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * PS3 hvcall exports for modules.
+ *
+ * Copyright (C) 2006 Sony Computer Entertainment Inc.
+ * Copyright 2006 Sony Corp.
+ */
+
+/*
+ * Each LV1_CALL() expansion declares the _lv1_<name>() entry point and
+ * exports it to modules.  The include below is presumably what invokes
+ * LV1_CALL() once per hypercall -- confirm against asm/lv1call.h.
+ */
+#define LV1_CALL(name, in, out, num) \
+ extern s64 _lv1_##name(LV1_##in##_IN_##out##_OUT_ARG_DECL); \
+ EXPORT_SYMBOL(_lv1_##name);
+
+#include <asm/lv1call.h>
diff --git a/arch/powerpc/platforms/ps3/gelic_udbg.c b/arch/powerpc/platforms/ps3/gelic_udbg.c
new file mode 100644
index 000000000..6b298010f
--- /dev/null
+++ b/arch/powerpc/platforms/ps3/gelic_udbg.c
@@ -0,0 +1,244 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * udbg debug output routine via GELIC UDP broadcasts
+ *
+ * Copyright (C) 2007 Sony Computer Entertainment Inc.
+ * Copyright 2006, 2007 Sony Corporation
+ * Copyright (C) 2010 Hector Martin <hector@marcansoft.com>
+ * Copyright (C) 2011 Andre Heider <a.heider@gmail.com>
+ */
+
+#include <linux/if_ether.h>
+#include <linux/etherdevice.h>
+#include <linux/if_vlan.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+
+#include <asm/io.h>
+#include <asm/udbg.h>
+#include <asm/lv1call.h>
+
+#define GELIC_BUS_ID 1
+#define GELIC_DEVICE_ID 0
+#define GELIC_DEBUG_PORT 18194 /* used as both UDP source and destination port */
+#define GELIC_MAX_MESSAGE_SIZE 1000 /* buffered bytes that force a send */
+/* Request codes and argument passed to lv1_net_control(). */
+#define GELIC_LV1_GET_MAC_ADDRESS 1
+#define GELIC_LV1_GET_VLAN_ID 4
+#define GELIC_LV1_VLAN_TX_ETHERNET_0 2
+/* dmac_cmd_status: field polled by gelic_sendbuf() after starting a transmit. */
+#define GELIC_DESCR_DMA_STAT_MASK 0xf0000000
+#define GELIC_DESCR_DMA_CARDOWNED 0xa0000000
+/* dmac_cmd_status: transmit command bits. */
+#define GELIC_DESCR_TX_DMA_IKE 0x00080000
+#define GELIC_DESCR_TX_DMA_NO_CHKSUM 0x00000000
+#define GELIC_DESCR_TX_DMA_FRAME_TAIL 0x00040000
+/* Command word written for every frame; gelic_sendbuf() ORs in FRAME_TAIL. */
+#define GELIC_DESCR_DMA_CMD_NO_CHKSUM (GELIC_DESCR_DMA_CARDOWNED | \
+ GELIC_DESCR_TX_DMA_IKE | \
+ GELIC_DESCR_TX_DMA_NO_CHKSUM)
+/* Bus address of dbg as produced by map_dma_mem(); 0 until then. */
+static u64 bus_addr;
+
+struct gelic_descr {
+ /* as defined by the hardware */
+ __be32 buf_addr; /* bus address of the packet buffer */
+ __be32 buf_size; /* bytes to send: headers + message */
+ __be32 next_descr_addr; /* never written here, so it stays 0 */
+ __be32 dmac_cmd_status; /* command bits on submit; polled afterwards */
+ __be32 result_size;
+ __be32 valid_size; /* all zeroes for tx */
+ __be32 data_status;
+ __be32 data_error; /* all zeroes for tx */
+} __attribute__((aligned(32)));
+
+struct debug_block {
+ struct gelic_descr descr; /* the one transmit descriptor used by this file */
+ u8 pkt[1520]; /* frame: eth [+ vlan] + ip + udp headers, then the message */
+} __packed;
+
+static __iomem struct ethhdr *h_eth; /* all h_* pointers point into dbg.pkt */
+static __iomem struct vlan_hdr *h_vlan; /* set only when a VLAN id was obtained */
+static __iomem struct iphdr *h_ip;
+static __iomem struct udphdr *h_udp;
+/* Message area: pmsg is its start, pmsgc the next free byte. */
+static __iomem char *pmsg;
+static __iomem char *pmsgc;
+/* Descriptor plus packet buffer, DMA-mapped by gelic_debug_init(). */
+static __iomem struct debug_block dbg __attribute__((aligned(32)));
+
+static int header_size; /* total bytes of headers in front of the message */
+
+static void map_dma_mem(int bus_id, int dev_id, void *start, size_t len,
+ u64 *real_bus_addr)
+{
+ /* Drop the top four address bits of @start before talking to lv1. */
+ const u64 phys = ((u64)start) & 0x0fffffffffffffffUL;
+ /* Grow [phys, phys + len) outwards to 4 KiB boundaries. */
+ const u64 first_page = phys & ~0xfff;
+ const u64 map_len = ((phys + len + 0xfff) & ~0xfff) - first_page;
+ const u64 map_flags = 0xf800000000000000UL;
+ u64 dma_base = 0;
+
+ /*
+ * Allocate a DMA region of map_len bytes; failure ends in lv1_panic(0).
+ */
+ if (lv1_allocate_device_dma_region(bus_id, dev_id, map_len, 12, 0,
+ &dma_base))
+ lv1_panic(0);
+
+ /* Map the pages that contain the buffer into that region. */
+ if (lv1_map_device_dma_region(bus_id, dev_id, first_page, dma_base,
+ map_len, map_flags))
+ lv1_panic(0);
+
+ /* Hand back the bus address of @start itself, not of its page. */
+ *real_bus_addr = dma_base + phys - first_page;
+}
+
+static int unmap_dma_mem(int bus_id, int dev_id, u64 bus_addr, size_t len)
+{
+ /* Recompute the page-aligned start and length from @bus_addr and @len. */
+ const u64 window = bus_addr & ~0xfff;
+ const size_t span = (len + (bus_addr - window) + 0xfff) & ~0xfff;
+ s64 status;
+
+ /* Unmap first; the region is freed only once that has succeeded. */
+ status = lv1_unmap_device_dma_region(bus_id, dev_id, window, span);
+ if (status != 0)
+ return status;
+
+ status = lv1_free_device_dma_region(bus_id, dev_id, window);
+
+ return status;
+}
+
+static void __init gelic_debug_init(void)
+{
+ s64 result;
+ u64 v2;
+ u64 mac;
+ u64 vlan_id;
+ /* Open the device, map dbg for DMA and prebuild the frame headers. */
+ result = lv1_open_device(GELIC_BUS_ID, GELIC_DEVICE_ID, 0);
+ if (result)
+ lv1_panic(0);
+
+ map_dma_mem(GELIC_BUS_ID, GELIC_DEVICE_ID, &dbg, sizeof(dbg),
+ &bus_addr);
+ /* Start from an all-zero descriptor and packet. */
+ memset(&dbg, 0, sizeof(dbg));
+ /* The descriptor points at the packet area of the same mapped block. */
+ dbg.descr.buf_addr = bus_addr + offsetof(struct debug_block, pkt);
+
+ wmb();
+
+ result = lv1_net_control(GELIC_BUS_ID, GELIC_DEVICE_ID,
+ GELIC_LV1_GET_MAC_ADDRESS, 0, 0, 0,
+ &mac, &v2);
+ if (result)
+ lv1_panic(0);
+ /* Presumably the MAC sits in the low 48 bits; left-justify it for memcpy() (big-endian). */
+ mac <<= 16;
+
+ h_eth = (struct ethhdr *)dbg.pkt;
+
+ eth_broadcast_addr(h_eth->h_dest);
+ memcpy(&h_eth->h_source, &mac, ETH_ALEN);
+
+ header_size = sizeof(struct ethhdr);
+ /* A VLAN header is inserted only if the VLAN id query succeeds. */
+ result = lv1_net_control(GELIC_BUS_ID, GELIC_DEVICE_ID,
+ GELIC_LV1_GET_VLAN_ID,
+ GELIC_LV1_VLAN_TX_ETHERNET_0, 0, 0,
+ &vlan_id, &v2);
+ if (!result) {
+ h_eth->h_proto = ETH_P_8021Q;
+
+ header_size += sizeof(struct vlan_hdr);
+ h_vlan = (struct vlan_hdr *)(h_eth + 1);
+ h_vlan->h_vlan_TCI = vlan_id;
+ h_vlan->h_vlan_encapsulated_proto = ETH_P_IP;
+ h_ip = (struct iphdr *)(h_vlan + 1);
+ } else {
+ h_eth->h_proto = ETH_P_IP; /* same value as the former literal 0x0800 */
+ h_ip = (struct iphdr *)(h_eth + 1);
+ }
+ /* Fixed IPv4 fields: protocol 0x11 is UDP, source 0.0.0.0, destination broadcast. */
+ header_size += sizeof(struct iphdr);
+ h_ip->version = 4;
+ h_ip->ihl = 5;
+ h_ip->ttl = 10;
+ h_ip->protocol = 0x11;
+ h_ip->saddr = 0x00000000;
+ h_ip->daddr = 0xffffffff;
+ /* The UDP header follows the IP header directly. */
+ header_size += sizeof(struct udphdr);
+ h_udp = (struct udphdr *)(h_ip + 1);
+ h_udp->source = GELIC_DEBUG_PORT;
+ h_udp->dest = GELIC_DEBUG_PORT;
+ /* The message area starts right after the UDP header. */
+ pmsgc = pmsg = (char *)(h_udp + 1);
+}
+
+static void gelic_debug_shutdown(void)
+{
+ if (bus_addr) /* still 0 if map_dma_mem() never stored an address */
+ unmap_dma_mem(GELIC_BUS_ID, GELIC_DEVICE_ID,
+ bus_addr, sizeof(dbg)); /* return value not checked */
+ lv1_close_device(GELIC_BUS_ID, GELIC_DEVICE_ID); /* done unconditionally */
+}
+
+static void gelic_sendbuf(int msgsize)
+{
+ u16 *p;
+ u32 sum;
+ int i;
+ /* Per-message fields: descriptor length, IP total length, UDP length. */
+ dbg.descr.buf_size = header_size + msgsize;
+ h_ip->tot_len = msgsize + sizeof(struct udphdr) +
+ sizeof(struct iphdr);
+ h_udp->len = msgsize + sizeof(struct udphdr);
+ /* IP header checksum. Only the first five 16-bit words are summed: */
+ h_ip->check = 0; /* the other five are check, saddr (0) and daddr */
+ sum = 0; /* (all ones), which add nothing in one's complement */
+ p = (u16 *)h_ip;
+ for (i = 0; i < 5; i++)
+ sum += *p++;
+ h_ip->check = ~(sum + (sum >> 16));
+ /* Arm the descriptor: the command bits include GELIC_DESCR_DMA_CARDOWNED. */
+ dbg.descr.dmac_cmd_status = GELIC_DESCR_DMA_CMD_NO_CHKSUM |
+ GELIC_DESCR_TX_DMA_FRAME_TAIL;
+ dbg.descr.result_size = 0;
+ dbg.descr.data_status = 0;
+ /* Order the descriptor and packet writes before the DMA is started. */
+ wmb();
+
+ lv1_net_start_tx_dma(GELIC_BUS_ID, GELIC_DEVICE_ID, bus_addr, 0);
+ /* Busy-wait until the status field no longer reads CARDOWNED. */
+ while ((dbg.descr.dmac_cmd_status & GELIC_DESCR_DMA_STAT_MASK) ==
+ GELIC_DESCR_DMA_CARDOWNED)
+ cpu_relax();
+}
+
+static void ps3gelic_udbg_putc(char ch)
+{
+ *pmsgc++ = ch; /* append to the pending message */
+ if (ch != '\n' && (pmsgc - pmsg) < GELIC_MAX_MESSAGE_SIZE)
+ return; /* keep buffering until newline or size limit */
+ gelic_sendbuf(pmsgc - pmsg);
+ pmsgc = pmsg; /* next message starts at the buffer head */
+}
+
+void __init udbg_init_ps3gelic(void)
+{
+ gelic_debug_init(); /* open device, map dbg, prebuild headers */
+ udbg_putc = ps3gelic_udbg_putc; /* install this file's putc as the udbg hook */
+}
+
+void udbg_shutdown_ps3gelic(void)
+{
+ udbg_putc = NULL; /* clear the hook before the device is torn down */
+ gelic_debug_shutdown(); /* unmap dbg (if mapped) and close the device */
+}
+EXPORT_SYMBOL(udbg_shutdown_ps3gelic);
diff --git a/arch/powerpc/platforms/ps3/htab.c b/arch/powerpc/platforms/ps3/htab.c
new file mode 100644
index 000000000..9de62bd52
--- /dev/null
+++ b/arch/powerpc/platforms/ps3/htab.c
@@ -0,0 +1,195 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * PS3 pagetable management routines.
+ *
+ * Copyright (C) 2006 Sony Computer Entertainment Inc.
+ * Copyright 2006, 2007 Sony Corporation
+ */
+
+#include <linux/kernel.h>
+#include <linux/memblock.h>
+
+#include <asm/machdep.h>
+#include <asm/udbg.h>
+#include <asm/lv1call.h>
+#include <asm/ps3fb.h>
+
+#define PS3_VERBOSE_RESULT
+#include "platform.h"
+
+/**
+ * enum ps3_lpar_vas_id - id of LPAR virtual address space.
+ * @PS3_LPAR_VAS_ID_CURRENT: Currently selected virtual address space
+ *
+ * Identifies the target LPAR address space.
+ */
+
+enum ps3_lpar_vas_id {
+ PS3_LPAR_VAS_ID_CURRENT = 0, /* first argument of the lv1 htab calls in this file */
+};
+
+
+static DEFINE_SPINLOCK(ps3_htab_lock); /* held around the lv1 calls in insert/updatepp/invalidate */
+
+static long ps3_hpte_insert(unsigned long hpte_group, unsigned long vpn,
+ unsigned long pa, unsigned long rflags, unsigned long vflags,
+ int psize, int apsize, int ssize)
+{
+ int result;
+ u64 hpte_v, hpte_r;
+ u64 inserted_index;
+ u64 evicted_v, evicted_r;
+ u64 hpte_v_array[4], hpte_rs;
+ unsigned long flags;
+ long ret = -1;
+
+ /*
+ * lv1_insert_htab_entry() will search for victim
+ * entry in both primary and secondary pte group
+ */
+ vflags &= ~HPTE_V_SECONDARY;
+ /* Encode both HPTE words; pa goes through ps3_mm_phys_to_lpar() first. */
+ hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID;
+ hpte_r = hpte_encode_r(ps3_mm_phys_to_lpar(pa), psize, apsize) | rflags;
+ /* The lock covers the insert and the read-back below. */
+ spin_lock_irqsave(&ps3_htab_lock, flags);
+
+ /* ask the HV to insert; only entries with BOLTED == 0 may be replaced */
+ result = lv1_insert_htab_entry(PS3_LPAR_VAS_ID_CURRENT, hpte_group,
+ hpte_v, hpte_r,
+ HPTE_V_BOLTED, 0,
+ &inserted_index,
+ &evicted_v, &evicted_r);
+
+ if (result) {
+ /* all entries bolted !*/
+ pr_info("%s:result=%s vpn=%lx pa=%lx ix=%lx v=%llx r=%llx\n",
+ __func__, ps3_result(result), vpn, pa, hpte_group,
+ hpte_v, hpte_r);
+ BUG();
+ }
+
+ /*
+ * see if the entry is inserted into secondary pteg
+ */
+ result = lv1_read_htab_entries(PS3_LPAR_VAS_ID_CURRENT,
+ inserted_index & ~0x3UL,
+ &hpte_v_array[0], &hpte_v_array[1],
+ &hpte_v_array[2], &hpte_v_array[3],
+ &hpte_rs);
+ BUG_ON(result);
+ /* Return the low 3 bits of the index; bit 3 is set for a secondary entry. */
+ if (hpte_v_array[inserted_index % 4] & HPTE_V_SECONDARY)
+ ret = (inserted_index & 7) | (1 << 3);
+ else
+ ret = inserted_index & 7;
+
+ spin_unlock_irqrestore(&ps3_htab_lock, flags);
+
+ return ret;
+}
+
+static long ps3_hpte_remove(unsigned long hpte_group)
+{
+ panic("ps3_hpte_remove() not implemented"); /* hpte_group is ignored */
+ return 0; /* not reached; satisfies the long return type */
+}
+
+static long ps3_hpte_updatepp(unsigned long slot, unsigned long newpp,
+ unsigned long vpn, int psize, int apsize,
+ int ssize, unsigned long inv_flags)
+{
+ int result;
+ u64 hpte_v, want_v, hpte_rs;
+ u64 hpte_v_array[4];
+ unsigned long flags;
+ long ret;
+ /* newpp, apsize and inv_flags are unused: the entry is never updated in place. */
+ want_v = hpte_encode_avpn(vpn, psize, ssize);
+
+ spin_lock_irqsave(&ps3_htab_lock, flags);
+ /* Read the four entries starting at slot rounded down to a multiple of 4. */
+ result = lv1_read_htab_entries(PS3_LPAR_VAS_ID_CURRENT, slot & ~0x3UL,
+ &hpte_v_array[0], &hpte_v_array[1],
+ &hpte_v_array[2], &hpte_v_array[3],
+ &hpte_rs);
+
+ if (result) {
+ pr_info("%s: result=%s read vpn=%lx slot=%lx psize=%d\n",
+ __func__, ps3_result(result), vpn, slot, psize);
+ BUG();
+ }
+ /* Pick this slot's word out of the four just read. */
+ hpte_v = hpte_v_array[slot % 4];
+
+ /*
+ * As lv1_read_htab_entries() does not give us the RPN, we can
+ * not synthesize the new hpte_r value here, and therefore can
+ * not update the hpte with lv1_insert_htab_entry(), so we
+ * instead invalidate it and ask the caller to update it via
+ * ps3_hpte_insert() by returning a -1 value.
+ */
+ if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID)) {
+ /* not found */
+ ret = -1;
+ } else {
+ /* entry found, just invalidate it; NOTE(review): result is not checked */
+ result = lv1_write_htab_entry(PS3_LPAR_VAS_ID_CURRENT,
+ slot, 0, 0);
+ ret = -1; /* still -1, so the caller re-inserts */
+ }
+
+ spin_unlock_irqrestore(&ps3_htab_lock, flags);
+ return ret;
+}
+
+static void ps3_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,
+ int psize, int ssize)
+{
+ pr_info("ps3_hpte_updateboltedpp() not implemented\n"); /* stub: only logs, all arguments ignored */
+}
+
+static void ps3_hpte_invalidate(unsigned long slot, unsigned long vpn,
+ int psize, int apsize, int ssize, int local)
+{
+ unsigned long flags;
+ int result;
+ /* apsize, ssize and local are unused; vpn and psize only feed the log. */
+ spin_lock_irqsave(&ps3_htab_lock, flags);
+ /* Overwrite the slot with an all-zero entry. */
+ result = lv1_write_htab_entry(PS3_LPAR_VAS_ID_CURRENT, slot, 0, 0);
+ /* A failed write is fatal; BUG() is reached with the lock still held. */
+ if (result) {
+ pr_info("%s: result=%s vpn=%lx slot=%lx psize=%d\n",
+ __func__, ps3_result(result), vpn, slot, psize);
+ BUG();
+ }
+
+ spin_unlock_irqrestore(&ps3_htab_lock, flags);
+}
+
+/* Called during kexec sequence with MMU off */
+static notrace void ps3_hpte_clear(void)
+{
+ /* NOTE(review): >> 4 presumably turns table bytes into 16-byte entries. */
+ const unsigned long nr_entries = (1UL << ppc64_pft_size) >> 4;
+ u64 slot;
+
+ /* Zero every slot; the result of each write is ignored. */
+ for (slot = 0; slot != nr_entries; slot++)
+ lv1_write_htab_entry(PS3_LPAR_VAS_ID_CURRENT, slot, 0, 0);
+ ps3_mm_shutdown(); ps3_mm_vas_destroy();
+}
+
+void __init ps3_hpte_init(unsigned long htab_size)
+{
+ /* log2 of the table size; ps3_hpte_clear() derives its loop bound from it. */
+ ppc64_pft_size = __ilog2(htab_size);
+ /* Install the PS3 implementations of the hash-MMU hooks. */
+ mmu_hash_ops.hpte_insert = ps3_hpte_insert;
+ mmu_hash_ops.hpte_remove = ps3_hpte_remove;
+ mmu_hash_ops.hpte_invalidate = ps3_hpte_invalidate;
+ mmu_hash_ops.hpte_updatepp = ps3_hpte_updatepp;
+ mmu_hash_ops.hpte_updateboltedpp = ps3_hpte_updateboltedpp; mmu_hash_ops.hpte_clear_all = ps3_hpte_clear;
+}
+
diff --git a/arch/powerpc/platforms/ps3/hvcall.S b/arch/powerpc/platforms/ps3/hvcall.S
new file mode 100644
index 000000000..509e30ad0
--- /dev/null
+++ b/arch/powerpc/platforms/ps3/hvcall.S
@@ -0,0 +1,792 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * PS3 hvcall interface.
+ *
+ * Copyright (C) 2006 Sony Computer Entertainment Inc.
+ * Copyright 2006 Sony Corp.
+ * Copyright 2003, 2004 (c) MontaVista Software, Inc.
+ */
+
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+
+#define lv1call .long 0x44000022; extsw r3, r3 /* 0x44000022 encodes "sc 1"; then sign-extend the 32-bit status in r3 */
+
+#define LV1_N_IN_0_OUT(API_NAME, API_NUMBER) \
+_GLOBAL(_##API_NAME) \
+ /* No outputs: argument registers reach the call untouched. */ \
+ mflr r0; \
+ std r0, 16(r1); \
+ /* r11 is loaded with the API number right before the call. */ \
+ li r11, API_NUMBER; \
+ lv1call; \
+ /* r3 now holds the sign-extended status (see lv1call). */ \
+ ld r0, 16(r1); \
+ mtlr r0; \
+ blr
+/* With no outputs the stub is the same for every input count. */
+#define LV1_0_IN_0_OUT LV1_N_IN_0_OUT
+#define LV1_1_IN_0_OUT LV1_N_IN_0_OUT
+#define LV1_2_IN_0_OUT LV1_N_IN_0_OUT
+#define LV1_3_IN_0_OUT LV1_N_IN_0_OUT
+#define LV1_4_IN_0_OUT LV1_N_IN_0_OUT
+#define LV1_5_IN_0_OUT LV1_N_IN_0_OUT
+#define LV1_6_IN_0_OUT LV1_N_IN_0_OUT
+#define LV1_7_IN_0_OUT LV1_N_IN_0_OUT
+
+#define LV1_0_IN_1_OUT(API_NAME, API_NUMBER) \
+_GLOBAL(_##API_NAME) \
+ /* r3: output pointer, pushed below r1 before the call. */ \
+ mflr r0; \
+ std r0, 16(r1); \
+ \
+ stdu r3, -8(r1); \
+ \
+ li r11, API_NUMBER; \
+ lv1call; \
+ /* Pop; store r4 (as left by the call) through the saved pointer. */ \
+ addi r1, r1, 8; \
+ ld r11, -8(r1); \
+ std r4, 0(r11); \
+ \
+ ld r0, 16(r1); \
+ mtlr r0; \
+ blr
+
+#define LV1_0_IN_2_OUT(API_NAME, API_NUMBER) \
+_GLOBAL(_##API_NAME) \
+ /* r3, r4: output pointers, pushed below r1 before the call. */ \
+ mflr r0; \
+ std r0, 16(r1); \
+ \
+ std r3, -8(r1); \
+ stdu r4, -16(r1); \
+ \
+ li r11, API_NUMBER; \
+ lv1call; \
+ /* Pop; store r4, r5 (as left by the call) through the saved pointers. */ \
+ addi r1, r1, 16; \
+ ld r11, -8(r1); \
+ std r4, 0(r11); \
+ ld r11, -16(r1); \
+ std r5, 0(r11); \
+ \
+ ld r0, 16(r1); \
+ mtlr r0; \
+ blr
+
+#define LV1_0_IN_3_OUT(API_NAME, API_NUMBER) \
+_GLOBAL(_##API_NAME) \
+ /* r3-r5: output pointers, pushed below r1 before the call. */ \
+ mflr r0; \
+ std r0, 16(r1); \
+ \
+ std r3, -8(r1); \
+ std r4, -16(r1); \
+ stdu r5, -24(r1); \
+ \
+ li r11, API_NUMBER; \
+ lv1call; \
+ /* Pop; store r4-r6 (as left by the call) through the saved pointers. */ \
+ addi r1, r1, 24; \
+ ld r11, -8(r1); \
+ std r4, 0(r11); \
+ ld r11, -16(r1); \
+ std r5, 0(r11); \
+ ld r11, -24(r1); \
+ std r6, 0(r11); \
+ \
+ ld r0, 16(r1); \
+ mtlr r0; \
+ blr
+
+#define LV1_0_IN_7_OUT(API_NAME, API_NUMBER) \
+_GLOBAL(_##API_NAME) \
+ /* r3-r9: output pointers, pushed below r1 before the call. */ \
+ mflr r0; \
+ std r0, 16(r1); \
+ \
+ std r3, -8(r1); \
+ std r4, -16(r1); \
+ std r5, -24(r1); \
+ std r6, -32(r1); \
+ std r7, -40(r1); \
+ std r8, -48(r1); \
+ stdu r9, -56(r1); \
+ \
+ li r11, API_NUMBER; \
+ lv1call; \
+ /* Pop; store r4-r10 (as left by the call) through the saved pointers. */ \
+ addi r1, r1, 56; \
+ ld r11, -8(r1); \
+ std r4, 0(r11); \
+ ld r11, -16(r1); \
+ std r5, 0(r11); \
+ ld r11, -24(r1); \
+ std r6, 0(r11); \
+ ld r11, -32(r1); \
+ std r7, 0(r11); \
+ ld r11, -40(r1); \
+ std r8, 0(r11); \
+ ld r11, -48(r1); \
+ std r9, 0(r11); \
+ ld r11, -56(r1); \
+ std r10, 0(r11); \
+ \
+ ld r0, 16(r1); \
+ mtlr r0; \
+ blr
+
+#define LV1_1_IN_1_OUT(API_NAME, API_NUMBER) \
+_GLOBAL(_##API_NAME) \
+ /* r3: input; r4: output pointer, pushed below r1 before the call. */ \
+ mflr r0; \
+ std r0, 16(r1); \
+ \
+ stdu r4, -8(r1); \
+ \
+ li r11, API_NUMBER; \
+ lv1call; \
+ /* Pop; store r4 (as left by the call) through the saved pointer. */ \
+ addi r1, r1, 8; \
+ ld r11, -8(r1); \
+ std r4, 0(r11); \
+ \
+ ld r0, 16(r1); \
+ mtlr r0; \
+ blr
+
+#define LV1_1_IN_2_OUT(API_NAME, API_NUMBER) \
+_GLOBAL(_##API_NAME) \
+ /* r3: input; r4, r5: output pointers, pushed below r1. */ \
+ mflr r0; \
+ std r0, 16(r1); \
+ \
+ std r4, -8(r1); \
+ stdu r5, -16(r1); \
+ \
+ li r11, API_NUMBER; \
+ lv1call; \
+ /* Pop; store r4, r5 (as left by the call) through the saved pointers. */ \
+ addi r1, r1, 16; \
+ ld r11, -8(r1); \
+ std r4, 0(r11); \
+ ld r11, -16(r1); \
+ std r5, 0(r11); \
+ \
+ ld r0, 16(r1); \
+ mtlr r0; \
+ blr
+
+#define LV1_1_IN_3_OUT(API_NAME, API_NUMBER) \
+_GLOBAL(_##API_NAME) \
+ /* r3: input; r4-r6: output pointers, pushed below r1. */ \
+ mflr r0; \
+ std r0, 16(r1); \
+ \
+ std r4, -8(r1); \
+ std r5, -16(r1); \
+ stdu r6, -24(r1); \
+ \
+ li r11, API_NUMBER; \
+ lv1call; \
+ /* Pop; store r4-r6 (as left by the call) through the saved pointers. */ \
+ addi r1, r1, 24; \
+ ld r11, -8(r1); \
+ std r4, 0(r11); \
+ ld r11, -16(r1); \
+ std r5, 0(r11); \
+ ld r11, -24(r1); \
+ std r6, 0(r11); \
+ \
+ ld r0, 16(r1); \
+ mtlr r0; \
+ blr
+
+#define LV1_1_IN_4_OUT(API_NAME, API_NUMBER) \
+_GLOBAL(_##API_NAME) \
+ /* r3: input; r4-r7: output pointers, pushed below r1. */ \
+ mflr r0; \
+ std r0, 16(r1); \
+ \
+ std r4, -8(r1); \
+ std r5, -16(r1); \
+ std r6, -24(r1); \
+ stdu r7, -32(r1); \
+ \
+ li r11, API_NUMBER; \
+ lv1call; \
+ /* Pop; store r4-r7 (as left by the call) through the saved pointers. */ \
+ addi r1, r1, 32; \
+ ld r11, -8(r1); \
+ std r4, 0(r11); \
+ ld r11, -16(r1); \
+ std r5, 0(r11); \
+ ld r11, -24(r1); \
+ std r6, 0(r11); \
+ ld r11, -32(r1); \
+ std r7, 0(r11); \
+ \
+ ld r0, 16(r1); \
+ mtlr r0; \
+ blr
+
+#define LV1_1_IN_5_OUT(API_NAME, API_NUMBER) \
+_GLOBAL(_##API_NAME) \
+ /* r3: input; r4-r8: output pointers, pushed below r1. */ \
+ mflr r0; \
+ std r0, 16(r1); \
+ \
+ std r4, -8(r1); \
+ std r5, -16(r1); \
+ std r6, -24(r1); \
+ std r7, -32(r1); \
+ stdu r8, -40(r1); \
+ \
+ li r11, API_NUMBER; \
+ lv1call; \
+ /* Pop; store r4-r8 (as left by the call) through the saved pointers. */ \
+ addi r1, r1, 40; \
+ ld r11, -8(r1); \
+ std r4, 0(r11); \
+ ld r11, -16(r1); \
+ std r5, 0(r11); \
+ ld r11, -24(r1); \
+ std r6, 0(r11); \
+ ld r11, -32(r1); \
+ std r7, 0(r11); \
+ ld r11, -40(r1); \
+ std r8, 0(r11); \
+ \
+ ld r0, 16(r1); \
+ mtlr r0; \
+ blr
+
+#define LV1_1_IN_6_OUT(API_NAME, API_NUMBER) \
+_GLOBAL(_##API_NAME) \
+ /* r3: input; r4-r9: output pointers, pushed below r1. */ \
+ mflr r0; \
+ std r0, 16(r1); \
+ \
+ std r4, -8(r1); \
+ std r5, -16(r1); \
+ std r6, -24(r1); \
+ std r7, -32(r1); \
+ std r8, -40(r1); \
+ stdu r9, -48(r1); \
+ \
+ li r11, API_NUMBER; \
+ lv1call; \
+ /* Pop; store r4-r9 (as left by the call) through the saved pointers. */ \
+ addi r1, r1, 48; \
+ ld r11, -8(r1); \
+ std r4, 0(r11); \
+ ld r11, -16(r1); \
+ std r5, 0(r11); \
+ ld r11, -24(r1); \
+ std r6, 0(r11); \
+ ld r11, -32(r1); \
+ std r7, 0(r11); \
+ ld r11, -40(r1); \
+ std r8, 0(r11); \
+ ld r11, -48(r1); \
+ std r9, 0(r11); \
+ \
+ ld r0, 16(r1); \
+ mtlr r0; \
+ blr
+
+#define LV1_1_IN_7_OUT(API_NAME, API_NUMBER) \
+_GLOBAL(_##API_NAME) \
+ /* r3: input; r4-r10: output pointers, pushed below r1. */ \
+ mflr r0; \
+ std r0, 16(r1); \
+ \
+ std r4, -8(r1); \
+ std r5, -16(r1); \
+ std r6, -24(r1); \
+ std r7, -32(r1); \
+ std r8, -40(r1); \
+ std r9, -48(r1); \
+ stdu r10, -56(r1); \
+ \
+ li r11, API_NUMBER; \
+ lv1call; \
+ /* Pop; store r4-r10 (as left by the call) through the saved pointers. */ \
+ addi r1, r1, 56; \
+ ld r11, -8(r1); \
+ std r4, 0(r11); \
+ ld r11, -16(r1); \
+ std r5, 0(r11); \
+ ld r11, -24(r1); \
+ std r6, 0(r11); \
+ ld r11, -32(r1); \
+ std r7, 0(r11); \
+ ld r11, -40(r1); \
+ std r8, 0(r11); \
+ ld r11, -48(r1); \
+ std r9, 0(r11); \
+ ld r11, -56(r1); \
+ std r10, 0(r11); \
+ \
+ ld r0, 16(r1); \
+ mtlr r0; \
+ blr
+
+#define LV1_2_IN_1_OUT(API_NAME, API_NUMBER) \
+_GLOBAL(_##API_NAME) \
+ /* r3, r4: inputs; r5: output pointer, pushed below r1. */ \
+ mflr r0; \
+ std r0, 16(r1); \
+ \
+ stdu r5, -8(r1); \
+ \
+ li r11, API_NUMBER; \
+ lv1call; \
+ /* Pop; store r4 (as left by the call) through the saved pointer. */ \
+ addi r1, r1, 8; \
+ ld r11, -8(r1); \
+ std r4, 0(r11); \
+ \
+ ld r0, 16(r1); \
+ mtlr r0; \
+ blr
+
+#define LV1_2_IN_2_OUT(API_NAME, API_NUMBER) \
+_GLOBAL(_##API_NAME) \
+ /* r3, r4: inputs; r5, r6: output pointers, pushed below r1. */ \
+ mflr r0; \
+ std r0, 16(r1); \
+ \
+ std r5, -8(r1); \
+ stdu r6, -16(r1); \
+ \
+ li r11, API_NUMBER; \
+ lv1call; \
+ /* Pop; store r4, r5 (as left by the call) through the saved pointers. */ \
+ addi r1, r1, 16; \
+ ld r11, -8(r1); \
+ std r4, 0(r11); \
+ ld r11, -16(r1); \
+ std r5, 0(r11); \
+ \
+ ld r0, 16(r1); \
+ mtlr r0; \
+ blr
+
+#define LV1_2_IN_3_OUT(API_NAME, API_NUMBER) \
+_GLOBAL(_##API_NAME) \
+ /* r3, r4: inputs; r5-r7: output pointers, pushed below r1. */ \
+ mflr r0; \
+ std r0, 16(r1); \
+ \
+ std r5, -8(r1); \
+ std r6, -16(r1); \
+ stdu r7, -24(r1); \
+ \
+ li r11, API_NUMBER; \
+ lv1call; \
+ /* Pop; store r4-r6 (as left by the call) through the saved pointers. */ \
+ addi r1, r1, 24; \
+ ld r11, -8(r1); \
+ std r4, 0(r11); \
+ ld r11, -16(r1); \
+ std r5, 0(r11); \
+ ld r11, -24(r1); \
+ std r6, 0(r11); \
+ \
+ ld r0, 16(r1); \
+ mtlr r0; \
+ blr
+
+#define LV1_2_IN_4_OUT(API_NAME, API_NUMBER) \
+_GLOBAL(_##API_NAME) \
+ /* r3, r4: inputs; r5-r8: output pointers, pushed below r1. */ \
+ mflr r0; \
+ std r0, 16(r1); \
+ \
+ std r5, -8(r1); \
+ std r6, -16(r1); \
+ std r7, -24(r1); \
+ stdu r8, -32(r1); \
+ \
+ li r11, API_NUMBER; \
+ lv1call; \
+ /* Pop; store r4-r7 (as left by the call) through the saved pointers. */ \
+ addi r1, r1, 32; \
+ ld r11, -8(r1); \
+ std r4, 0(r11); \
+ ld r11, -16(r1); \
+ std r5, 0(r11); \
+ ld r11, -24(r1); \
+ std r6, 0(r11); \
+ ld r11, -32(r1); \
+ std r7, 0(r11); \
+ \
+ ld r0, 16(r1); \
+ mtlr r0; \
+ blr
+
+#define LV1_2_IN_5_OUT(API_NAME, API_NUMBER) \
+_GLOBAL(_##API_NAME) \
+ /* r3, r4: inputs; r5-r9: output pointers, pushed below r1. */ \
+ mflr r0; \
+ std r0, 16(r1); \
+ \
+ std r5, -8(r1); \
+ std r6, -16(r1); \
+ std r7, -24(r1); \
+ std r8, -32(r1); \
+ stdu r9, -40(r1); \
+ \
+ li r11, API_NUMBER; \
+ lv1call; \
+ /* Pop; store r4-r8 (as left by the call) through the saved pointers. */ \
+ addi r1, r1, 40; \
+ ld r11, -8(r1); \
+ std r4, 0(r11); \
+ ld r11, -16(r1); \
+ std r5, 0(r11); \
+ ld r11, -24(r1); \
+ std r6, 0(r11); \
+ ld r11, -32(r1); \
+ std r7, 0(r11); \
+ ld r11, -40(r1); \
+ std r8, 0(r11); \
+ \
+ ld r0, 16(r1); \
+ mtlr r0; \
+ blr
+
+#define LV1_3_IN_1_OUT(API_NAME, API_NUMBER) \
+_GLOBAL(_##API_NAME) \
+ /* r3-r5: inputs; r6: output pointer, pushed below r1. */ \
+ mflr r0; \
+ std r0, 16(r1); \
+ \
+ stdu r6, -8(r1); \
+ \
+ li r11, API_NUMBER; \
+ lv1call; \
+ /* Pop; store r4 (as left by the call) through the saved pointer. */ \
+ addi r1, r1, 8; \
+ ld r11, -8(r1); \
+ std r4, 0(r11); \
+ \
+ ld r0, 16(r1); \
+ mtlr r0; \
+ blr
+
+#define LV1_3_IN_2_OUT(API_NAME, API_NUMBER) \
+_GLOBAL(_##API_NAME) \
+ /* r3-r5: inputs; r6, r7: output pointers, pushed below r1. */ \
+ mflr r0; \
+ std r0, 16(r1); \
+ \
+ std r6, -8(r1); \
+ stdu r7, -16(r1); \
+ \
+ li r11, API_NUMBER; \
+ lv1call; \
+ /* Pop; store r4, r5 (as left by the call) through the saved pointers. */ \
+ addi r1, r1, 16; \
+ ld r11, -8(r1); \
+ std r4, 0(r11); \
+ ld r11, -16(r1); \
+ std r5, 0(r11); \
+ \
+ ld r0, 16(r1); \
+ mtlr r0; \
+ blr
+
+#define LV1_3_IN_3_OUT(API_NAME, API_NUMBER) \
+_GLOBAL(_##API_NAME) \
+ /* r3-r5: inputs; r6-r8: output pointers, pushed below r1. */ \
+ mflr r0; \
+ std r0, 16(r1); \
+ \
+ std r6, -8(r1); \
+ std r7, -16(r1); \
+ stdu r8, -24(r1); \
+ \
+ li r11, API_NUMBER; \
+ lv1call; \
+ /* Pop; store r4-r6 (as left by the call) through the saved pointers. */ \
+ addi r1, r1, 24; \
+ ld r11, -8(r1); \
+ std r4, 0(r11); \
+ ld r11, -16(r1); \
+ std r5, 0(r11); \
+ ld r11, -24(r1); \
+ std r6, 0(r11); \
+ \
+ ld r0, 16(r1); \
+ mtlr r0; \
+ blr
+
+#define LV1_4_IN_1_OUT(API_NAME, API_NUMBER) \
+_GLOBAL(_##API_NAME) \
+ /* r3-r6: inputs; r7: output pointer, pushed below r1. */ \
+ mflr r0; \
+ std r0, 16(r1); \
+ \
+ stdu r7, -8(r1); \
+ \
+ li r11, API_NUMBER; \
+ lv1call; \
+ /* Pop; store r4 (as left by the call) through the saved pointer. */ \
+ addi r1, r1, 8; \
+ ld r11, -8(r1); \
+ std r4, 0(r11); \
+ \
+ ld r0, 16(r1); \
+ mtlr r0; \
+ blr
+
+#define LV1_4_IN_2_OUT(API_NAME, API_NUMBER) \
+_GLOBAL(_##API_NAME) \
+ /* r3-r6: inputs; r7, r8: output pointers, pushed below r1. */ \
+ mflr r0; \
+ std r0, 16(r1); \
+ \
+ std r7, -8(r1); \
+ stdu r8, -16(r1); \
+ \
+ li r11, API_NUMBER; \
+ lv1call; \
+ /* Pop; store r4, r5 (as left by the call) through the saved pointers. */ \
+ addi r1, r1, 16; \
+ ld r11, -8(r1); \
+ std r4, 0(r11); \
+ ld r11, -16(r1); \
+ std r5, 0(r11); \
+ \
+ ld r0, 16(r1); \
+ mtlr r0; \
+ blr
+
+#define LV1_4_IN_3_OUT(API_NAME, API_NUMBER) \
+_GLOBAL(_##API_NAME) \
+ /* r3-r6: inputs; r7-r9: output pointers, pushed below r1. */ \
+ mflr r0; \
+ std r0, 16(r1); \
+ \
+ std r7, -8(r1); \
+ std r8, -16(r1); \
+ stdu r9, -24(r1); \
+ \
+ li r11, API_NUMBER; \
+ lv1call; \
+ /* Pop; store r4-r6 (as left by the call) through the saved pointers. */ \
+ addi r1, r1, 24; \
+ ld r11, -8(r1); \
+ std r4, 0(r11); \
+ ld r11, -16(r1); \
+ std r5, 0(r11); \
+ ld r11, -24(r1); \
+ std r6, 0(r11); \
+ \
+ ld r0, 16(r1); \
+ mtlr r0; \
+ blr
+
+#define LV1_5_IN_1_OUT(API_NAME, API_NUMBER) \
+_GLOBAL(_##API_NAME) \
+ /* r3-r7: inputs; r8: output pointer, pushed below r1. */ \
+ mflr r0; \
+ std r0, 16(r1); \
+ \
+ stdu r8, -8(r1); \
+ \
+ li r11, API_NUMBER; \
+ lv1call; \
+ /* Pop; store r4 (as left by the call) through the saved pointer. */ \
+ addi r1, r1, 8; \
+ ld r11, -8(r1); \
+ std r4, 0(r11); \
+ \
+ ld r0, 16(r1); \
+ mtlr r0; \
+ blr
+
+#define LV1_5_IN_2_OUT(API_NAME, API_NUMBER) \
+_GLOBAL(_##API_NAME) \
+ /* r3-r7: inputs; r8, r9: output pointers, pushed below r1. */ \
+ mflr r0; \
+ std r0, 16(r1); \
+ \
+ std r8, -8(r1); \
+ stdu r9, -16(r1); \
+ \
+ li r11, API_NUMBER; \
+ lv1call; \
+ /* Pop; store r4, r5 (as left by the call) through the saved pointers. */ \
+ addi r1, r1, 16; \
+ ld r11, -8(r1); \
+ std r4, 0(r11); \
+ ld r11, -16(r1); \
+ std r5, 0(r11); \
+ \
+ ld r0, 16(r1); \
+ mtlr r0; \
+ blr
+
+#define LV1_5_IN_3_OUT(API_NAME, API_NUMBER) \
+_GLOBAL(_##API_NAME) \
+ /* r3-r7: inputs; r8-r10: output pointers, pushed below r1. */ \
+ mflr r0; \
+ std r0, 16(r1); \
+ \
+ std r8, -8(r1); \
+ std r9, -16(r1); \
+ stdu r10, -24(r1); \
+ \
+ li r11, API_NUMBER; \
+ lv1call; \
+ /* Pop; store r4-r6 (as left by the call) through the saved pointers. */ \
+ addi r1, r1, 24; \
+ ld r11, -8(r1); \
+ std r4, 0(r11); \
+ ld r11, -16(r1); \
+ std r5, 0(r11); \
+ ld r11, -24(r1); \
+ std r6, 0(r11); \
+ \
+ ld r0, 16(r1); \
+ mtlr r0; \
+ blr
+
+#define LV1_6_IN_1_OUT(API_NAME, API_NUMBER) \
+_GLOBAL(_##API_NAME) \
+ /* r3-r8: inputs; r9: output pointer, pushed below r1. */ \
+ mflr r0; \
+ std r0, 16(r1); \
+ \
+ stdu r9, -8(r1); \
+ \
+ li r11, API_NUMBER; \
+ lv1call; \
+ /* Pop; store r4 (as left by the call) through the saved pointer. */ \
+ addi r1, r1, 8; \
+ ld r11, -8(r1); \
+ std r4, 0(r11); \
+ \
+ ld r0, 16(r1); \
+ mtlr r0; \
+ blr
+
+#define LV1_6_IN_2_OUT(API_NAME, API_NUMBER) \
+_GLOBAL(_##API_NAME) \
+ /* r3-r8: inputs; r9, r10: output pointers, pushed below r1. */ \
+ mflr r0; \
+ std r0, 16(r1); \
+ \
+ std r9, -8(r1); \
+ stdu r10, -16(r1); \
+ \
+ li r11, API_NUMBER; \
+ lv1call; \
+ /* Pop; store r4, r5 (as left by the call) through the saved pointers. */ \
+ addi r1, r1, 16; \
+ ld r11, -8(r1); \
+ std r4, 0(r11); \
+ ld r11, -16(r1); \
+ std r5, 0(r11); \
+ \
+ ld r0, 16(r1); \
+ mtlr r0; \
+ blr
+
+#define LV1_6_IN_3_OUT(API_NAME, API_NUMBER) \
+_GLOBAL(_##API_NAME) \
+ /* r3-r8: inputs; r9, r10: first two output pointers (pushed). */ \
+ mflr r0; \
+ std r0, 16(r1); \
+ \
+ std r9, -8(r1); \
+ stdu r10, -16(r1); \
+ \
+ li r11, API_NUMBER; \
+ lv1call; \
+ /* Pop; the third output pointer is read from 48+8*8(r1) instead. */ \
+ addi r1, r1, 16; \
+ ld r11, -8(r1); \
+ std r4, 0(r11); \
+ ld r11, -16(r1); \
+ std r5, 0(r11); \
+ ld r11, 48+8*8(r1); \
+ std r6, 0(r11); \
+ \
+ ld r0, 16(r1); \
+ mtlr r0; \
+ blr
+
+#define LV1_7_IN_1_OUT(API_NAME, API_NUMBER) \
+_GLOBAL(_##API_NAME) \
+ /* r3-r9: inputs; r10: output pointer, pushed below r1. */ \
+ mflr r0; \
+ std r0, 16(r1); \
+ \
+ stdu r10, -8(r1); \
+ \
+ li r11, API_NUMBER; \
+ lv1call; \
+ /* Pop; store r4 (as left by the call) through the saved pointer. */ \
+ addi r1, r1, 8; \
+ ld r11, -8(r1); \
+ std r4, 0(r11); \
+ \
+ ld r0, 16(r1); \
+ mtlr r0; \
+ blr
+
+#define LV1_7_IN_6_OUT(API_NAME, API_NUMBER) \
+_GLOBAL(_##API_NAME) \
+ /* r3-r9: inputs; r10: first output pointer. No push here. */ \
+ mflr r0; \
+ std r0, 16(r1); \
+ /* Park r10 at 48+8*7(r1); the other five pointers follow it there. */ \
+ std r10, 48+8*7(r1); \
+ \
+ li r11, API_NUMBER; \
+ lv1call; \
+ /* Store r4-r9 (as left by the call) through the six pointers. */ \
+ ld r11, 48+8*7(r1); \
+ std r4, 0(r11); \
+ ld r11, 48+8*8(r1); \
+ std r5, 0(r11); \
+ ld r11, 48+8*9(r1); \
+ std r6, 0(r11); \
+ ld r11, 48+8*10(r1); \
+ std r7, 0(r11); \
+ ld r11, 48+8*11(r1); \
+ std r8, 0(r11); \
+ ld r11, 48+8*12(r1); \
+ std r9, 0(r11); \
+ \
+ ld r0, 16(r1); \
+ mtlr r0; \
+ blr
+
+#define LV1_8_IN_1_OUT(API_NAME, API_NUMBER) \
+_GLOBAL(_##API_NAME) \
+ /* r3-r10: inputs; nothing needs saving before the call. */ \
+ mflr r0; \
+ std r0, 16(r1); \
+ \
+ li r11, API_NUMBER; \
+ lv1call; \
+ /* The output pointer is read from 48+8*8(r1); r4 is stored through it. */ \
+ ld r11, 48+8*8(r1); \
+ std r4, 0(r11); \
+ \
+ ld r0, 16(r1); \
+ mtlr r0; \
+ blr
+
+ .text
+
+/* the lv1 underscored call definitions expand here */
+/* e.g. LV1_CALL(foo, 1, 2, n) selects LV1_1_IN_2_OUT and emits _lv1_foo. */
+#define LV1_CALL(name, in, out, num) LV1_##in##_IN_##out##_OUT(lv1_##name, num)
+#include <asm/lv1call.h>
diff --git a/arch/powerpc/platforms/ps3/interrupt.c b/arch/powerpc/platforms/ps3/interrupt.c
new file mode 100644
index 000000000..49871427f
--- /dev/null
+++ b/arch/powerpc/platforms/ps3/interrupt.c
@@ -0,0 +1,783 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * PS3 interrupt routines.
+ *
+ * Copyright (C) 2006 Sony Computer Entertainment Inc.
+ * Copyright 2006 Sony Corp.
+ */
+
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
+
+#include <asm/machdep.h>
+#include <asm/udbg.h>
+#include <asm/lv1call.h>
+#include <asm/smp.h>
+
+#include "platform.h"
+
+#if defined(DEBUG)
+#define DBG udbg_printf /* DEBUG builds: both macros go through udbg_printf */
+#define FAIL udbg_printf
+#else
+#define DBG pr_devel /* otherwise: the regular pr_devel/pr_debug macros */
+#define FAIL pr_debug
+#endif
+
+/**
+ * struct ps3_bmp - a per cpu irq status and mask bitmap structure
+ * @status: 256 bit status bitmap indexed by plug
+ * @unused_1: Alignment
+ * @mask: 256 bit mask bitmap indexed by plug
+ * @unused_2: Alignment
+ *
+ * The HV maintains per SMT thread mappings of HV outlet to HV plug on
+ * behalf of the guest. These mappings are implemented as 256 bit guest
+ * supplied bitmaps indexed by plug number. The addresses of the bitmaps
+ * are registered with the HV through lv1_configure_irq_state_bitmap().
+ * The HV requires that the 512 bits of status + mask not cross a page
+ * boundary. PS3_BMP_MINALIGN is used to define this minimal 64 byte
+ * alignment.
+ *
+ * The HV supports 256 plugs per thread, assigned as {0..255}, for a total
+ * of 512 plugs supported on a processor. To simplify the logic this
+ * implementation equates HV plug value to Linux virq value, constrains each
+ * interrupt to have a system wide unique plug number, and limits the range
+ * of the plug values to map into the first dword of the bitmaps. This
+ * gives a usable range of plug values of {NR_IRQS_LEGACY..63}. Note
+ * that there is no constraint on how many in this set an individual thread
+ * can acquire.
+ *
+ * The mask is declared as unsigned long so we can use set/clear_bit on it.
+ */
+
+#define PS3_BMP_MINALIGN 64 /* bytes, i.e. the 512 bits of status + mask */
+
+struct ps3_bmp {
+ struct {
+ u64 status; /* first dword of the 256 bit status bitmap */
+ u64 unused_1[3]; /* remaining 192 bits of status */
+ unsigned long mask; /* first dword of the 256 bit mask bitmap */
+ u64 unused_2[3]; /* remaining 192 bits of mask */
+ };
+};
+
+/**
+ * struct ps3_private - a per cpu data structure
+ * @bmp: ps3_bmp structure
+ * @bmp_lock: Synchronize access to bmp.
+ * @ipi_debug_brk_mask: Mask for debug break IPIs
+ * @ppe_id: HV logical_ppe_id
+ * @thread_id: HV thread_id
+ * @ipi_mask: Mask of IPI virqs
+ */
+
+struct ps3_private {
+ struct ps3_bmp bmp __attribute__ ((aligned (PS3_BMP_MINALIGN)));
+ spinlock_t bmp_lock;
+ u64 ppe_id; /* passed with thread_id to the lv1 interrupt calls */
+ u64 thread_id;
+ unsigned long ipi_debug_brk_mask;
+ unsigned long ipi_mask; /* tested by ps3_chip_eoi() to skip the EOI */
+};
+/* One instance per cpu; ps3_virq_setup() installs its address as chip data. */
+static DEFINE_PER_CPU(struct ps3_private, ps3_private);
+
+/**
+ * ps3_chip_mask - Mask an interrupt by clearing its bit in ps3_bmp.mask.
+ * @d: irq_data of the assigned Linux virq.
+ *
+ * Clears the bit in ps3_bmp.mask and calls lv1_did_update_interrupt_mask().
+ */
+
+static void ps3_chip_mask(struct irq_data *d)
+{
+ struct ps3_private *priv = irq_data_get_irq_chip_data(d);
+ const unsigned int mask_bit = 63 - d->irq; /* virq N is bit 63 - N */
+ unsigned long irq_state;
+
+ DBG("%s:%d: thread_id %llu, virq %d\n", __func__, __LINE__,
+ priv->thread_id, d->irq);
+ local_irq_save(irq_state); /* update and notify with local irqs off */
+ clear_bit(mask_bit, &priv->bmp.mask);
+ lv1_did_update_interrupt_mask(priv->ppe_id, priv->thread_id);
+ local_irq_restore(irq_state);
+}
+
+/**
+ * ps3_chip_unmask - Unmask an interrupt by setting its bit in ps3_bmp.mask.
+ * @d: irq_data of the assigned Linux virq.
+ *
+ * Sets the bit in ps3_bmp.mask and calls lv1_did_update_interrupt_mask().
+ */
+
+static void ps3_chip_unmask(struct irq_data *d)
+{
+ struct ps3_private *priv = irq_data_get_irq_chip_data(d);
+ const unsigned int mask_bit = 63 - d->irq; /* virq N is bit 63 - N */
+ unsigned long irq_state;
+
+ DBG("%s:%d: thread_id %llu, virq %d\n", __func__, __LINE__,
+ priv->thread_id, d->irq);
+ local_irq_save(irq_state); /* update and notify with local irqs off */
+ set_bit(mask_bit, &priv->bmp.mask);
+ lv1_did_update_interrupt_mask(priv->ppe_id, priv->thread_id);
+ local_irq_restore(irq_state);
+}
+
+/**
+ * ps3_chip_eoi - HV end-of-interrupt.
+ * @d: irq_data of the assigned Linux virq.
+ *
+ * Calls lv1_end_of_interrupt_ext() unless the virq is set in ipi_mask.
+ */
+
+static void ps3_chip_eoi(struct irq_data *d)
+{
+ const struct ps3_private *priv = irq_data_get_irq_chip_data(d);
+
+ /* virqs flagged in ipi_mask are IPIs and are not EOIed here. */
+ if (test_bit(63 - d->irq, &priv->ipi_mask))
+ return;
+ lv1_end_of_interrupt_ext(priv->ppe_id, priv->thread_id, d->irq);
+}
+
+/**
+ * ps3_irq_chip - Represents the ps3_bmp as a Linux struct irq_chip.
+ */
+
+static struct irq_chip ps3_irq_chip = {
+ .name = "ps3",
+ .irq_mask = ps3_chip_mask, /* clears the virq's bit in bmp.mask */
+ .irq_unmask = ps3_chip_unmask, /* sets the virq's bit in bmp.mask */
+ .irq_eoi = ps3_chip_eoi, /* skips virqs set in ipi_mask */
+};
+
+/**
+ * ps3_virq_setup - virq related setup.
+ * @cpu: enum ps3_cpu_binding indicating the cpu the interrupt should be
+ * serviced on.
+ * @outlet: The HV outlet from the various create outlet routines.
+ * @virq: The assigned Linux virq.
+ *
+ * Calls irq_create_mapping() to get a virq and sets the chip data to
+ * ps3_private data.
+ */
+
+static int ps3_virq_setup(enum ps3_cpu_binding cpu, unsigned long outlet,
+ unsigned int *virq)
+{
+ int result;
+ struct ps3_private *pd;
+
+ /* This defines the default interrupt distribution policy. */
+
+ if (cpu == PS3_BINDING_CPU_ANY)
+ cpu = 0;
+ /* The per-cpu data selected here becomes the virq's chip data. */
+ pd = &per_cpu(ps3_private, cpu);
+
+ *virq = irq_create_mapping(NULL, outlet);
+ /* A zero virq is treated as failure and reported as -ENOMEM. */
+ if (!*virq) {
+ FAIL("%s:%d: irq_create_mapping failed: outlet %lu\n",
+ __func__, __LINE__, outlet);
+ result = -ENOMEM;
+ goto fail_create;
+ }
+
+ DBG("%s:%d: outlet %lu => cpu %u, virq %u\n", __func__, __LINE__,
+ outlet, cpu, *virq);
+
+ result = irq_set_chip_data(*virq, pd);
+
+ if (result) {
+ FAIL("%s:%d: irq_set_chip_data failed\n",
+ __func__, __LINE__);
+ goto fail_set;
+ }
+ /* Leave the new virq masked. */
+ ps3_chip_mask(irq_get_irq_data(*virq));
+
+ return result;
+
+fail_set:
+ irq_dispose_mapping(*virq);
+fail_create:
+ return result;
+}
+
+/**
+ * ps3_virq_destroy - virq related teardown.
+ * @virq: The assigned Linux virq.
+ *
+ * Clears chip data and calls irq_dispose_mapping() for the virq.
+ */
+
+static int ps3_virq_destroy(unsigned int virq)
+{
+ const struct ps3_private *pd = irq_get_chip_data(virq);
+ /* pd is only used by the trace below. */
+ DBG("%s:%d: ppe_id %llu, thread_id %llu, virq %u\n", __func__,
+ __LINE__, pd->ppe_id, pd->thread_id, virq);
+ /* Detach the chip data, then release the virq mapping. */
+ irq_set_chip_data(virq, NULL);
+ irq_dispose_mapping(virq);
+
+ DBG("%s:%d <-\n", __func__, __LINE__);
+ return 0; /* results of the two calls above are not checked */
+}
+
+/**
+ * ps3_irq_plug_setup - Generic outlet and virq related setup.
+ * @cpu: enum ps3_cpu_binding indicating the cpu the interrupt should be
+ * serviced on.
+ * @outlet: The HV outlet from the various create outlet routines.
+ * @virq: The assigned Linux virq.
+ *
+ * Sets up virq and connects the irq plug.
+ */
+
+int ps3_irq_plug_setup(enum ps3_cpu_binding cpu, unsigned long outlet,
+ unsigned int *virq)
+{
+ int result;
+ struct ps3_private *pd;
+
+ result = ps3_virq_setup(cpu, outlet, virq);
+
+ if (result) {
+ FAIL("%s:%d: ps3_virq_setup failed\n", __func__, __LINE__);
+ goto fail_setup;
+ }
+ /* ps3_virq_setup() stored the chosen cpu's ps3_private as chip data. */
+ pd = irq_get_chip_data(*virq);
+
+ /* Binds outlet to cpu + virq. */
+
+ result = lv1_connect_irq_plug_ext(pd->ppe_id, pd->thread_id, *virq,
+ outlet, 0);
+ /* Any HV failure is reported to the caller as -EPERM. */
+ if (result) {
+ FAIL("%s:%d: lv1_connect_irq_plug_ext failed: %s\n",
+ __func__, __LINE__, ps3_result(result));
+ result = -EPERM;
+ goto fail_connect;
+ }
+
+ return result;
+ /* Unwind: drop the virq again if the plug could not be connected. */
+fail_connect:
+ ps3_virq_destroy(*virq);
+fail_setup:
+ return result;
+}
+EXPORT_SYMBOL_GPL(ps3_irq_plug_setup);
+
+/**
+ * ps3_irq_plug_destroy - Generic outlet and virq related teardown.
+ * @virq: The assigned Linux virq.
+ *
+ * Masks the virq, disconnects the irq plug and tears down the virq. The
+ * virq is torn down even if the disconnect fails; the disconnect status is
+ * the value returned.
+ *
+ * Do not call for system bus event interrupts setup with
+ * ps3_sb_event_receive_port_setup().
+ */
+
+int ps3_irq_plug_destroy(unsigned int virq)
+{
+	const struct ps3_private *priv = irq_get_chip_data(virq);
+	int rc;
+
+	DBG("%s:%d: ppe_id %llu, thread_id %llu, virq %u\n",
+	    __func__, __LINE__, priv->ppe_id, priv->thread_id, virq);
+
+	ps3_chip_mask(irq_get_irq_data(virq));
+
+	rc = lv1_disconnect_irq_plug_ext(priv->ppe_id, priv->thread_id, virq);
+	if (rc) {
+		FAIL("%s:%d: lv1_disconnect_irq_plug_ext failed: %s\n",
+			__func__, __LINE__, ps3_result(rc));
+	}
+
+	ps3_virq_destroy(virq);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(ps3_irq_plug_destroy);
+
+/**
+ * ps3_event_receive_port_setup - Setup an event receive port.
+ * @cpu: enum ps3_cpu_binding indicating the cpu the interrupt should be
+ * serviced on.
+ * @virq: The assigned Linux virq.
+ *
+ * The virq can be used with lv1_connect_interrupt_event_receive_port() to
+ * arrange to receive interrupts from system-bus devices, or with
+ * ps3_send_event_locally() to signal events.
+ *
+ * If the receive port cannot be constructed, *@virq is set to 0 and the
+ * hypervisor status is returned. A failing ps3_irq_plug_setup() hits BUG_ON().
+ */
+
+int ps3_event_receive_port_setup(enum ps3_cpu_binding cpu, unsigned int *virq)
+{
+	u64 outlet;
+	int rc = lv1_construct_event_receive_port(&outlet);
+
+	if (rc) {
+		FAIL("%s:%d: lv1_construct_event_receive_port failed: %s\n",
+			__func__, __LINE__, ps3_result(rc));
+		*virq = 0;
+		return rc;
+	}
+
+	rc = ps3_irq_plug_setup(cpu, outlet, virq);
+	BUG_ON(rc);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(ps3_event_receive_port_setup);
+
+/**
+ * ps3_event_receive_port_destroy - Destroy an event receive port.
+ * @virq: The assigned Linux virq.
+ *
+ * Since ps3_event_receive_port_destroy destroys the receive port outlet,
+ * SB devices need to call disconnect_interrupt_event_receive_port() before
+ * this.
+ *
+ * The virq itself is left in place; see the comment in the body.
+ * Returns the status of lv1_destruct_event_receive_port().
+ */
+
+int ps3_event_receive_port_destroy(unsigned int virq)
+{
+	int rc;
+
+	DBG(" -> %s:%d virq %u\n", __func__, __LINE__, virq);
+
+	ps3_chip_mask(irq_get_irq_data(virq));
+
+	rc = lv1_destruct_event_receive_port(virq_to_hw(virq));
+	if (rc) {
+		FAIL("%s:%d: lv1_destruct_event_receive_port failed: %s\n",
+			__func__, __LINE__, ps3_result(rc));
+	}
+
+	/*
+	 * Don't call ps3_virq_destroy() here since ps3_smp_cleanup_cpu()
+	 * calls from interrupt context (smp_call_function) when kexecing.
+	 */
+
+	DBG(" <- %s:%d\n", __func__, __LINE__);
+
+	return rc;
+}
+
+/* Signal the event receive port behind @virq via lv1_send_event_locally(). */
+int ps3_send_event_locally(unsigned int virq)
+{
+	const unsigned long outlet = virq_to_hw(virq);
+
+	return lv1_send_event_locally(outlet);
+}
+
+/**
+ * ps3_sb_event_receive_port_setup - Setup a system bus event receive port.
+ * @dev: The system bus device instance.
+ * @cpu: enum ps3_cpu_binding indicating the cpu the interrupt should be
+ * serviced on.
+ * @virq: The assigned Linux virq.
+ *
+ * An event irq represents a virtual device interrupt. The interrupt_id
+ * corresponds to the software interrupt number.
+ *
+ * Returns 0 on success. On failure *@virq is set to 0 and the status of the
+ * failing call is returned.
+ */
+
+int ps3_sb_event_receive_port_setup(struct ps3_system_bus_device *dev,
+	enum ps3_cpu_binding cpu, unsigned int *virq)
+{
+	/* this should go in system-bus.c */
+
+	int result;
+
+	result = ps3_event_receive_port_setup(cpu, virq);
+
+	if (result)
+		return result;
+
+	result = lv1_connect_interrupt_event_receive_port(dev->bus_id,
+		dev->dev_id, virq_to_hw(*virq), dev->interrupt_id);
+
+	if (result) {
+		FAIL("%s:%d: lv1_connect_interrupt_event_receive_port"
+			" failed: %s\n", __func__, __LINE__,
+			ps3_result(result));
+		ps3_event_receive_port_destroy(*virq);
+		/*
+		 * ps3_event_receive_port_destroy() deliberately leaves the
+		 * virq in place, so release it here just as
+		 * ps3_sb_event_receive_port_destroy() does; otherwise the
+		 * mapping created above is leaked.
+		 */
+		ps3_virq_destroy(*virq);
+		*virq = 0;
+		return result;
+	}
+
+	DBG("%s:%d: interrupt_id %u, virq %u\n", __func__, __LINE__,
+		dev->interrupt_id, *virq);
+
+	return 0;
+}
+EXPORT_SYMBOL(ps3_sb_event_receive_port_setup);
+
+/*
+ * Disconnect a system bus device from its event receive port, destroy the
+ * port and release the virq. A failed disconnect is only reported; failures
+ * of the two destroy steps hit BUG_ON().
+ */
+int ps3_sb_event_receive_port_destroy(struct ps3_system_bus_device *dev,
+	unsigned int virq)
+{
+	/* this should go in system-bus.c */
+
+	int rc;
+
+	DBG(" -> %s:%d: interrupt_id %u, virq %u\n", __func__, __LINE__,
+		dev->interrupt_id, virq);
+
+	rc = lv1_disconnect_interrupt_event_receive_port(dev->bus_id,
+		dev->dev_id, virq_to_hw(virq), dev->interrupt_id);
+	if (rc) {
+		FAIL("%s:%d: lv1_disconnect_interrupt_event_receive_port"
+			" failed: %s\n", __func__, __LINE__,
+			ps3_result(rc));
+	}
+
+	rc = ps3_event_receive_port_destroy(virq);
+	BUG_ON(rc);
+
+	/*
+	 * ps3_event_receive_port_destroy() destroys the IRQ plug,
+	 * so don't call ps3_irq_plug_destroy() here.
+	 */
+
+	rc = ps3_virq_destroy(virq);
+	BUG_ON(rc);
+
+	DBG(" <- %s:%d\n", __func__, __LINE__);
+
+	return rc;
+}
+EXPORT_SYMBOL(ps3_sb_event_receive_port_destroy);
+
+/**
+ * ps3_io_irq_setup - Setup a system bus io irq.
+ * @cpu: enum ps3_cpu_binding indicating the cpu the interrupt should be
+ * serviced on.
+ * @interrupt_id: The device interrupt id read from the system repository.
+ * @virq: The assigned Linux virq.
+ *
+ * An io irq represents a non-virtualized device interrupt. interrupt_id
+ * corresponds to the interrupt number of the interrupt controller.
+ *
+ * Returns the hypervisor status if the outlet cannot be constructed; a
+ * failing ps3_irq_plug_setup() hits BUG_ON().
+ */
+
+int ps3_io_irq_setup(enum ps3_cpu_binding cpu, unsigned int interrupt_id,
+	unsigned int *virq)
+{
+	u64 outlet;
+	int rc = lv1_construct_io_irq_outlet(interrupt_id, &outlet);
+
+	if (rc) {
+		FAIL("%s:%d: lv1_construct_io_irq_outlet failed: %s\n",
+			__func__, __LINE__, ps3_result(rc));
+		return rc;
+	}
+
+	rc = ps3_irq_plug_setup(cpu, outlet, virq);
+	BUG_ON(rc);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(ps3_io_irq_setup);
+
+/*
+ * Tear down an io irq: mask it, destroy the irq plug and virq, then destruct
+ * the io irq outlet. Returns the status of lv1_destruct_io_irq_outlet().
+ */
+int ps3_io_irq_destroy(unsigned int virq)
+{
+	/* Looked up first: the virq is gone once the plug is destroyed. */
+	const unsigned long outlet = virq_to_hw(virq);
+	int rc;
+
+	ps3_chip_mask(irq_get_irq_data(virq));
+
+	/*
+	 * lv1_destruct_io_irq_outlet() will destroy the IRQ plug,
+	 * so call ps3_irq_plug_destroy() first.
+	 */
+
+	rc = ps3_irq_plug_destroy(virq);
+	BUG_ON(rc);
+
+	rc = lv1_destruct_io_irq_outlet(outlet);
+	if (rc) {
+		FAIL("%s:%d: lv1_destruct_io_irq_outlet failed: %s\n",
+			__func__, __LINE__, ps3_result(rc));
+	}
+
+	return rc;
+}
+EXPORT_SYMBOL_GPL(ps3_io_irq_destroy);
+
+/**
+ * ps3_vuart_irq_setup - Setup the system virtual uart virq.
+ * @cpu: enum ps3_cpu_binding indicating the cpu the interrupt should be
+ * serviced on.
+ * @virt_addr_bmp: The caller supplied virtual uart interrupt bitmap.
+ * @virq: The assigned Linux virq.
+ *
+ * The system supports only a single virtual uart, so multiple calls without
+ * freeing the interrupt will return a wrong state error.
+ *
+ * @virt_addr_bmp must be a kernel address; anything else hits BUG_ON().
+ */
+
+int ps3_vuart_irq_setup(enum ps3_cpu_binding cpu, void *virt_addr_bmp,
+	unsigned int *virq)
+{
+	u64 bmp_lpar;
+	u64 outlet;
+	int rc;
+
+	BUG_ON(!is_kernel_addr((u64)virt_addr_bmp));
+
+	/* The hypervisor is handed the bitmap's lpar address. */
+	bmp_lpar = ps3_mm_phys_to_lpar(__pa(virt_addr_bmp));
+
+	rc = lv1_configure_virtual_uart_irq(bmp_lpar, &outlet);
+	if (rc) {
+		FAIL("%s:%d: lv1_configure_virtual_uart_irq failed: %s\n",
+			__func__, __LINE__, ps3_result(rc));
+		return rc;
+	}
+
+	rc = ps3_irq_plug_setup(cpu, outlet, virq);
+	BUG_ON(rc);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(ps3_vuart_irq_setup);
+
+/*
+ * Tear down the virtual uart virq: mask it, deconfigure the virtual uart irq
+ * in the hypervisor, then destroy the irq plug and virq.
+ *
+ * If the deconfigure call fails its status is returned and the plug and virq
+ * are left untouched. A failing ps3_irq_plug_destroy() hits BUG_ON().
+ */
+int ps3_vuart_irq_destroy(unsigned int virq)
+{
+	int result;
+
+	ps3_chip_mask(irq_get_irq_data(virq));
+	result = lv1_deconfigure_virtual_uart_irq();
+
+	if (result) {
+		/* Name the call that actually failed (deconfigure, not configure). */
+		FAIL("%s:%d: lv1_deconfigure_virtual_uart_irq failed: %s\n",
+			__func__, __LINE__, ps3_result(result));
+		return result;
+	}
+
+	result = ps3_irq_plug_destroy(virq);
+	BUG_ON(result);
+
+	return result;
+}
+EXPORT_SYMBOL_GPL(ps3_vuart_irq_destroy);
+
+/**
+ * ps3_spe_irq_setup - Setup an spe virq.
+ * @cpu: enum ps3_cpu_binding indicating the cpu the interrupt should be
+ * serviced on.
+ * @spe_id: The spe_id returned from lv1_construct_logical_spe().
+ * @class: The spe interrupt class {0,1,2}.
+ * @virq: The assigned Linux virq.
+ *
+ * A @class above 2 hits BUG_ON(), as does a failing ps3_irq_plug_setup().
+ * Returns the hypervisor status if the outlet lookup fails.
+ */
+
+int ps3_spe_irq_setup(enum ps3_cpu_binding cpu, unsigned long spe_id,
+	unsigned int class, unsigned int *virq)
+{
+	u64 outlet;
+	int rc;
+
+	BUG_ON(class > 2);
+
+	rc = lv1_get_spe_irq_outlet(spe_id, class, &outlet);
+	if (rc) {
+		FAIL("%s:%d: lv1_get_spe_irq_outlet failed: %s\n",
+			__func__, __LINE__, ps3_result(rc));
+		return rc;
+	}
+
+	rc = ps3_irq_plug_setup(cpu, outlet, virq);
+	BUG_ON(rc);
+	return rc;
+}
+
+/* Mask an spe virq and destroy its irq plug; a failed destroy hits BUG_ON(). */
+int ps3_spe_irq_destroy(unsigned int virq)
+{
+	int rc;
+
+	ps3_chip_mask(irq_get_irq_data(virq));
+	rc = ps3_irq_plug_destroy(virq);
+	BUG_ON(rc);
+
+	return rc;
+}
+
+
+#define PS3_INVALID_OUTLET ((irq_hw_number_t)-1)
+#define PS3_PLUG_MAX 63
+
+/*
+ * Bitmap dump helpers. With DEBUG defined they print the per-cpu status and
+ * mask words; otherwise dump_bmp() is an empty stub so callers need no
+ * conditional compilation.
+ */
+#if defined(DEBUG)
+/* Print one 64-bit word as four 16-bit groups, most significant first. */
+static void _dump_64_bmp(const char *header, const u64 *p, unsigned cpu,
+	const char *func, int line)
+{
+	pr_debug("%s:%d: %s %u {%04llx_%04llx_%04llx_%04llx}\n",
+		func, line, header, cpu,
+		*p >> 48, (*p >> 32) & 0xffff, (*p >> 16) & 0xffff,
+		*p & 0xffff);
+}
+
+/* Print four consecutive 64-bit words. */
+static void __maybe_unused _dump_256_bmp(const char *header,
+	const u64 *p, unsigned cpu, const char *func, int line)
+{
+	pr_debug("%s:%d: %s %u {%016llx:%016llx:%016llx:%016llx}\n",
+		func, line, header, cpu, p[0], p[1], p[2], p[3]);
+}
+
+#define dump_bmp(_x) _dump_bmp(_x, __func__, __LINE__)
+/* Dump both the status and mask words of @pd under its bmp_lock. */
+static void _dump_bmp(struct ps3_private *pd, const char *func, int line)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&pd->bmp_lock, flags);
+	_dump_64_bmp("stat", &pd->bmp.status, pd->thread_id, func, line);
+	_dump_64_bmp("mask", (u64*)&pd->bmp.mask, pd->thread_id, func, line);
+	spin_unlock_irqrestore(&pd->bmp_lock, flags);
+}
+
+#define dump_mask(_x) _dump_mask(_x, __func__, __LINE__)
+/* Dump only the mask word of @pd under its bmp_lock. */
+static void __maybe_unused _dump_mask(struct ps3_private *pd,
+	const char *func, int line)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&pd->bmp_lock, flags);
+	_dump_64_bmp("mask", (u64*)&pd->bmp.mask, pd->thread_id, func, line);
+	spin_unlock_irqrestore(&pd->bmp_lock, flags);
+}
+#else
+/* No trailing ';' after the body: that would be an empty file-scope declaration. */
+static void dump_bmp(struct ps3_private *pd) {}
+#endif /* defined(DEBUG) */
+
+/*
+ * irq_domain .map callback: attach the PS3 irq chip and the fasteoi flow
+ * handler to @virq. Always returns 0.
+ */
+static int ps3_host_map(struct irq_domain *h, unsigned int virq,
+	irq_hw_number_t hwirq)
+{
+	DBG("%s:%d: hwirq %lu, virq %u\n",
+	    __func__, __LINE__, hwirq, virq);
+	irq_set_chip_and_handler(virq, &ps3_irq_chip, handle_fasteoi_irq);
+
+	return 0;
+}
+
+/* irq_domain .match callback: every node and bus token matches. */
+static int ps3_host_match(struct irq_domain *h, struct device_node *np,
+	enum irq_domain_bus_token bus_token)
+{
+	return 1;	/* Match all */
+}
+
+/* Callbacks for the irq domain registered in ps3_init_IRQ(). */
+static const struct irq_domain_ops ps3_host_ops = {
+	.match	= ps3_host_match,
+	.map	= ps3_host_map,
+};
+
+/*
+ * Record @virq as the debugger-break IPI of @cpu by setting bit (63 - virq)
+ * in that cpu's ipi_debug_brk_mask.
+ */
+void __init ps3_register_ipi_debug_brk(unsigned int cpu, unsigned int virq)
+{
+	struct ps3_private *priv = &per_cpu(ps3_private, cpu);
+	const unsigned int bit = 63 - virq;
+
+	set_bit(bit, &priv->ipi_debug_brk_mask);
+
+	DBG("%s:%d: cpu %u, virq %u, mask %lxh\n",
+	    __func__, __LINE__, cpu, virq, priv->ipi_debug_brk_mask);
+}
+
+/*
+ * Record @virq as an IPI of @cpu by setting bit (63 - virq) in that cpu's
+ * ipi_mask; ps3_get_irq() tests this mask to EOI IPIs itself.
+ */
+void __init ps3_register_ipi_irq(unsigned int cpu, unsigned int virq)
+{
+	struct ps3_private *priv = &per_cpu(ps3_private, cpu);
+	const unsigned int bit = 63 - virq;
+
+	set_bit(bit, &priv->ipi_mask);
+
+	DBG("%s:%d: cpu %u, virq %u, ipi_mask %lxh\n",
+	    __func__, __LINE__, cpu, virq, priv->ipi_mask);
+}
+
+/*
+ * ps3_get_irq - pick the next plug to service on the current cpu.
+ *
+ * Combines this cpu's status and mask words, then uses the number of leading
+ * zero bits as the plug number, so plug N lives at bit (63 - N) - the same
+ * convention the ps3_register_ipi_*() routines use when setting mask bits.
+ *
+ * Returns the plug number, or 0 when nothing is pending. Installed as
+ * ppc_md.get_irq by ps3_init_IRQ().
+ */
+static unsigned int ps3_get_irq(void)
+{
+ struct ps3_private *pd = this_cpu_ptr(&ps3_private);
+ /* Only plugs that are both flagged in status and enabled in mask count. */
+ u64 x = (pd->bmp.status & pd->bmp.mask);
+ unsigned int plug;
+
+ /* check for ipi break first to stop this cpu ASAP */
+
+ if (x & pd->ipi_debug_brk_mask)
+ x &= pd->ipi_debug_brk_mask;
+
+ /*
+ * cntlzd counts leading zero bits. An all-zero word gives 64, which the
+ * 0x3f mask folds to 0, i.e. "no plug".
+ */
+ asm volatile("cntlzd %0,%1" : "=r" (plug) : "r" (x));
+ plug &= 0x3f;
+
+ if (unlikely(!plug)) {
+ DBG("%s:%d: no plug found: thread_id %llu\n", __func__,
+ __LINE__, pd->thread_id);
+ /* NOTE(review): dumps are hard-coded to cpus 0 and 1. */
+ dump_bmp(&per_cpu(ps3_private, 0));
+ dump_bmp(&per_cpu(ps3_private, 1));
+ return 0;
+ }
+
+#if defined(DEBUG)
+ /* Debug builds treat an out-of-range plug number as fatal. */
+ if (unlikely(plug < NR_IRQS_LEGACY || plug > PS3_PLUG_MAX)) {
+ dump_bmp(&per_cpu(ps3_private, 0));
+ dump_bmp(&per_cpu(ps3_private, 1));
+ BUG();
+ }
+#endif
+
+ /* IPIs are EOIed here. */
+
+ if (test_bit(63 - plug, &pd->ipi_mask))
+ lv1_end_of_interrupt_ext(pd->ppe_id, pd->thread_id, plug);
+
+ return plug;
+}
+
+/*
+ * ps3_init_IRQ - boot-time interrupt setup.
+ *
+ * Registers a no-map irq domain sized for PS3_PLUG_MAX + 1 interrupts as the
+ * default host, then for every possible cpu fills in its ps3_private data and
+ * hands the lpar address of that cpu's bitmap to the hypervisor. Finally
+ * installs ps3_get_irq() as the platform get_irq hook.
+ *
+ * A failed lv1_configure_irq_state_bitmap() is reported but not treated as
+ * fatal here.
+ */
+void __init ps3_init_IRQ(void)
+{
+ int result;
+ unsigned cpu;
+ struct irq_domain *host;
+
+ /* NOTE(review): the returned domain is not checked for NULL. */
+ host = irq_domain_add_nomap(NULL, PS3_PLUG_MAX + 1, &ps3_host_ops, NULL);
+ irq_set_default_host(host);
+
+ for_each_possible_cpu(cpu) {
+ struct ps3_private *pd = &per_cpu(ps3_private, cpu);
+
+ lv1_get_logical_ppe_id(&pd->ppe_id);
+ pd->thread_id = get_hard_smp_processor_id(cpu);
+ spin_lock_init(&pd->bmp_lock);
+
+ DBG("%s:%d: ppe_id %llu, thread_id %llu, bmp %lxh\n",
+ __func__, __LINE__, pd->ppe_id, pd->thread_id,
+ ps3_mm_phys_to_lpar(__pa(&pd->bmp)));
+
+ /* The hypervisor is given the bitmap's lpar address. */
+ result = lv1_configure_irq_state_bitmap(pd->ppe_id,
+ pd->thread_id, ps3_mm_phys_to_lpar(__pa(&pd->bmp)));
+
+ if (result)
+ FAIL("%s:%d: lv1_configure_irq_state_bitmap failed:"
+ " %s\n", __func__, __LINE__,
+ ps3_result(result));
+ }
+
+ ppc_md.get_irq = ps3_get_irq;
+}
+
+/*
+ * Point the hypervisor's irq state bitmap for @cpu at address 0, undoing the
+ * configuration made in ps3_init_IRQ(). The status is only traced.
+ */
+void ps3_shutdown_IRQ(int cpu)
+{
+	const u64 thread_id = get_hard_smp_processor_id(cpu);
+	u64 ppe_id;
+	int rc;
+
+	lv1_get_logical_ppe_id(&ppe_id);
+	rc = lv1_configure_irq_state_bitmap(ppe_id, thread_id, 0);
+
+	DBG("%s:%d: lv1_configure_irq_state_bitmap (%llu:%llu/%d) %s\n",
+	    __func__, __LINE__, ppe_id, thread_id, cpu, ps3_result(rc));
+}
diff --git a/arch/powerpc/platforms/ps3/mm.c b/arch/powerpc/platforms/ps3/mm.c
new file mode 100644
index 000000000..1326de55f
--- /dev/null
+++ b/arch/powerpc/platforms/ps3/mm.c
@@ -0,0 +1,1254 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * PS3 address space management.
+ *
+ * Copyright (C) 2006 Sony Computer Entertainment Inc.
+ * Copyright 2006 Sony Corp.
+ */
+
+#include <linux/dma-mapping.h>
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/memblock.h>
+#include <linux/slab.h>
+
+#include <asm/cell-regs.h>
+#include <asm/firmware.h>
+#include <asm/udbg.h>
+#include <asm/lv1call.h>
+#include <asm/setup.h>
+
+#include "platform.h"
+
+/* Debug tracing: udbg_printf when DEBUG is defined, pr_devel otherwise. */
+#if defined(DEBUG)
+#define DBG udbg_printf
+#else
+#define DBG pr_devel
+#endif
+
+/* Compile-time switch mirroring CONFIG_PS3_DYNAMIC_DMA (1 = enabled). */
+enum {
+#if defined(CONFIG_PS3_DYNAMIC_DMA)
+ USE_DYNAMIC_DMA = 1,
+#else
+ USE_DYNAMIC_DMA = 0,
+#endif
+};
+
+/* Page sizes expressed as shift counts: 1 << 12 = 4K, 1 << 16 = 64K, 1 << 24 = 16M. */
+enum {
+ PAGE_SHIFT_4K = 12U,
+ PAGE_SHIFT_64K = 16U,
+ PAGE_SHIFT_16M = 24U,
+};
+
+/*
+ * Pack two page-size shift values into one word: @a in the bits from 56
+ * upward and @b shifted to bit 48, as passed to
+ * lv1_construct_virtual_address_space().
+ */
+static unsigned long __init make_page_sizes(unsigned long a, unsigned long b)
+{
+	const unsigned long first = a << 56;
+	const unsigned long second = b << 48;
+
+	return first | second;
+}
+
+/* Flag bits for the flags argument of lv1_allocate_memory(). */
+enum {
+	ALLOCATE_MEMORY_TRY_ALT_UNIT	= 0x04,
+	ALLOCATE_MEMORY_ADDR_ZERO	= 0x08,
+};
+
+/* valid htab sizes are {18,19,20} = 256K, 512K, 1M */
+
+enum {
+	HTAB_SIZE_MAX	= 20U, /* HV limit of 1MB */
+	HTAB_SIZE_MIN	= 18U, /* CPU limit of 256KB */
+};
+
+/*============================================================================*/
+/* virtual address space routines */
+/*============================================================================*/
+
+/**
+ * struct mem_region - memory region structure
+ * @base: base address
+ * @size: size in bytes
+ * @offset: difference between base and rm.size (computed as
+ * base - map.rm.size when the region is found or created)
+ * @destroy: flag if region should be destroyed upon shutdown; set by
+ * ps3_mm_region_create() and checked by ps3_mm_region_destroy()
+ */
+
+struct mem_region {
+ u64 base;
+ u64 size;
+ unsigned long offset;
+ int destroy;
+};
+
+/**
+ * struct map - address space state variables holder
+ * @total: total memory available as reported by HV
+ * @vas_id: HV virtual address space id
+ * @htab_size: htab size in bytes
+ * @rm: real mode (bootmem) region
+ * @r1: highmem region(s)
+ *
+ * The HV virtual address space (vas) allows for hotplug memory regions.
+ * Memory regions can be created and destroyed in the vas at runtime.
+ *
+ * ps3 addresses
+ * virt_addr: a cpu 'translated' effective address
+ * phys_addr: an address in what Linux thinks is the physical address space
+ * lpar_addr: an address in the HV virtual address space
+ * bus_addr: an io controller 'translated' address on a device bus
+ */
+
+struct map {
+ u64 total;
+ u64 vas_id;
+ u64 htab_size;
+ struct mem_region rm;
+ struct mem_region r1;
+};
+
+/*
+ * debug_dump_map() - trace the fields of a struct map via DBG, tagged with
+ * the caller's function name and line number.
+ */
+#define debug_dump_map(x) _debug_dump_map(x, __func__, __LINE__)
+static void __maybe_unused _debug_dump_map(const struct map *m,
+ const char *func, int line)
+{
+ DBG("%s:%d: map.total = %llxh\n", func, line, m->total);
+ DBG("%s:%d: map.rm.size = %llxh\n", func, line, m->rm.size);
+ DBG("%s:%d: map.vas_id = %llu\n", func, line, m->vas_id);
+ DBG("%s:%d: map.htab_size = %llxh\n", func, line, m->htab_size);
+ DBG("%s:%d: map.r1.base = %llxh\n", func, line, m->r1.base);
+ DBG("%s:%d: map.r1.offset = %lxh\n", func, line, m->r1.offset);
+ DBG("%s:%d: map.r1.size = %llxh\n", func, line, m->r1.size);
+}
+
+/* The single, file-wide address space state shared by the routines below. */
+static struct map map;
+
+/**
+ * ps3_mm_phys_to_lpar - translate a linux physical address to lpar address
+ * @phys_addr: linux physical address
+ *
+ * Addresses below map.rm.size, or at or above map.total, are returned
+ * unchanged; everything in between is shifted by map.r1.offset.
+ * Passing a kernel virtual address hits BUG_ON().
+ */
+
+unsigned long ps3_mm_phys_to_lpar(unsigned long phys_addr)
+{
+	BUG_ON(is_kernel_addr(phys_addr));
+
+	if (phys_addr >= map.rm.size && phys_addr < map.total)
+		return phys_addr + map.r1.offset;
+
+	return phys_addr;
+}
+
+EXPORT_SYMBOL(ps3_mm_phys_to_lpar);
+
+/**
+ * ps3_mm_vas_create - create the virtual address space
+ * @htab_size: receives the htab size reported by the hypervisor
+ *
+ * Queries logical partition address region 0, constructs a virtual address
+ * space with 16M and 64K page sizes, and selects it. Any failure panics.
+ */
+
+void __init ps3_mm_vas_create(unsigned long *htab_size)
+{
+	u64 region_start, region_size, region_rights;
+	u64 page_size_max, region_flags;
+	int rc;
+
+	rc = lv1_query_logical_partition_address_region_info(0,
+		&region_start, &region_size, &region_rights, &page_size_max,
+		&region_flags);
+	if (rc) {
+		DBG("%s:%d: lv1_query_logical_partition_address_region_info "
+			"failed: %s\n", __func__, __LINE__,
+			ps3_result(rc));
+		goto out_panic;
+	}
+
+	if (page_size_max < PAGE_SHIFT_16M) {
+		DBG("%s:%d: bad max_page_size %llxh\n", __func__, __LINE__,
+			page_size_max);
+		goto out_panic;
+	}
+
+	BUILD_BUG_ON(CONFIG_PS3_HTAB_SIZE > HTAB_SIZE_MAX);
+	BUILD_BUG_ON(CONFIG_PS3_HTAB_SIZE < HTAB_SIZE_MIN);
+
+	rc = lv1_construct_virtual_address_space(CONFIG_PS3_HTAB_SIZE,
+		2, make_page_sizes(PAGE_SHIFT_16M, PAGE_SHIFT_64K),
+		&map.vas_id, &map.htab_size);
+	if (rc) {
+		DBG("%s:%d: lv1_construct_virtual_address_space failed: %s\n",
+			__func__, __LINE__, ps3_result(rc));
+		goto out_panic;
+	}
+
+	rc = lv1_select_virtual_address_space(map.vas_id);
+	if (rc) {
+		DBG("%s:%d: lv1_select_virtual_address_space failed: %s\n",
+			__func__, __LINE__, ps3_result(rc));
+		goto out_panic;
+	}
+
+	*htab_size = map.htab_size;
+	debug_dump_map(&map);
+	return;
+
+out_panic:
+	panic("ps3_mm_vas_create failed");
+}
+
+/**
+ * ps3_mm_vas_destroy - deselect and destruct the virtual address space
+ *
+ * called during kexec sequence with MMU off.
+ *
+ * Does nothing when no vas was created. Both hypervisor calls are always
+ * issued; a non-zero combined status ends in lv1_panic().
+ */
+
+notrace void ps3_mm_vas_destroy(void)
+{
+	int rc;
+
+	if (!map.vas_id)
+		return;
+
+	rc = lv1_select_virtual_address_space(0);
+	rc += lv1_destruct_virtual_address_space(map.vas_id);
+
+	if (rc)
+		lv1_panic(0);
+
+	map.vas_id = 0;
+}
+
+/*
+ * Read highmem region 0 from the repository into @r.
+ *
+ * Returns 0 and fills in base, size and offset when a non-empty region is
+ * recorded. Otherwise zeroes those fields and returns the read status, or -1
+ * when the read succeeded but base or size was zero.
+ */
+static int __init ps3_mm_get_repository_highmem(struct mem_region *r)
+{
+	int rc;
+
+	/* Assume a single highmem region. */
+
+	rc = ps3_repository_read_highmem_info(0, &r->base, &r->size);
+
+	if (!rc && (!r->base || !r->size))
+		rc = -1;
+
+	if (rc) {
+		DBG("%s:%d: No high region in repository.\n",
+		    __func__, __LINE__);
+
+		r->offset = 0;
+		r->base = 0;
+		r->size = 0;
+		return rc;
+	}
+
+	r->offset = r->base - map.rm.size;
+
+	DBG("%s:%d: Found high region in repository: %llxh %llxh\n",
+	    __func__, __LINE__, r->base, r->size);
+
+	return 0;
+}
+
+/*
+ * Record @r as highmem region 0 in the repository; a NULL @r writes a zero
+ * base and size instead.
+ */
+static int ps3_mm_set_repository_highmem(const struct mem_region *r)
+{
+	/* Assume a single highmem region. */
+
+	if (!r)
+		return ps3_repository_write_highmem_info(0, 0, 0);
+
+	return ps3_repository_write_highmem_info(0, r->base, r->size);
+}
+
+/**
+ * ps3_mm_region_create - create a memory region in the vas
+ * @r: pointer to a struct mem_region to accept initialized values
+ * @size: requested region size
+ *
+ * This implementation creates the region with the vas large page size.
+ * @size is rounded down to a multiple of the vas large page size.
+ *
+ * Returns 0 on success. On failure the size, base and offset of @r are
+ * zeroed and a non-zero value is returned: -1 when the rounded size is zero
+ * or the allocated base lies below map.rm.size, otherwise the status of
+ * lv1_allocate_memory().
+ */
+
+static int ps3_mm_region_create(struct mem_region *r, unsigned long size)
+{
+	int result;
+	u64 muid;
+
+	r->size = ALIGN_DOWN(size, 1 << PAGE_SHIFT_16M);
+
+	DBG("%s:%d requested %lxh\n", __func__, __LINE__, size);
+	DBG("%s:%d actual %llxh\n", __func__, __LINE__, r->size);
+	DBG("%s:%d difference %llxh (%lluMB)\n", __func__, __LINE__,
+		size - r->size, (size - r->size) / 1024 / 1024);
+
+	if (r->size == 0) {
+		DBG("%s:%d: size == 0\n", __func__, __LINE__);
+		result = -1;
+		goto zero_region;
+	}
+
+	result = lv1_allocate_memory(r->size, PAGE_SHIFT_16M, 0,
+		ALLOCATE_MEMORY_TRY_ALT_UNIT, &r->base, &muid);
+
+	if (result) {
+		DBG("%s:%d: lv1_allocate_memory failed: %s\n",
+			__func__, __LINE__, ps3_result(result));
+		goto zero_region;
+	}
+
+	if (r->base < map.rm.size) {
+		/*
+		 * The allocation succeeded but the region is unusable. Report
+		 * an error instead of returning 0 with a zeroed region.
+		 * NOTE(review): the allocated memory is not released on this
+		 * path - confirm whether lv1_release_memory() is wanted here.
+		 */
+		DBG("%s:%d: region base %llxh below rm size %llxh\n",
+			__func__, __LINE__, r->base, map.rm.size);
+		result = -1;
+		goto zero_region;
+	}
+
+	r->destroy = 1;
+	r->offset = r->base - map.rm.size;
+	return result;
+
+zero_region:
+	r->size = r->base = r->offset = 0;
+	return result;
+}
+
+/**
+ * ps3_mm_region_destroy - destroy a memory region
+ * @r: pointer to struct mem_region
+ *
+ * Does nothing unless @r is flagged for destruction. Otherwise releases the
+ * region's memory (if it has a base), resets map.total to the real mode
+ * size, and clears the highmem record in the repository. A failed release
+ * ends in lv1_panic().
+ */
+
+static void ps3_mm_region_destroy(struct mem_region *r)
+{
+	if (!r->destroy)
+		return;
+
+	if (r->base) {
+		if (lv1_release_memory(r->base))
+			lv1_panic(0);
+
+		r->offset = 0;
+		r->base = 0;
+		r->size = 0;
+		map.total = map.rm.size;
+	}
+
+	ps3_mm_set_repository_highmem(NULL);
+}
+
+/*============================================================================*/
+/* dma routines */
+/*============================================================================*/
+
+/**
+ * dma_sb_lpar_to_bus - Translate an lpar address to ioc mapped bus address.
+ * @r: pointer to dma region structure
+ * @lpar_addr: HV lpar address
+ *
+ * Addresses at or above map.rm.size first have map.r1.offset removed. The
+ * result must fall inside [r->offset, r->offset + r->len) or BUG_ON() fires.
+ */
+
+static unsigned long dma_sb_lpar_to_bus(struct ps3_dma_region *r,
+	unsigned long lpar_addr)
+{
+	unsigned long addr = lpar_addr;
+
+	if (addr >= map.rm.size)
+		addr -= map.r1.offset;
+
+	BUG_ON(addr < r->offset);
+	BUG_ON(addr >= r->offset + r->len);
+
+	return r->bus_addr + addr - r->offset;
+}
+
+/*
+ * dma_dump_region() - trace the device ids, page size, bus address, length
+ * and offset of a dma region via DBG, tagged with the caller's function name
+ * and line number.
+ */
+#define dma_dump_region(_a) _dma_dump_region(_a, __func__, __LINE__)
+static void __maybe_unused _dma_dump_region(const struct ps3_dma_region *r,
+ const char *func, int line)
+{
+ DBG("%s:%d: dev %llu:%llu\n", func, line, r->dev->bus_id,
+ r->dev->dev_id);
+ DBG("%s:%d: page_size %u\n", func, line, r->page_size);
+ DBG("%s:%d: bus_addr %lxh\n", func, line, r->bus_addr);
+ DBG("%s:%d: len %lxh\n", func, line, r->len);
+ DBG("%s:%d: offset %lxh\n", func, line, r->offset);
+}
+
+/**
+ * struct dma_chunk - A chunk of dma pages mapped by the io controller.
+ * @region: The dma region that owns this chunk.
+ * @lpar_addr: Starting lpar address of the area to map.
+ * @bus_addr: Starting ioc bus address of the area to map.
+ * @len: Length in bytes of the area to map.
+ * @link: A struct list_head used with struct ps3_dma_region.chunk_list, the
+ * list of all chunks owned by the region.
+ * @usage_count: Incremented by dma_sb_map_area() each time an already
+ * mapped chunk is reused for a new request.
+ *
+ * This implementation uses a very simple dma page manager
+ * based on the dma_chunk structure. This scheme assumes
+ * that all drivers use very well behaved dma ops.
+ */
+
+struct dma_chunk {
+ struct ps3_dma_region *region;
+ unsigned long lpar_addr;
+ unsigned long bus_addr;
+ unsigned long len;
+ struct list_head link;
+ unsigned int usage_count;
+};
+
+/*
+ * dma_dump_chunk() - trace a chunk and its owning region via DBG, tagged
+ * with the caller's function name and line number. "r." lines describe the
+ * region, "c." lines the chunk itself.
+ */
+#define dma_dump_chunk(_a) _dma_dump_chunk(_a, __func__, __LINE__)
+static void _dma_dump_chunk (const struct dma_chunk* c, const char* func,
+ int line)
+{
+ DBG("%s:%d: r.dev %llu:%llu\n", func, line,
+ c->region->dev->bus_id, c->region->dev->dev_id);
+ DBG("%s:%d: r.bus_addr %lxh\n", func, line, c->region->bus_addr);
+ DBG("%s:%d: r.page_size %u\n", func, line, c->region->page_size);
+ DBG("%s:%d: r.len %lxh\n", func, line, c->region->len);
+ DBG("%s:%d: r.offset %lxh\n", func, line, c->region->offset);
+ DBG("%s:%d: c.lpar_addr %lxh\n", func, line, c->lpar_addr);
+ DBG("%s:%d: c.bus_addr %lxh\n", func, line, c->bus_addr);
+ DBG("%s:%d: c.len %lxh\n", func, line, c->len);
+}
+
+/*
+ * Find the chunk of @r that fully contains the page-aligned bus range
+ * covering [@bus_addr, @bus_addr + @len).
+ *
+ * Returns the chunk, or NULL when no chunk overlaps the range. A range that
+ * only partly overlaps a chunk is not supported and hits BUG().
+ */
+static struct dma_chunk *dma_find_chunk(struct ps3_dma_region *r,
+	unsigned long bus_addr, unsigned long len)
+{
+	struct dma_chunk *c;
+	unsigned long first = ALIGN_DOWN(bus_addr, 1 << r->page_size);
+	unsigned long span = ALIGN(len + bus_addr - first,
+				   1 << r->page_size);
+
+	list_for_each_entry(c, &r->chunk_list.head, link) {
+		/* intersection */
+		if (first >= c->bus_addr &&
+		    first + span <= c->bus_addr + c->len)
+			return c;
+
+		/* entirely below or entirely above this chunk */
+		if (first + span <= c->bus_addr ||
+		    first >= c->bus_addr + c->len)
+			continue;
+
+		/* we don't handle the multi-chunk case for now */
+		dma_dump_chunk(c);
+		BUG();
+	}
+
+	return NULL;
+}
+
+/*
+ * Find the chunk of @r whose lpar range fully contains the page-aligned
+ * range covering [@lpar_addr, @lpar_addr + @len).
+ *
+ * Returns the chunk, or NULL when no chunk contains the aligned start
+ * address. A range that starts inside a chunk but runs past its end hits
+ * BUG().
+ */
+static struct dma_chunk *dma_find_chunk_lpar(struct ps3_dma_region *r,
+	unsigned long lpar_addr, unsigned long len)
+{
+	struct dma_chunk *c;
+	unsigned long first = ALIGN_DOWN(lpar_addr, 1 << r->page_size);
+	unsigned long span = ALIGN(len + lpar_addr - first,
+				   1 << r->page_size);
+
+	list_for_each_entry(c, &r->chunk_list.head, link) {
+		/* start address outside this chunk: below or above, skip it */
+		if (first < c->lpar_addr || first >= c->lpar_addr + c->len)
+			continue;
+
+		/* intersection */
+		if (first + span <= c->lpar_addr + c->len)
+			return c;
+
+		dma_dump_chunk(c);
+		BUG();
+	}
+
+	return NULL;
+}
+
+/*
+ * Unmap a chunk from its device dma region (when it has a bus address) and
+ * free it. A failed unmap hits BUG_ON(); the unmap status is returned.
+ */
+static int dma_sb_free_chunk(struct dma_chunk *c)
+{
+	const struct ps3_dma_region *region = c->region;
+	int rc = 0;
+
+	if (c->bus_addr) {
+		rc = lv1_unmap_device_dma_region(region->dev->bus_id,
+			region->dev->dev_id, c->bus_addr, c->len);
+		BUG_ON(rc);
+	}
+
+	kfree(c);
+
+	return rc;
+}
+
+/*
+ * Invalidate every iopte covered by a chunk and free the chunk.
+ *
+ * Individual lv1_put_iopte() failures are only traced; the value returned is
+ * the status of the last call (0 when the chunk covers no pages).
+ */
+static int dma_ioc0_free_chunk(struct dma_chunk *c)
+{
+	struct ps3_dma_region *region = c->region;
+	unsigned long off;
+	int rc = 0;
+	int pg;
+
+	DBG("%s:start\n", __func__);
+
+	for (pg = 0; pg < (c->len >> region->page_size); pg++) {
+		off = (1 << region->page_size) * pg;
+
+		/* put INVALID entry */
+		rc = lv1_put_iopte(0, c->bus_addr + off, c->lpar_addr + off,
+				   region->ioid, 0);
+
+		DBG("%s: bus=%#lx, lpar=%#lx, ioid=%d\n", __func__,
+		    c->bus_addr + off, c->lpar_addr + off, region->ioid);
+
+		if (rc) {
+			DBG("%s:%d: lv1_put_iopte failed: %s\n", __func__,
+			    __LINE__, ps3_result(rc));
+		}
+	}
+
+	kfree(c);
+	DBG("%s:end\n", __func__);
+
+	return rc;
+}
+
+/**
+ * dma_sb_map_pages - Maps dma pages into the io controller bus address space.
+ * @r: Pointer to a struct ps3_dma_region.
+ * @phys_addr: Starting physical address of the area to map.
+ * @len: Length in bytes of the area to map.
+ * @c_out: A pointer to receive an allocated struct dma_chunk for this area.
+ * @iopte_flag: Flags passed through to the hypervisor; any value other than
+ * 0xf800000000000000UL hits BUG_ON().
+ *
+ * This is the lowest level dma mapping routine, and is the one that will
+ * make the HV call to add the pages into the io controller address space.
+ *
+ * Returns 0 with the new chunk in *@c_out and linked into the region's chunk
+ * list. On failure *@c_out is set to NULL and -ENOMEM or the hypervisor
+ * status is returned.
+ */
+
+static int dma_sb_map_pages(struct ps3_dma_region *r, unsigned long phys_addr,
+	unsigned long len, struct dma_chunk **c_out, u64 iopte_flag)
+{
+	struct dma_chunk *chunk;
+	int rc;
+
+	chunk = kzalloc(sizeof(*chunk), GFP_ATOMIC);
+	if (!chunk) {
+		rc = -ENOMEM;
+		goto out_fail;
+	}
+
+	chunk->region = r;
+	chunk->lpar_addr = ps3_mm_phys_to_lpar(phys_addr);
+	chunk->bus_addr = dma_sb_lpar_to_bus(r, chunk->lpar_addr);
+	chunk->len = len;
+
+	BUG_ON(iopte_flag != 0xf800000000000000UL);
+	rc = lv1_map_device_dma_region(chunk->region->dev->bus_id,
+				       chunk->region->dev->dev_id,
+				       chunk->lpar_addr, chunk->bus_addr,
+				       chunk->len, iopte_flag);
+	if (!rc) {
+		list_add(&chunk->link, &r->chunk_list.head);
+		*c_out = chunk;
+		return 0;
+	}
+
+	DBG("%s:%d: lv1_map_device_dma_region failed: %s\n",
+	    __func__, __LINE__, ps3_result(rc));
+	kfree(chunk);
+
+out_fail:
+	*c_out = NULL;
+	DBG(" <- %s:%d\n", __func__, __LINE__);
+	return rc;
+}
+
+/*
+ * dma_ioc0_map_pages - Map dma pages by writing ioptes one page at a time.
+ * @r: Pointer to a struct ps3_dma_region.
+ * @phys_addr: Starting physical address of the area to map.
+ * @len: Length in bytes of the area to map.
+ * @c_out: A pointer to receive an allocated struct dma_chunk for this area.
+ * @iopte_flag: Flags written into each iopte.
+ *
+ * The chunk's bus address is the region base for the first chunk, otherwise
+ * it follows directly after the chunk at the head of the list (the most
+ * recently added one).
+ *
+ * Returns 0 with the new chunk in *@c_out. On failure every iopte written so
+ * far is invalidated again, *@c_out is set to NULL, and -ENOMEM or the
+ * hypervisor status is returned.
+ */
+static int dma_ioc0_map_pages(struct ps3_dma_region *r, unsigned long phys_addr,
+	unsigned long len, struct dma_chunk **c_out,
+	u64 iopte_flag)
+{
+	int result;
+	struct dma_chunk *c, *last;
+	int iopage, pages;
+	unsigned long offset;
+
+	/* NOTE(review): KERN_ERR inside a DBG() trace looks unintended. */
+	DBG(KERN_ERR "%s: phy=%#lx, lpar%#lx, len=%#lx\n", __func__,
+		phys_addr, ps3_mm_phys_to_lpar(phys_addr), len);
+	c = kzalloc(sizeof(*c), GFP_ATOMIC);
+	if (!c) {
+		result = -ENOMEM;
+		goto fail_alloc;
+	}
+
+	c->region = r;
+	c->len = len;
+	c->lpar_addr = ps3_mm_phys_to_lpar(phys_addr);
+	/* allocate IO address */
+	if (list_empty(&r->chunk_list.head)) {
+		/* first one */
+		c->bus_addr = r->bus_addr;
+	} else {
+		/* derive from last bus addr*/
+		last = list_entry(r->chunk_list.head.next,
+			struct dma_chunk, link);
+		c->bus_addr = last->bus_addr + last->len;
+		DBG("%s: last bus=%#lx, len=%#lx\n", __func__,
+			last->bus_addr, last->len);
+	}
+
+	/* FIXME: check whether length exceeds region size */
+
+	/* build ioptes for the area */
+	pages = len >> r->page_size;
+	DBG("%s: pgsize=%#x len=%#lx pages=%#x iopteflag=%#llx\n", __func__,
+		r->page_size, r->len, pages, iopte_flag);
+	for (iopage = 0; iopage < pages; iopage++) {
+		offset = (1 << r->page_size) * iopage;
+		result = lv1_put_iopte(0,
+			c->bus_addr + offset,
+			c->lpar_addr + offset,
+			r->ioid,
+			iopte_flag);
+		if (result) {
+			pr_warn("%s:%d: lv1_put_iopte failed: %s\n",
+				__func__, __LINE__, ps3_result(result));
+			goto fail_map;
+		}
+		DBG("%s: pg=%d bus=%#lx, lpar=%#lx, ioid=%#x\n", __func__,
+			iopage, c->bus_addr + offset, c->lpar_addr + offset,
+			r->ioid);
+	}
+
+	/* be sure that last allocated one is inserted at head */
+	list_add(&c->link, &r->chunk_list.head);
+
+	*c_out = c;
+	DBG("%s: end\n", __func__);
+	return 0;
+
+fail_map:
+	/*
+	 * Invalidate the pages mapped before the failure. The offset must be
+	 * recomputed for each page; reusing the failing page's offset would
+	 * leave every earlier iopte valid.
+	 */
+	for (iopage--; 0 <= iopage; iopage--) {
+		offset = (1 << r->page_size) * iopage;
+		lv1_put_iopte(0,
+			c->bus_addr + offset,
+			c->lpar_addr + offset,
+			r->ioid,
+			0);
+	}
+	kfree(c);
+fail_alloc:
+	*c_out = NULL;
+	return result;
+}
+
+/**
+ * dma_sb_region_create - Create a device dma region.
+ * @r: Pointer to a struct ps3_dma_region.
+ *
+ * This is the lowest level dma region create routine, and is the one that
+ * will make the HV call to create the region.
+ *
+ * Devices with a zero bus_id get no region and 0 is returned. Otherwise the
+ * chunk list is initialised and a region of the next power-of-two size is
+ * requested; on failure len and bus_addr of @r are zeroed and the hypervisor
+ * status is returned.
+ */
+
+static int dma_sb_region_create(struct ps3_dma_region *r)
+{
+	u64 hv_bus_addr;
+	int rc;
+
+	DBG(" -> %s:%d:\n", __func__, __LINE__);
+
+	BUG_ON(!r);
+
+	if (!r->dev->bus_id) {
+		pr_info("%s:%d: %llu:%llu no dma\n", __func__, __LINE__,
+			r->dev->bus_id, r->dev->dev_id);
+		return 0;
+	}
+
+	DBG("%s:%u: len = 0x%lx, page_size = %u, offset = 0x%lx\n", __func__,
+	    __LINE__, r->len, r->page_size, r->offset);
+
+	BUG_ON(!r->len);
+	BUG_ON(!r->page_size);
+	BUG_ON(!r->region_ops);
+
+	INIT_LIST_HEAD(&r->chunk_list.head);
+	spin_lock_init(&r->chunk_list.lock);
+
+	rc = lv1_allocate_device_dma_region(r->dev->bus_id, r->dev->dev_id,
+					    roundup_pow_of_two(r->len),
+					    r->page_size, r->region_type,
+					    &hv_bus_addr);
+	r->bus_addr = hv_bus_addr;
+
+	if (rc) {
+		DBG("%s:%d: lv1_allocate_device_dma_region failed: %s\n",
+		    __func__, __LINE__, ps3_result(rc));
+		r->bus_addr = 0;
+		r->len = 0;
+	}
+
+	return rc;
+}
+
+/*
+ * Create an ioc0 dma region: initialise the chunk list and allocate an io
+ * segment of r->len bytes. On failure len and bus_addr of @r are zeroed and
+ * the hypervisor status is returned.
+ */
+static int dma_ioc0_region_create(struct ps3_dma_region *r)
+{
+	u64 hv_bus_addr;
+	int rc;
+
+	INIT_LIST_HEAD(&r->chunk_list.head);
+	spin_lock_init(&r->chunk_list.lock);
+
+	rc = lv1_allocate_io_segment(0, r->len, r->page_size, &hv_bus_addr);
+	r->bus_addr = hv_bus_addr;
+
+	if (rc) {
+		DBG("%s:%d: lv1_allocate_io_segment failed: %s\n",
+		    __func__, __LINE__, ps3_result(rc));
+		r->bus_addr = 0;
+		r->len = 0;
+	}
+
+	DBG("%s: len=%#lx, pg=%d, bus=%#lx\n", __func__,
+	    r->len, r->page_size, r->bus_addr);
+
+	return rc;
+}
+
+/**
+ * dma_sb_region_free - Free a device dma region.
+ * @r: Pointer to a struct ps3_dma_region.
+ *
+ * This is the lowest level dma region free routine, and is the one that
+ * will make the HV call to free the region.
+ *
+ * Devices with a zero bus_id have no region and 0 is returned. Otherwise
+ * every chunk on the region's list is unlinked and freed before the region
+ * itself is released; bus_addr is cleared regardless of the outcome.
+ */
+
+static int dma_sb_region_free(struct ps3_dma_region *r)
+{
+	struct dma_chunk *chunk, *next;
+	int rc;
+
+	BUG_ON(!r);
+
+	if (!r->dev->bus_id) {
+		pr_info("%s:%d: %llu:%llu no dma\n", __func__, __LINE__,
+			r->dev->bus_id, r->dev->dev_id);
+		return 0;
+	}
+
+	list_for_each_entry_safe(chunk, next, &r->chunk_list.head, link) {
+		list_del(&chunk->link);
+		dma_sb_free_chunk(chunk);
+	}
+
+	rc = lv1_free_device_dma_region(r->dev->bus_id, r->dev->dev_id,
+					r->bus_addr);
+	if (rc) {
+		DBG("%s:%d: lv1_free_device_dma_region failed: %s\n",
+		    __func__, __LINE__, ps3_result(rc));
+	}
+
+	r->bus_addr = 0;
+
+	return rc;
+}
+
+/*
+ * Free an ioc0 dma region: unlink and free every chunk on the region's list,
+ * then release the io segment. bus_addr is cleared regardless of the
+ * outcome; the status of lv1_release_io_segment() is returned.
+ */
+static int dma_ioc0_region_free(struct ps3_dma_region *r)
+{
+	int result;
+	struct dma_chunk *c, *n;
+
+	DBG("%s: start\n", __func__);
+	list_for_each_entry_safe(c, n, &r->chunk_list.head, link) {
+		list_del(&c->link);
+		dma_ioc0_free_chunk(c);
+	}
+
+	result = lv1_release_io_segment(0, r->bus_addr);
+
+	if (result)
+		/* Name the call that actually failed. */
+		DBG("%s:%d: lv1_release_io_segment failed: %s\n",
+			__func__, __LINE__, ps3_result(result));
+
+	r->bus_addr = 0;
+	DBG("%s: end\n", __func__);
+
+	return result;
+}
+
+/**
+ * dma_sb_map_area - Map an area of memory into a device dma region.
+ * @r: Pointer to a struct ps3_dma_region.
+ * @virt_addr: Starting virtual address of the area to map.
+ * @len: Length in bytes of the area to map.
+ * @bus_addr: A pointer to return the starting ioc bus address of the area to
+ * map.
+ * @iopte_flag: Flags handed unchanged to dma_sb_map_pages().
+ *
+ * This is the common dma mapping routine.
+ *
+ * If a chunk already covering the requested bus range exists its usage count
+ * is incremented, otherwise the page-aligned range is mapped as a new chunk
+ * with a usage count of one.  On mapping failure *@bus_addr is set to 0.
+ *
+ * Returns 0 on success or the result of dma_sb_map_pages() on failure.
+ */
+
+static int dma_sb_map_area(struct ps3_dma_region *r, unsigned long virt_addr,
+ unsigned long len, dma_addr_t *bus_addr,
+ u64 iopte_flag)
+{
+ int result;
+ unsigned long flags;
+ struct dma_chunk *c;
+ /* Non-kernel addresses are used as they are, without __pa(). */
+ unsigned long phys_addr = is_kernel_addr(virt_addr) ? __pa(virt_addr)
+ : virt_addr;
+ /* Widen the request to whole dma pages of size 1 << r->page_size. */
+ unsigned long aligned_phys = ALIGN_DOWN(phys_addr, 1 << r->page_size);
+ unsigned long aligned_len = ALIGN(len + phys_addr - aligned_phys,
+ 1 << r->page_size);
+ *bus_addr = dma_sb_lpar_to_bus(r, ps3_mm_phys_to_lpar(phys_addr));
+
+ if (!USE_DYNAMIC_DMA) {
+ unsigned long lpar_addr = ps3_mm_phys_to_lpar(phys_addr);
+ DBG(" -> %s:%d\n", __func__, __LINE__);
+ DBG("%s:%d virt_addr %lxh\n", __func__, __LINE__,
+ virt_addr);
+ DBG("%s:%d phys_addr %lxh\n", __func__, __LINE__,
+ phys_addr);
+ DBG("%s:%d lpar_addr %lxh\n", __func__, __LINE__,
+ lpar_addr);
+ DBG("%s:%d len %lxh\n", __func__, __LINE__, len);
+ DBG("%s:%d bus_addr %llxh (%lxh)\n", __func__, __LINE__,
+ *bus_addr, len);
+ }
+
+ /* The lookup and the chunk list update happen under the list lock. */
+ spin_lock_irqsave(&r->chunk_list.lock, flags);
+ c = dma_find_chunk(r, *bus_addr, len);
+
+ if (c) {
+ DBG("%s:%d: reusing mapped chunk", __func__, __LINE__);
+ dma_dump_chunk(c);
+ c->usage_count++;
+ spin_unlock_irqrestore(&r->chunk_list.lock, flags);
+ return 0;
+ }
+
+ result = dma_sb_map_pages(r, aligned_phys, aligned_len, &c, iopte_flag);
+
+ if (result) {
+ *bus_addr = 0;
+ DBG("%s:%d: dma_sb_map_pages failed (%d)\n",
+ __func__, __LINE__, result);
+ spin_unlock_irqrestore(&r->chunk_list.lock, flags);
+ return result;
+ }
+
+ c->usage_count = 1;
+
+ spin_unlock_irqrestore(&r->chunk_list.lock, flags);
+ return result;
+}
+
+/**
+ * dma_ioc0_map_area - Map an area of memory into an ioc0 dma region.
+ * @r: Pointer to a struct ps3_dma_region.
+ * @virt_addr: Starting virtual address of the area to map.
+ * @len: Length in bytes of the area to map.
+ * @bus_addr: A pointer to return the starting ioc bus address of the area to
+ * map.
+ * @iopte_flag: Flags handed unchanged to dma_ioc0_map_pages().
+ *
+ * Maps the page-aligned range around the area as a new chunk and returns the
+ * bus address that corresponds to the (possibly unaligned) start of the
+ * area.  Finding an already mapped chunk for the area is not supported and
+ * hits BUG().  On mapping failure *@bus_addr is set to 0.
+ *
+ * Returns 0 on success or the result of dma_ioc0_map_pages() on failure.
+ */
+
+static int dma_ioc0_map_area(struct ps3_dma_region *r, unsigned long virt_addr,
+	unsigned long len, dma_addr_t *bus_addr,
+	u64 iopte_flag)
+{
+	int result;
+	unsigned long flags;
+	struct dma_chunk *c;
+	/* Non-kernel addresses are used as they are, without __pa(). */
+	unsigned long phys_addr = is_kernel_addr(virt_addr) ? __pa(virt_addr)
+		: virt_addr;
+	/* Widen the request to whole dma pages of size 1 << r->page_size. */
+	unsigned long aligned_phys = ALIGN_DOWN(phys_addr, 1 << r->page_size);
+	unsigned long aligned_len = ALIGN(len + phys_addr - aligned_phys,
+		1 << r->page_size);
+
+	/*
+	 * No KERN_<LEVEL> prefix here: like every other DBG() call in this
+	 * file, the message is passed to the macro bare.
+	 */
+	DBG("%s: vaddr=%#lx, len=%#lx\n", __func__,
+		virt_addr, len);
+	DBG("%s: ph=%#lx a_ph=%#lx a_l=%#lx\n", __func__,
+		phys_addr, aligned_phys, aligned_len);
+
+	spin_lock_irqsave(&r->chunk_list.lock, flags);
+	c = dma_find_chunk_lpar(r, ps3_mm_phys_to_lpar(phys_addr), len);
+
+	if (c) {
+		/* FIXME */
+		BUG();
+		*bus_addr = c->bus_addr + phys_addr - aligned_phys;
+		c->usage_count++;
+		spin_unlock_irqrestore(&r->chunk_list.lock, flags);
+		return 0;
+	}
+
+	result = dma_ioc0_map_pages(r, aligned_phys, aligned_len, &c,
+		iopte_flag);
+
+	if (result) {
+		*bus_addr = 0;
+		DBG("%s:%d: dma_ioc0_map_pages failed (%d)\n",
+			__func__, __LINE__, result);
+		spin_unlock_irqrestore(&r->chunk_list.lock, flags);
+		return result;
+	}
+
+	/* Add back the offset that the page alignment removed. */
+	*bus_addr = c->bus_addr + phys_addr - aligned_phys;
+	DBG("%s: va=%#lx pa=%#lx a_pa=%#lx bus=%#llx\n", __func__,
+		virt_addr, phys_addr, aligned_phys, *bus_addr);
+	c->usage_count = 1;
+
+	spin_unlock_irqrestore(&r->chunk_list.lock, flags);
+	return result;
+}
+
+/**
+ * dma_sb_unmap_area - Unmap an area of memory from a device dma region.
+ * @r: Pointer to a struct ps3_dma_region.
+ * @bus_addr: The starting ioc bus address of the area to unmap.
+ * @len: Length in bytes of the area to unmap.
+ *
+ * This is the common dma unmap routine.
+ *
+ * Drops one reference on the chunk covering the area and frees the chunk
+ * when its usage count reaches zero.  Hits BUG() if no chunk covers the
+ * area.  Always returns 0.
+ */
+
+static int dma_sb_unmap_area(struct ps3_dma_region *r, dma_addr_t bus_addr,
+ unsigned long len)
+{
+ unsigned long flags;
+ struct dma_chunk *c;
+
+ spin_lock_irqsave(&r->chunk_list.lock, flags);
+ c = dma_find_chunk(r, bus_addr, len);
+
+ if (!c) {
+ /* The aligned values are computed only for the diagnostics below. */
+ unsigned long aligned_bus = ALIGN_DOWN(bus_addr,
+ 1 << r->page_size);
+ unsigned long aligned_len = ALIGN(len + bus_addr
+ - aligned_bus, 1 << r->page_size);
+ DBG("%s:%d: not found: bus_addr %llxh\n",
+ __func__, __LINE__, bus_addr);
+ DBG("%s:%d: not found: len %lxh\n",
+ __func__, __LINE__, len);
+ DBG("%s:%d: not found: aligned_bus %lxh\n",
+ __func__, __LINE__, aligned_bus);
+ DBG("%s:%d: not found: aligned_len %lxh\n",
+ __func__, __LINE__, aligned_len);
+ BUG();
+ }
+
+ c->usage_count--;
+
+ /* Last user gone: unlink the chunk and free it. */
+ if (!c->usage_count) {
+ list_del(&c->link);
+ dma_sb_free_chunk(c);
+ }
+
+ spin_unlock_irqrestore(&r->chunk_list.lock, flags);
+ return 0;
+}
+
+/**
+ * dma_ioc0_unmap_area - Unmap an area of memory from an ioc0 dma region.
+ * @r: Pointer to a struct ps3_dma_region.
+ * @bus_addr: The starting ioc bus address of the area to unmap.
+ * @len: Length in bytes of the area to unmap.
+ *
+ * Drops one reference on the chunk covering the area and frees the chunk
+ * with dma_ioc0_free_chunk() when its usage count reaches zero.  Hits BUG()
+ * if no chunk covers the area.  Always returns 0.
+ */
+
+static int dma_ioc0_unmap_area(struct ps3_dma_region *r,
+ dma_addr_t bus_addr, unsigned long len)
+{
+ unsigned long flags;
+ struct dma_chunk *c;
+
+ DBG("%s: start a=%#llx l=%#lx\n", __func__, bus_addr, len);
+ spin_lock_irqsave(&r->chunk_list.lock, flags);
+ c = dma_find_chunk(r, bus_addr, len);
+
+ if (!c) {
+ /* The aligned values are computed only for the diagnostics below. */
+ unsigned long aligned_bus = ALIGN_DOWN(bus_addr,
+ 1 << r->page_size);
+ unsigned long aligned_len = ALIGN(len + bus_addr
+ - aligned_bus,
+ 1 << r->page_size);
+ DBG("%s:%d: not found: bus_addr %llxh\n",
+ __func__, __LINE__, bus_addr);
+ DBG("%s:%d: not found: len %lxh\n",
+ __func__, __LINE__, len);
+ DBG("%s:%d: not found: aligned_bus %lxh\n",
+ __func__, __LINE__, aligned_bus);
+ DBG("%s:%d: not found: aligned_len %lxh\n",
+ __func__, __LINE__, aligned_len);
+ BUG();
+ }
+
+ c->usage_count--;
+
+ /* Last user gone: unlink the chunk and free it. */
+ if (!c->usage_count) {
+ list_del(&c->link);
+ dma_ioc0_free_chunk(c);
+ }
+
+ spin_unlock_irqrestore(&r->chunk_list.lock, flags);
+ DBG("%s: end\n", __func__);
+ return 0;
+}
+
+/**
+ * dma_sb_region_create_linear - Setup a linear dma mapping for a device.
+ * @r: Pointer to a struct ps3_dma_region.
+ *
+ * This routine creates an HV dma region for the device and maps all available
+ * ram into the io controller bus address space.
+ *
+ * The window [r->offset, r->offset + r->len) is split at map.rm.size into a
+ * part in the first RAM chunk and a part in the second, and each non-empty
+ * part is mapped with dma_sb_map_area().  Any failure hits BUG_ON(), so the
+ * value returned is 0.
+ */
+
+static int dma_sb_region_create_linear(struct ps3_dma_region *r)
+{
+ int result;
+ unsigned long virt_addr, len;
+ dma_addr_t tmp;
+
+ if (r->len > 16*1024*1024) { /* FIXME: need proper fix */
+ /* force 16M dma pages for linear mapping */
+ if (r->page_size != PS3_DMA_16M) {
+ pr_info("%s:%d: forcing 16M pages for linear map\n",
+ __func__, __LINE__);
+ r->page_size = PS3_DMA_16M;
+ r->len = ALIGN(r->len, 1 << r->page_size);
+ }
+ }
+
+ result = dma_sb_region_create(r);
+ BUG_ON(result);
+
+ if (r->offset < map.rm.size) {
+ /* Map (part of) 1st RAM chunk */
+ virt_addr = map.rm.base + r->offset;
+ len = map.rm.size - r->offset;
+ if (len > r->len)
+ len = r->len;
+ result = dma_sb_map_area(r, virt_addr, len, &tmp,
+ CBE_IOPTE_PP_W | CBE_IOPTE_PP_R | CBE_IOPTE_SO_RW |
+ CBE_IOPTE_M);
+ BUG_ON(result);
+ }
+
+ if (r->offset + r->len > map.rm.size) {
+ /* Map (part of) 2nd RAM chunk */
+ virt_addr = map.rm.size;
+ len = r->len;
+ /*
+ * Either the window starts inside the 2nd chunk (advance the
+ * start), or it straddles both chunks (drop the part that was
+ * mapped from the 1st chunk above).
+ */
+ if (r->offset >= map.rm.size)
+ virt_addr += r->offset - map.rm.size;
+ else
+ len -= map.rm.size - r->offset;
+ result = dma_sb_map_area(r, virt_addr, len, &tmp,
+ CBE_IOPTE_PP_W | CBE_IOPTE_PP_R | CBE_IOPTE_SO_RW |
+ CBE_IOPTE_M);
+ BUG_ON(result);
+ }
+
+ return result;
+}
+
+/**
+ * dma_sb_region_free_linear - Free a linear dma mapping for a device.
+ * @r: Pointer to a struct ps3_dma_region.
+ *
+ * This routine will unmap all mapped areas and free the HV dma region.
+ *
+ * The window is split at map.rm.size in the same way as in
+ * dma_sb_region_create_linear(); the second part is addressed from
+ * map.r1.base.  Any failure hits BUG_ON(), so the value returned is 0.
+ */
+
+static int dma_sb_region_free_linear(struct ps3_dma_region *r)
+{
+ int result;
+ dma_addr_t bus_addr;
+ unsigned long len, lpar_addr;
+
+ if (r->offset < map.rm.size) {
+ /* Unmap (part of) 1st RAM chunk */
+ lpar_addr = map.rm.base + r->offset;
+ len = map.rm.size - r->offset;
+ if (len > r->len)
+ len = r->len;
+ bus_addr = dma_sb_lpar_to_bus(r, lpar_addr);
+ result = dma_sb_unmap_area(r, bus_addr, len);
+ BUG_ON(result);
+ }
+
+ if (r->offset + r->len > map.rm.size) {
+ /* Unmap (part of) 2nd RAM chunk */
+ lpar_addr = map.r1.base;
+ len = r->len;
+ /* Window starts in the 2nd chunk, or straddles both chunks. */
+ if (r->offset >= map.rm.size)
+ lpar_addr += r->offset - map.rm.size;
+ else
+ len -= map.rm.size - r->offset;
+ bus_addr = dma_sb_lpar_to_bus(r, lpar_addr);
+ result = dma_sb_unmap_area(r, bus_addr, len);
+ BUG_ON(result);
+ }
+
+ result = dma_sb_region_free(r);
+ BUG_ON(result);
+
+ return result;
+}
+
+/**
+ * dma_sb_map_area_linear - Map an area of memory into a device dma region.
+ * @r: Pointer to a struct ps3_dma_region.
+ * @virt_addr: Starting virtual address of the area to map.
+ * @len: Length in bytes of the area to map.
+ * @bus_addr: A pointer to return the starting ioc bus address of the area to
+ * map.
+ * @iopte_flag: Unused by this routine.
+ *
+ * This routine just returns the corresponding bus address. Actual mapping
+ * occurs in dma_sb_region_create_linear().
+ *
+ * Always returns 0.
+ */
+
+static int dma_sb_map_area_linear(struct ps3_dma_region *r,
+ unsigned long virt_addr, unsigned long len, dma_addr_t *bus_addr,
+ u64 iopte_flag)
+{
+ /* Non-kernel addresses are used as they are, without __pa(). */
+ unsigned long phys_addr = is_kernel_addr(virt_addr) ? __pa(virt_addr)
+ : virt_addr;
+ *bus_addr = dma_sb_lpar_to_bus(r, ps3_mm_phys_to_lpar(phys_addr));
+ return 0;
+}
+
+/**
+ * dma_sb_unmap_area_linear - Unmap an area of memory from a device dma region.
+ * @r: Pointer to a struct ps3_dma_region.
+ * @bus_addr: The starting ioc bus address of the area to unmap.
+ * @len: Length in bytes of the area to unmap.
+ *
+ * This routine does nothing. Unmapping occurs in dma_sb_region_free_linear().
+ *
+ * Always returns 0.
+ */
+
+static int dma_sb_unmap_area_linear(struct ps3_dma_region *r,
+	dma_addr_t bus_addr, unsigned long len)
+{
+	return 0;
+}
+
+/* Region ops for system bus devices that map and unmap chunks on demand. */
+static const struct ps3_dma_region_ops ps3_dma_sb_region_ops = {
+ .create = dma_sb_region_create,
+ .free = dma_sb_region_free,
+ .map = dma_sb_map_area,
+ .unmap = dma_sb_unmap_area
+};
+
+/*
+ * Region ops for system bus devices using a linear mapping: everything is
+ * mapped at create time, so map only translates and unmap does nothing.
+ */
+static const struct ps3_dma_region_ops ps3_dma_sb_region_linear_ops = {
+ .create = dma_sb_region_create_linear,
+ .free = dma_sb_region_free_linear,
+ .map = dma_sb_map_area_linear,
+ .unmap = dma_sb_unmap_area_linear
+};
+
+/* Region ops for ioc0 devices. */
+static const struct ps3_dma_region_ops ps3_dma_ioc0_region_ops = {
+ .create = dma_ioc0_region_create,
+ .free = dma_ioc0_region_free,
+ .map = dma_ioc0_map_area,
+ .unmap = dma_ioc0_unmap_area
+};
+
+/**
+ * ps3_dma_region_init - Fill in a struct ps3_dma_region for a device.
+ * @dev: The system bus device the region belongs to.
+ * @r: The region to initialize.
+ * @page_size: dma page size to use for the region.
+ * @region_type: Region type stored in the region.
+ * @addr: Kernel address the region starts at, or NULL for an offset of 0.
+ * @len: Length of the region in bytes, or 0 to cover map.total rounded up
+ * to the dma page size.
+ *
+ * Also points the device's dma_mask at the region, requests a 32 bit dma
+ * mask, and selects the region ops that match dev->dev_type.  No HV region
+ * is created here; see ps3_dma_region_create().
+ *
+ * Returns 0 on success or the negative value returned by
+ * dma_set_mask_and_coherent().
+ */
+
+int ps3_dma_region_init(struct ps3_system_bus_device *dev,
+ struct ps3_dma_region *r, enum ps3_dma_page_size page_size,
+ enum ps3_dma_region_type region_type, void *addr, unsigned long len)
+{
+ unsigned long lpar_addr;
+ int result;
+
+ lpar_addr = addr ? ps3_mm_phys_to_lpar(__pa(addr)) : 0;
+
+ r->dev = dev;
+ r->page_size = page_size;
+ r->region_type = region_type;
+ r->offset = lpar_addr;
+ /* Addresses at or above map.rm.size are adjusted by map.r1.offset. */
+ if (r->offset >= map.rm.size)
+ r->offset -= map.r1.offset;
+ r->len = len ? len : ALIGN(map.total, 1 << r->page_size);
+
+ dev->core.dma_mask = &r->dma_mask;
+
+ result = dma_set_mask_and_coherent(&dev->core, DMA_BIT_MASK(32));
+
+ if (result < 0) {
+ dev_err(&dev->core, "%s:%d: dma_set_mask_and_coherent failed: %d\n",
+ __func__, __LINE__, result);
+ return result;
+ }
+
+ switch (dev->dev_type) {
+ case PS3_DEVICE_TYPE_SB:
+ r->region_ops = (USE_DYNAMIC_DMA)
+ ? &ps3_dma_sb_region_ops
+ : &ps3_dma_sb_region_linear_ops;
+ break;
+ case PS3_DEVICE_TYPE_IOC0:
+ r->region_ops = &ps3_dma_ioc0_region_ops;
+ break;
+ default:
+ /* Any other device type is a programming error. */
+ BUG();
+ return -EINVAL;
+ }
+ return 0;
+}
+EXPORT_SYMBOL(ps3_dma_region_init);
+
+/**
+ * ps3_dma_region_create - Create a dma region through its region ops.
+ * @r: Pointer to a struct ps3_dma_region that has region_ops set.
+ *
+ * Hits BUG_ON() if @r, its region_ops or the create op is NULL.
+ * Returns the result of the create op.
+ */
+
+int ps3_dma_region_create(struct ps3_dma_region *r)
+{
+ BUG_ON(!r);
+ BUG_ON(!r->region_ops);
+ BUG_ON(!r->region_ops->create);
+ return r->region_ops->create(r);
+}
+EXPORT_SYMBOL(ps3_dma_region_create);
+
+/**
+ * ps3_dma_region_free - Free a dma region through its region ops.
+ * @r: Pointer to a struct ps3_dma_region that has region_ops set.
+ *
+ * Hits BUG_ON() if @r, its region_ops or the free op is NULL.
+ * Returns the result of the free op.
+ */
+
+int ps3_dma_region_free(struct ps3_dma_region *r)
+{
+ BUG_ON(!r);
+ BUG_ON(!r->region_ops);
+ BUG_ON(!r->region_ops->free);
+ return r->region_ops->free(r);
+}
+EXPORT_SYMBOL(ps3_dma_region_free);
+
+/**
+ * ps3_dma_map - Map an area through the region's map op.
+ * @r: Pointer to a struct ps3_dma_region that has region_ops set.
+ * @virt_addr: Starting virtual address of the area to map.
+ * @len: Length in bytes of the area to map.
+ * @bus_addr: A pointer to return the bus address of the area.
+ * @iopte_flag: Flags passed on to the map op.
+ *
+ * Returns the result of the map op.
+ */
+
+int ps3_dma_map(struct ps3_dma_region *r, unsigned long virt_addr,
+	unsigned long len, dma_addr_t *bus_addr,
+	u64 iopte_flag)
+{
+	const struct ps3_dma_region_ops *ops = r->region_ops;
+
+	return ops->map(r, virt_addr, len, bus_addr, iopte_flag);
+}
+
+/**
+ * ps3_dma_unmap - Unmap an area through the region's unmap op.
+ * @r: Pointer to a struct ps3_dma_region that has region_ops set.
+ * @bus_addr: The starting bus address of the area to unmap.
+ * @len: Length in bytes of the area to unmap.
+ *
+ * Returns the result of the unmap op.
+ */
+
+int ps3_dma_unmap(struct ps3_dma_region *r, dma_addr_t bus_addr,
+	unsigned long len)
+{
+	const struct ps3_dma_region_ops *ops = r->region_ops;
+
+	return ops->unmap(r, bus_addr, len);
+}
+
+/*============================================================================*/
+/* system startup routines */
+/*============================================================================*/
+
+/**
+ * ps3_mm_init - initialize the address space state variables
+ *
+ * Reads the boot memory (map.rm) description and the total memory size from
+ * the repository, sets up the highmem region map.r1, and adds the highmem
+ * range to memblock when it has a non-zero size.  Panics if the repository
+ * read fails.
+ */
+
+void __init ps3_mm_init(void)
+{
+ int result;
+
+ DBG(" -> %s:%d\n", __func__, __LINE__);
+
+ result = ps3_repository_read_mm_info(&map.rm.base, &map.rm.size,
+ &map.total);
+
+ if (result)
+ panic("ps3_repository_read_mm_info() failed");
+
+ map.rm.offset = map.rm.base;
+ map.vas_id = map.htab_size = 0;
+
+ /* this implementation assumes map.rm.base is zero */
+
+ BUG_ON(map.rm.base);
+ BUG_ON(!map.rm.size);
+
+ /* Check if we got the highmem region from an earlier boot step */
+
+ /*
+ * NOTE(review): a non-zero return presumably means no region was
+ * handed over, so one is created and recorded - confirm against
+ * ps3_mm_get_repository_highmem().
+ */
+ if (ps3_mm_get_repository_highmem(&map.r1)) {
+ result = ps3_mm_region_create(&map.r1, map.total - map.rm.size);
+
+ if (!result)
+ ps3_mm_set_repository_highmem(&map.r1);
+ }
+
+ /* correct map.total for the real total amount of memory we use */
+ map.total = map.rm.size + map.r1.size;
+
+ if (!map.r1.size) {
+ DBG("%s:%d: No highmem region found\n", __func__, __LINE__);
+ } else {
+ DBG("%s:%d: Adding highmem region: %llxh %llxh\n",
+ __func__, __LINE__, map.rm.size,
+ map.total - map.rm.size);
+ /* Highmem is placed directly after the boot memory. */
+ memblock_add(map.rm.size, map.total - map.rm.size);
+ }
+
+ DBG(" <- %s:%d\n", __func__, __LINE__);
+}
+
+/**
+ * ps3_mm_shutdown - final cleanup of address space
+ *
+ * called during kexec sequence with MMU off.
+ *
+ * Destroys the highmem region map.r1 with ps3_mm_region_destroy().
+ */
+
+notrace void ps3_mm_shutdown(void)
+{
+ ps3_mm_region_destroy(&map.r1);
+}
diff --git a/arch/powerpc/platforms/ps3/os-area.c b/arch/powerpc/platforms/ps3/os-area.c
new file mode 100644
index 000000000..b384cd2d6
--- /dev/null
+++ b/arch/powerpc/platforms/ps3/os-area.c
@@ -0,0 +1,830 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * PS3 flash memory os area.
+ *
+ * Copyright (C) 2006 Sony Computer Entertainment Inc.
+ * Copyright 2006 Sony Corp.
+ */
+
+#include <linux/kernel.h>
+#include <linux/io.h>
+#include <linux/workqueue.h>
+#include <linux/fs.h>
+#include <linux/syscalls.h>
+#include <linux/export.h>
+#include <linux/ctype.h>
+#include <linux/memblock.h>
+#include <linux/of.h>
+#include <linux/slab.h>
+
+#include "platform.h"
+
+/* Size of one os area segment: 0x200 = 512 bytes. */
+enum {
+ OS_AREA_SEGMENT_SIZE = 0X200,
+};
+
+/* Values of os_area_header.ldr_format. */
+enum os_area_ldr_format {
+ HEADER_LDR_FORMAT_RAW = 0,
+ HEADER_LDR_FORMAT_GZIP = 1,
+};
+
+#define OS_AREA_HEADER_MAGIC_NUM "cell_ext_os_area"
+
+/**
+ * struct os_area_header - os area header segment.
+ * @magic_num: Always 'cell_ext_os_area'.
+ * @hdr_version: Header format version number.
+ * @db_area_offset: Starting segment number of other os database area.
+ * @ldr_area_offset: Starting segment number of bootloader image area.
+ * @ldr_format: HEADER_LDR_FORMAT flag.
+ * @ldr_size: Size of bootloader image in bytes.
+ *
+ * Note that the docs refer to area offsets. These are offsets in units of
+ * segments from the start of the os area (top of the header). These are
+ * better thought of as segment numbers. The os area of the os area is
+ * reserved for the os image.
+ *
+ * NOTE(review): the last sentence above reads as garbled ("the os area of
+ * the os area"); the intended wording should be confirmed against the
+ * platform documentation.
+ *
+ * To get a byte offset, multiply a segment number by OS_AREA_SEGMENT_SIZE.
+ */
+
+struct os_area_header {
+ u8 magic_num[16];
+ u32 hdr_version;
+ u32 db_area_offset;
+ u32 ldr_area_offset;
+ u32 _reserved_1;
+ u32 ldr_format;
+ u32 ldr_size;
+ u32 _reserved_2[6];
+};
+
+/* Values of os_area_params.boot_flag. */
+enum os_area_boot_flag {
+ PARAM_BOOT_FLAG_GAME_OS = 0,
+ PARAM_BOOT_FLAG_OTHER_OS = 1,
+};
+
+/* Values of os_area_params.ctrl_button. */
+enum os_area_ctrl_button {
+ PARAM_CTRL_BUTTON_O_IS_YES = 0,
+ PARAM_CTRL_BUTTON_X_IS_YES = 1,
+};
+
+/**
+ * struct os_area_params - os area params segment.
+ * @boot_flag: User preference of operating system, PARAM_BOOT_FLAG flag.
+ * @num_params: Number of params in this (params) segment.
+ * @rtc_diff: Difference in seconds between 1970 and the ps3 rtc value.
+ * @av_multi_out: User preference of AV output, PARAM_AV_MULTI_OUT flag.
+ * @ctrl_button: User preference of controller button config, PARAM_CTRL_BUTTON
+ * flag.
+ * @static_ip_addr: User preference of static IP address.
+ * @network_mask: User preference of static network mask.
+ * @default_gateway: User preference of static default gateway.
+ * @dns_primary: User preference of static primary dns server.
+ * @dns_secondary: User preference of static secondary dns server.
+ *
+ * The ps3 rtc maintains a read-only value that approximates seconds since
+ * 2000-01-01 00:00:00 UTC.
+ *
+ * User preference of zero for static_ip_addr means use dhcp.
+ *
+ * Each "param" group below occupies 16 bytes, reserved padding included.
+ */
+
+struct os_area_params {
+ u32 boot_flag;
+ u32 _reserved_1[3];
+ u32 num_params;
+ u32 _reserved_2[3];
+ /* param 0 */
+ s64 rtc_diff;
+ u8 av_multi_out;
+ u8 ctrl_button;
+ u8 _reserved_3[6];
+ /* param 1 */
+ u8 static_ip_addr[4];
+ u8 network_mask[4];
+ u8 default_gateway[4];
+ u8 _reserved_4[4];
+ /* param 2 */
+ u8 dns_primary[4];
+ u8 dns_secondary[4];
+ u8 _reserved_5[8];
+};
+
+#define OS_AREA_DB_MAGIC_NUM "-db-"
+
+/**
+ * struct os_area_db - Shared flash memory database.
+ * @magic_num: Always '-db-'.
+ * @version: os_area_db format version number.
+ * @index_64: byte offset of the database id index for 64 bit variables.
+ * @count_64: number of usable 64 bit index entries
+ * @index_32: byte offset of the database id index for 32 bit variables.
+ * @count_32: number of usable 32 bit index entries
+ * @index_16: byte offset of the database id index for 16 bit variables.
+ * @count_16: number of usable 16 bit index entries
+ *
+ * Flash rom storage for exclusive use by guests running in the other os lpar.
+ * The current system configuration allocates 1K (two segments) for other os
+ * use.
+ *
+ * The index_* byte offsets are relative to the start of this structure; the
+ * indexes and their values live in @_db_data.  os_area_db_init() checks at
+ * build time that the structure is exactly two segments long.
+ */
+
+struct os_area_db {
+ u8 magic_num[4];
+ u16 version;
+ u16 _reserved_1;
+ u16 index_64;
+ u16 count_64;
+ u16 index_32;
+ u16 count_32;
+ u16 index_16;
+ u16 count_16;
+ u32 _reserved_2;
+ u8 _db_data[1000];
+};
+
+/**
+ * enum os_area_db_owner - Data owners.
+ *
+ * OS_AREA_DB_OWNER_ANY is a wildcard used when matching entries and
+ * OS_AREA_DB_OWNER_NONE marks an empty entry.  OS_AREA_DB_OWNER_MAX is 32,
+ * the number of values the 5 bit db_index.owner field can hold.
+ */
+
+enum os_area_db_owner {
+ OS_AREA_DB_OWNER_ANY = -1,
+ OS_AREA_DB_OWNER_NONE = 0,
+ OS_AREA_DB_OWNER_PROTOTYPE = 1,
+ OS_AREA_DB_OWNER_LINUX = 2,
+ OS_AREA_DB_OWNER_PETITBOOT = 3,
+ OS_AREA_DB_OWNER_MAX = 32,
+};
+
+/*
+ * Data keys.  OS_AREA_DB_KEY_ANY is a wildcard used when matching entries.
+ * OS_AREA_DB_KEY_MAX is 8, the number of values the 3 bit db_index.key field
+ * can hold.
+ */
+enum os_area_db_key {
+ OS_AREA_DB_KEY_ANY = -1,
+ OS_AREA_DB_KEY_NONE = 0,
+ OS_AREA_DB_KEY_RTC_DIFF = 1,
+ OS_AREA_DB_KEY_VIDEO_MODE = 2,
+ OS_AREA_DB_KEY_MAX = 8,
+};
+
+/* An (owner, key) pair identifying a database entry. */
+struct os_area_db_id {
+ int owner;
+ int key;
+};
+
+/* Matches unused entries (owner and key both zero). */
+static const struct os_area_db_id os_area_db_id_empty = {
+ .owner = OS_AREA_DB_OWNER_NONE,
+ .key = OS_AREA_DB_KEY_NONE
+};
+
+/* Matches every entry. */
+static const struct os_area_db_id os_area_db_id_any = {
+ .owner = OS_AREA_DB_OWNER_ANY,
+ .key = OS_AREA_DB_KEY_ANY
+};
+
+/* The entry Linux uses to store its rtc_diff value. */
+static const struct os_area_db_id os_area_db_id_rtc_diff = {
+ .owner = OS_AREA_DB_OWNER_LINUX,
+ .key = OS_AREA_DB_KEY_RTC_DIFF
+};
+
+#define SECONDS_FROM_1970_TO_2000 946684800LL
+
+/**
+ * struct saved_params - Static working copies of data from the PS3 'os area'.
+ * @valid: Set to 1 once ps3_os_area_save_params() has filled in the values.
+ * @rtc_diff: Working copy of the rtc diff value.
+ * @av_multi_out: Working copy of the AV output preference.
+ *
+ * The order of preference we use for the rtc_diff source:
+ * 1) The database value.
+ * 2) The game os value.
+ * 3) The number of seconds from 1970 to 2000.
+ */
+
+static struct saved_params {
+ unsigned int valid;
+ s64 rtc_diff;
+ unsigned int av_multi_out;
+} saved_params;
+
+/* Device tree property backed directly by saved_params.rtc_diff. */
+static struct property property_rtc_diff = {
+ .name = "linux,rtc_diff",
+ .length = sizeof(saved_params.rtc_diff),
+ .value = &saved_params.rtc_diff,
+};
+
+/* Device tree property backed directly by saved_params.av_multi_out. */
+static struct property property_av_multi_out = {
+ .name = "linux,av_multi_out",
+ .length = sizeof(saved_params.av_multi_out),
+ .value = &saved_params.av_multi_out,
+};
+
+
+/* Protects os_area_flash_ops and serializes calls made through it. */
+static DEFINE_MUTEX(os_area_flash_mutex);
+
+/* Flash access ops supplied by a flash driver; NULL until one registers. */
+static const struct ps3_os_area_flash_ops *os_area_flash_ops;
+
+/**
+ * ps3_os_area_flash_register - Set the ops used to access the flash os area.
+ * @ops: The ops to use from now on; replaces any previously set ops.
+ */
+
+void ps3_os_area_flash_register(const struct ps3_os_area_flash_ops *ops)
+{
+ mutex_lock(&os_area_flash_mutex);
+ os_area_flash_ops = ops;
+ mutex_unlock(&os_area_flash_mutex);
+}
+EXPORT_SYMBOL_GPL(ps3_os_area_flash_register);
+
+/**
+ * os_area_flash_read - Read from flash through the registered ops.
+ * @buf: Destination buffer.
+ * @count: Number of bytes to read.
+ * @pos: Offset to read from.
+ *
+ * Returns the result of the registered read op, or -ENODEV when no ops are
+ * registered.
+ */
+
+static ssize_t os_area_flash_read(void *buf, size_t count, loff_t pos)
+{
+	ssize_t nread;
+
+	mutex_lock(&os_area_flash_mutex);
+	nread = os_area_flash_ops
+		? os_area_flash_ops->read(buf, count, pos)
+		: -ENODEV;
+	mutex_unlock(&os_area_flash_mutex);
+
+	return nread;
+}
+
+/**
+ * os_area_flash_write - Write to flash through the registered ops.
+ * @buf: Source buffer.
+ * @count: Number of bytes to write.
+ * @pos: Offset to write to.
+ *
+ * Returns the result of the registered write op, or -ENODEV when no ops are
+ * registered.
+ */
+
+static ssize_t os_area_flash_write(const void *buf, size_t count, loff_t pos)
+{
+	ssize_t nwritten;
+
+	mutex_lock(&os_area_flash_mutex);
+	nwritten = os_area_flash_ops
+		? os_area_flash_ops->write(buf, count, pos)
+		: -ENODEV;
+	mutex_unlock(&os_area_flash_mutex);
+
+	return nwritten;
+}
+
+
+/**
+ * os_area_set_property - Add or overwrite a saved_params value to the device tree.
+ * @node: The device tree node to update.
+ * @prop: The property to add.
+ *
+ * Overwrites an existing property: a property of the same name is removed
+ * before @prop is added.  A failure to add is only logged.
+ */
+
+static void os_area_set_property(struct device_node *node,
+	struct property *prop)
+{
+	int result;
+	struct property *tmp = of_find_property(node, prop->name, NULL);
+
+	if (tmp) {
+		pr_debug("%s:%d found %s\n", __func__, __LINE__, prop->name);
+		of_remove_property(node, tmp);
+	}
+
+	result = of_add_property(node, prop);
+
+	/* Report the call that was actually made. */
+	if (result)
+		pr_debug("%s:%d of_add_property failed\n", __func__,
+			__LINE__);
+}
+
+/**
+ * os_area_get_property - Get a saved_params value from the device tree.
+ * @node: The device tree node to read from.
+ * @prop: Property whose name is looked up and whose value buffer is filled.
+ *
+ * Copies the value found in the device tree into @prop->value.  A length
+ * mismatch between the two properties hits BUG_ON().  When the property is
+ * absent @prop is left untouched.
+ */
+
+static void __init os_area_get_property(struct device_node *node,
+	struct property *prop)
+{
+	const struct property *found;
+
+	found = of_find_property(node, prop->name, NULL);
+	if (!found) {
+		pr_debug("%s:%d not found %s\n", __func__, __LINE__,
+			prop->name);
+		return;
+	}
+
+	BUG_ON(prop->length != found->length);
+	memcpy(prop->value, found->value, prop->length);
+}
+
+/**
+ * dump_field - Make a printable, NUL terminated copy of a raw field.
+ * @s: Destination; must have room for @size_of_field + 1 bytes.
+ * @field: The raw bytes to copy.
+ * @size_of_field: Number of bytes in @field.
+ *
+ * Non-printable bytes are replaced with '.'.  Without DEBUG defined this
+ * does nothing and @s is left unmodified.
+ */
+
+static void dump_field(char *s, const u8 *field, int size_of_field)
+{
+#if defined(DEBUG)
+ int i;
+
+ for (i = 0; i < size_of_field; i++)
+ s[i] = isprint(field[i]) ? field[i] : '.';
+ s[i] = 0;
+#endif
+}
+
+/* dump_header - pr_debug the header fields, tagged with the caller's location. */
+#define dump_header(_a) _dump_header(_a, __func__, __LINE__)
+static void _dump_header(const struct os_area_header *h, const char *func,
+ int line)
+{
+ /* One extra byte for the terminator written by dump_field(). */
+ char str[sizeof(h->magic_num) + 1];
+
+ dump_field(str, h->magic_num, sizeof(h->magic_num));
+ pr_debug("%s:%d: h.magic_num: '%s'\n", func, line,
+ str);
+ pr_debug("%s:%d: h.hdr_version: %u\n", func, line,
+ h->hdr_version);
+ pr_debug("%s:%d: h.db_area_offset: %u\n", func, line,
+ h->db_area_offset);
+ pr_debug("%s:%d: h.ldr_area_offset: %u\n", func, line,
+ h->ldr_area_offset);
+ pr_debug("%s:%d: h.ldr_format: %u\n", func, line,
+ h->ldr_format);
+ pr_debug("%s:%d: h.ldr_size: %xh\n", func, line,
+ h->ldr_size);
+}
+
+/* dump_params - pr_debug the params fields, tagged with the caller's location. */
+#define dump_params(_a) _dump_params(_a, __func__, __LINE__)
+static void _dump_params(const struct os_area_params *p, const char *func,
+ int line)
+{
+ pr_debug("%s:%d: p.boot_flag: %u\n", func, line, p->boot_flag);
+ pr_debug("%s:%d: p.num_params: %u\n", func, line, p->num_params);
+ pr_debug("%s:%d: p.rtc_diff %lld\n", func, line, p->rtc_diff);
+ pr_debug("%s:%d: p.av_multi_out %u\n", func, line, p->av_multi_out);
+ pr_debug("%s:%d: p.ctrl_button: %u\n", func, line, p->ctrl_button);
+ /* The address fields are 4 byte arrays, printed in dotted form. */
+ pr_debug("%s:%d: p.static_ip_addr: %u.%u.%u.%u\n", func, line,
+ p->static_ip_addr[0], p->static_ip_addr[1],
+ p->static_ip_addr[2], p->static_ip_addr[3]);
+ pr_debug("%s:%d: p.network_mask: %u.%u.%u.%u\n", func, line,
+ p->network_mask[0], p->network_mask[1],
+ p->network_mask[2], p->network_mask[3]);
+ pr_debug("%s:%d: p.default_gateway: %u.%u.%u.%u\n", func, line,
+ p->default_gateway[0], p->default_gateway[1],
+ p->default_gateway[2], p->default_gateway[3]);
+ pr_debug("%s:%d: p.dns_primary: %u.%u.%u.%u\n", func, line,
+ p->dns_primary[0], p->dns_primary[1],
+ p->dns_primary[2], p->dns_primary[3]);
+ pr_debug("%s:%d: p.dns_secondary: %u.%u.%u.%u\n", func, line,
+ p->dns_secondary[0], p->dns_secondary[1],
+ p->dns_secondary[2], p->dns_secondary[3]);
+}
+
+/**
+ * verify_header - Sanity check an os area header.
+ * @header: The header to check.
+ *
+ * Checks the magic number, that the header version is at least 1, and that
+ * the database area does not start after the bootloader area.
+ *
+ * Returns 0 when the header looks valid, otherwise -EINVAL (the same error
+ * db_verify() returns).
+ */
+
+static int verify_header(const struct os_area_header *header)
+{
+	if (memcmp(header->magic_num, OS_AREA_HEADER_MAGIC_NUM,
+		sizeof(header->magic_num))) {
+		pr_debug("%s:%d magic_num failed\n", __func__, __LINE__);
+		return -EINVAL;
+	}
+
+	if (header->hdr_version < 1) {
+		pr_debug("%s:%d hdr_version failed\n", __func__, __LINE__);
+		return -EINVAL;
+	}
+
+	if (header->db_area_offset > header->ldr_area_offset) {
+		pr_debug("%s:%d offsets failed\n", __func__, __LINE__);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/**
+ * db_verify - Sanity check an os area database.
+ * @db: The database to check.
+ *
+ * Returns 0 when the magic number matches and the version is 1, otherwise
+ * -EINVAL.
+ */
+
+static int db_verify(const struct os_area_db *db)
+{
+	const int magic_mismatch = memcmp(db->magic_num, OS_AREA_DB_MAGIC_NUM,
+		sizeof(db->magic_num));
+
+	if (magic_mismatch) {
+		pr_debug("%s:%d magic_num failed\n", __func__, __LINE__);
+		return -EINVAL;
+	}
+
+	if (db->version == 1)
+		return 0;
+
+	pr_debug("%s:%d version failed\n", __func__, __LINE__);
+	return -EINVAL;
+}
+
+/* One index entry: a 5 bit owner and a 3 bit key packed into one byte. */
+struct db_index {
+ uint8_t owner:5;
+ uint8_t key:3;
+};
+
+/*
+ * Iteration state for db_for_each_64().  Set @db to NULL before the first
+ * call; @idx and the value pointer advance together, so after a successful
+ * call they refer to the same entry.
+ */
+struct db_iterator {
+ const struct os_area_db *db;
+ struct os_area_db_id match_id;
+ struct db_index *idx;
+ struct db_index *last_idx;
+ union {
+ uint64_t *value_64;
+ uint32_t *value_32;
+ uint16_t *value_16;
+ };
+};
+
+/*
+ * db_align_up - Round @val up to a multiple of @size.
+ *
+ * The mask arithmetic is only meaningful when @size is a power of two.
+ */
+static unsigned int db_align_up(unsigned int val, unsigned int size)
+{
+	const unsigned int mask = size - 1;
+
+	return (val + mask) & ~mask;
+}
+
+/**
+ * db_for_each_64 - Iterator for 64 bit entries.
+ * @db: The database to walk.
+ * @match_id: The id to match, or NULL to match all entries.
+ * @i: Iterator state; set i->db to NULL before the first call.
+ *
+ * A NULL value for id can be used to match all entries.
+ * OS_AREA_DB_OWNER_ANY and OS_AREA_DB_KEY_ANY can be used to match all.
+ *
+ * @db and @match_id are only read on the first call; later calls continue
+ * from the state held in @i.
+ *
+ * Returns 1 with i->idx and i->value_64 pointing at the next matching entry,
+ * or 0 when the end of the index has been reached.
+ */
+
+static int db_for_each_64(const struct os_area_db *db,
+ const struct os_area_db_id *match_id, struct db_iterator *i)
+{
+next:
+ if (!i->db) {
+ i->db = db;
+ i->match_id = match_id ? *match_id : os_area_db_id_any;
+ i->idx = (void *)db + db->index_64;
+ i->last_idx = i->idx + db->count_64;
+ /* Values start after the index, rounded up to 8 entries. */
+ i->value_64 = (void *)db + db->index_64
+ + db_align_up(db->count_64, 8);
+ } else {
+ i->idx++;
+ i->value_64++;
+ }
+
+ if (i->idx >= i->last_idx) {
+ pr_debug("%s:%d: reached end\n", __func__, __LINE__);
+ return 0;
+ }
+
+ /* Skip entries whose owner or key does not match. */
+ if (i->match_id.owner != OS_AREA_DB_OWNER_ANY
+ && i->match_id.owner != (int)i->idx->owner)
+ goto next;
+ if (i->match_id.key != OS_AREA_DB_KEY_ANY
+ && i->match_id.key != (int)i->idx->key)
+ goto next;
+
+ return 1;
+}
+
+/**
+ * db_delete_64 - Clear every 64 bit entry that matches an id.
+ * @db: The database to modify.
+ * @id: The id to match; passed unchanged to db_for_each_64().
+ *
+ * Matching entries get their owner, key and value set to zero.
+ * Always returns 0.
+ */
+
+static int db_delete_64(struct os_area_db *db, const struct os_area_db_id *id)
+{
+	struct db_iterator it;
+
+	it.db = NULL;
+	while (db_for_each_64(db, id, &it)) {
+
+		pr_debug("%s:%d: got (%d:%d) %llxh\n", __func__, __LINE__,
+			it.idx->owner, it.idx->key,
+			(unsigned long long)*it.value_64);
+
+		*it.value_64 = 0;
+		it.idx->key = 0;
+		it.idx->owner = 0;
+	}
+	return 0;
+}
+
+/**
+ * db_set_64 - Store a 64 bit value under an id.
+ * @db: The database to modify.
+ * @id: The id to store under; owner must be non-zero and neither owner nor
+ * key may be the ANY wildcard.
+ * @value: The value to store.
+ *
+ * Existing entries for @id are deleted, then the first empty entry is
+ * claimed for the new value.
+ *
+ * Returns 0 on success, -1 for a bad id or when no empty entry is left.
+ */
+
+static int db_set_64(struct os_area_db *db, const struct os_area_db_id *id,
+	uint64_t value)
+{
+	struct db_iterator slot;
+	int bad_id;
+
+	pr_debug("%s:%d: (%d:%d) <= %llxh\n", __func__, __LINE__,
+		id->owner, id->key, (unsigned long long)value);
+
+	bad_id = !id->owner || id->owner == OS_AREA_DB_OWNER_ANY
+		|| id->key == OS_AREA_DB_KEY_ANY;
+	if (bad_id) {
+		pr_debug("%s:%d: bad id: (%d:%d)\n", __func__,
+			__LINE__, id->owner, id->key);
+		return -1;
+	}
+
+	db_delete_64(db, id);
+
+	slot.db = NULL;
+	if (!db_for_each_64(db, &os_area_db_id_empty, &slot)) {
+		pr_debug("%s:%d: database full.\n",
+			__func__, __LINE__);
+		return -1;
+	}
+
+	pr_debug("%s:%d: got (%d:%d) %llxh\n", __func__, __LINE__,
+		slot.idx->owner, slot.idx->key,
+		(unsigned long long)*slot.value_64);
+
+	slot.idx->owner = id->owner;
+	slot.idx->key = id->key;
+	*slot.value_64 = value;
+
+	pr_debug("%s:%d: set (%d:%d) <= %llxh\n", __func__, __LINE__,
+		slot.idx->owner, slot.idx->key,
+		(unsigned long long)*slot.value_64);
+	return 0;
+}
+
+/**
+ * db_get_64 - Look up the first 64 bit value stored under an id.
+ * @db: The database to search.
+ * @id: The id to match; passed unchanged to db_for_each_64().
+ * @value: Where to store the value; untouched when nothing matches.
+ *
+ * Returns 0 when a value was found, otherwise -1.
+ */
+
+static int __init db_get_64(const struct os_area_db *db,
+	const struct os_area_db_id *id, uint64_t *value)
+{
+	struct db_iterator it;
+
+	it.db = NULL;
+	if (!db_for_each_64(db, id, &it)) {
+		pr_debug("%s:%d: not found\n", __func__, __LINE__);
+		return -1;
+	}
+
+	*value = *it.value_64;
+	pr_debug("%s:%d: found %lld\n", __func__, __LINE__,
+		(long long int)*it.value_64);
+	return 0;
+}
+
+/*
+ * db_get_rtc_diff - Read the Linux rtc_diff entry from the database.
+ *
+ * Returns the result of db_get_64(); @rtc_diff is only written on success.
+ */
+static int __init db_get_rtc_diff(const struct os_area_db *db, int64_t *rtc_diff)
+{
+ return db_get_64(db, &os_area_db_id_rtc_diff, (uint64_t*)rtc_diff);
+}
+
+/* dump_db - pr_debug the database header, tagged with the caller's location. */
+#define dump_db(a) _dump_db(a, __func__, __LINE__)
+static void _dump_db(const struct os_area_db *db, const char *func,
+ int line)
+{
+ /* One extra byte for the terminator written by dump_field(). */
+ char str[sizeof(db->magic_num) + 1];
+
+ dump_field(str, db->magic_num, sizeof(db->magic_num));
+ pr_debug("%s:%d: db.magic_num: '%s'\n", func, line,
+ str);
+ pr_debug("%s:%d: db.version: %u\n", func, line,
+ db->version);
+ pr_debug("%s:%d: db.index_64: %u\n", func, line,
+ db->index_64);
+ pr_debug("%s:%d: db.count_64: %u\n", func, line,
+ db->count_64);
+ pr_debug("%s:%d: db.index_32: %u\n", func, line,
+ db->index_32);
+ pr_debug("%s:%d: db.count_32: %u\n", func, line,
+ db->count_32);
+ pr_debug("%s:%d: db.index_16: %u\n", func, line,
+ db->index_16);
+ pr_debug("%s:%d: db.count_16: %u\n", func, line,
+ db->count_16);
+}
+
+/**
+ * os_area_db_init - Format an empty database.
+ * @db: The database to initialize; all of it is overwritten.
+ *
+ * Zeroes the database, then writes the magic number, version 1 and the
+ * index offsets and counts.  The 64, 32 and 16 bit sections follow each
+ * other directly after the header; each has room for INDEX_*_COUNT one byte
+ * index entries followed by VALUES_*_COUNT values.
+ */
+
+static void os_area_db_init(struct os_area_db *db)
+{
+ enum {
+ HEADER_SIZE = offsetof(struct os_area_db, _db_data),
+ INDEX_64_COUNT = 64,
+ VALUES_64_COUNT = 57,
+ INDEX_32_COUNT = 64,
+ VALUES_32_COUNT = 57,
+ INDEX_16_COUNT = 64,
+ VALUES_16_COUNT = 57,
+ };
+
+ memset(db, 0, sizeof(struct os_area_db));
+
+ memcpy(db->magic_num, OS_AREA_DB_MAGIC_NUM, sizeof(db->magic_num));
+ db->version = 1;
+ db->index_64 = HEADER_SIZE;
+ db->count_64 = VALUES_64_COUNT;
+ /* Each section starts where the previous section's values end. */
+ db->index_32 = HEADER_SIZE
+ + INDEX_64_COUNT * sizeof(struct db_index)
+ + VALUES_64_COUNT * sizeof(u64);
+ db->count_32 = VALUES_32_COUNT;
+ db->index_16 = HEADER_SIZE
+ + INDEX_64_COUNT * sizeof(struct db_index)
+ + VALUES_64_COUNT * sizeof(u64)
+ + INDEX_32_COUNT * sizeof(struct db_index)
+ + VALUES_32_COUNT * sizeof(u32);
+ db->count_16 = VALUES_16_COUNT;
+
+ /* Rules to check db layout. */
+
+ BUILD_BUG_ON(sizeof(struct db_index) != 1);
+ BUILD_BUG_ON(sizeof(struct os_area_db) != 2 * OS_AREA_SEGMENT_SIZE);
+ BUILD_BUG_ON(INDEX_64_COUNT & 0x7);
+ BUILD_BUG_ON(VALUES_64_COUNT > INDEX_64_COUNT);
+ BUILD_BUG_ON(INDEX_32_COUNT & 0x7);
+ BUILD_BUG_ON(VALUES_32_COUNT > INDEX_32_COUNT);
+ BUILD_BUG_ON(INDEX_16_COUNT & 0x7);
+ BUILD_BUG_ON(VALUES_16_COUNT > INDEX_16_COUNT);
+ /* All three sections together must fit inside the structure. */
+ BUILD_BUG_ON(HEADER_SIZE
+ + INDEX_64_COUNT * sizeof(struct db_index)
+ + VALUES_64_COUNT * sizeof(u64)
+ + INDEX_32_COUNT * sizeof(struct db_index)
+ + VALUES_32_COUNT * sizeof(u32)
+ + INDEX_16_COUNT * sizeof(struct db_index)
+ + VALUES_16_COUNT * sizeof(u16)
+ > sizeof(struct os_area_db));
+}
+
+/**
+ * update_flash_db - Helper for os_area_queue_work_handler.
+ *
+ * Reads the os area header and database from flash, reformats the database
+ * if it does not verify, stores saved_params.rtc_diff in it and writes the
+ * database back to flash.
+ *
+ * Returns 0 on success, -ENOMEM if the buffer cannot be allocated, the error
+ * from the flash read or write, -EINVAL when the header is bad or the
+ * database does not fit in the data read, or -EIO on a short write.
+ */
+
+static int update_flash_db(void)
+{
+	const unsigned int buf_len = 8 * OS_AREA_SEGMENT_SIZE;
+	struct os_area_header *header;
+	ssize_t count;
+	int error;
+	loff_t pos;
+	struct os_area_db* db;
+
+	/* Read in header and db from flash. */
+
+	header = kmalloc(buf_len, GFP_KERNEL);
+	if (!header)
+		return -ENOMEM;
+
+	count = os_area_flash_read(header, buf_len, 0);
+	if (count < 0) {
+		pr_debug("%s: os_area_flash_read failed %zd\n", __func__,
+			count);
+		error = count;
+		goto fail;
+	}
+
+	/*
+	 * The offset is widened before the multiply so it cannot wrap, and
+	 * the whole database (not just its first byte) must lie inside the
+	 * data that was read.  pos is only trusted once the header verifies.
+	 */
+	pos = (loff_t)header->db_area_offset * OS_AREA_SEGMENT_SIZE;
+	if (count < OS_AREA_SEGMENT_SIZE || verify_header(header) ||
+		count < pos + (loff_t)sizeof(struct os_area_db)) {
+		pr_debug("%s: verify_header failed\n", __func__);
+		dump_header(header);
+		error = -EINVAL;
+		goto fail;
+	}
+
+	/* Now got a good db offset and some maybe good db data. */
+
+	db = (void *)header + pos;
+
+	if (db_verify(db)) {
+		pr_notice("%s: Verify of flash database failed, formatting.\n",
+			__func__);
+		dump_db(db);
+		os_area_db_init(db);
+	}
+
+	/*
+	 * Now got good db data.  A database that had to be reformatted is
+	 * not a failure, so the verify result must not leak into the return
+	 * value.
+	 */
+	error = 0;
+
+	db_set_64(db, &os_area_db_id_rtc_diff, saved_params.rtc_diff);
+
+	count = os_area_flash_write(db, sizeof(struct os_area_db), pos);
+	if (count < 0 || count < sizeof(struct os_area_db)) {
+		pr_debug("%s: os_area_flash_write failed %zd\n", __func__,
+			count);
+		error = count < 0 ? count : -EIO;
+	}
+
+fail:
+	kfree(header);
+	return error;
+}
+
+/**
+ * os_area_queue_work_handler - Asynchronous write handler.
+ * @work: Unused.
+ *
+ * An asynchronous write for flash memory and the device tree. Do not
+ * call directly, use os_area_queue_work().
+ *
+ * Failures are only logged; the flash update is attempted even when the
+ * device tree root node cannot be found.
+ */
+
+static void os_area_queue_work_handler(struct work_struct *work)
+{
+ struct device_node *node;
+ int error;
+
+ pr_debug(" -> %s:%d\n", __func__, __LINE__);
+
+ /* Publish the current rtc_diff in the device tree root node. */
+ node = of_find_node_by_path("/");
+ if (node) {
+ os_area_set_property(node, &property_rtc_diff);
+ of_node_put(node);
+ } else
+ pr_debug("%s:%d of_find_node_by_path failed\n",
+ __func__, __LINE__);
+
+ error = update_flash_db();
+ if (error)
+ pr_warn("%s: Could not update FLASH ROM\n", __func__);
+
+ pr_debug(" <- %s:%d\n", __func__, __LINE__);
+}
+
+/* os_area_queue_work - Schedule os_area_queue_work_handler() to run. */
+static void os_area_queue_work(void)
+{
+ static DECLARE_WORK(q, os_area_queue_work_handler);
+
+ /*
+ * NOTE(review): presumably orders the caller's saved_params update
+ * before the work is queued - confirm.
+ */
+ wmb();
+ schedule_work(&q);
+}
+
+/**
+ * ps3_os_area_save_params - Copy data from os area mirror to @saved_params.
+ *
+ * For the convenience of the guest the HV makes a copy of the os area in
+ * flash to a high address in the boot memory region and then puts that RAM
+ * address and the byte count into the repository for retrieval by the guest.
+ * We copy the data we want into a static variable and allow the memory setup
+ * by the HV to be claimed by the memblock manager.
+ *
+ * The os area mirror will not be available to a second stage kernel, and
+ * the header verify will fail. In this case, the saved_params values will
+ * be set from flash memory or the passed in device tree in ps3_os_area_init().
+ *
+ * On success saved_params.valid is set and the header in the mirror is
+ * zeroed, so a later verify of the same memory fails.
+ */
+
+void __init ps3_os_area_save_params(void)
+{
+ int result;
+ u64 lpar_addr;
+ unsigned int size;
+ struct os_area_header *header;
+ struct os_area_params *params;
+ struct os_area_db *db;
+
+ pr_debug(" -> %s:%d\n", __func__, __LINE__);
+
+ result = ps3_repository_read_boot_dat_info(&lpar_addr, &size);
+
+ if (result) {
+ pr_debug("%s:%d ps3_repository_read_boot_dat_info failed\n",
+ __func__, __LINE__);
+ return;
+ }
+
+ /* The params segment directly follows the header segment. */
+ header = (struct os_area_header *)__va(lpar_addr);
+ params = (struct os_area_params *)__va(lpar_addr
+ + OS_AREA_SEGMENT_SIZE);
+
+ result = verify_header(header);
+
+ if (result) {
+ /* Second stage kernels exit here. */
+ pr_debug("%s:%d verify_header failed\n", __func__, __LINE__);
+ dump_header(header);
+ return;
+ }
+
+ db = (struct os_area_db *)__va(lpar_addr
+ + header->db_area_offset * OS_AREA_SEGMENT_SIZE);
+
+ dump_header(header);
+ dump_params(params);
+ dump_db(db);
+
+ /*
+ * Prefer the database value; fall back to the params value, and to
+ * the 1970..2000 offset when that is zero.
+ */
+ result = db_verify(db) || db_get_rtc_diff(db, &saved_params.rtc_diff);
+ if (result)
+ saved_params.rtc_diff = params->rtc_diff ? params->rtc_diff
+ : SECONDS_FROM_1970_TO_2000;
+ saved_params.av_multi_out = params->av_multi_out;
+ saved_params.valid = 1;
+
+ memset(header, 0, sizeof(*header));
+
+ pr_debug(" <- %s:%d\n", __func__, __LINE__);
+}
+
+/**
+ * ps3_os_area_init - Setup os area device tree properties as needed.
+ *
+ * When ps3_os_area_save_params() did not produce valid values, the values
+ * are taken from the device tree root node instead.  A zero rtc_diff is
+ * replaced by SECONDS_FROM_1970_TO_2000, and both properties are then
+ * written (back) to the root node.
+ */
+
+void __init ps3_os_area_init(void)
+{
+ struct device_node *node;
+
+ pr_debug(" -> %s:%d\n", __func__, __LINE__);
+
+ node = of_find_node_by_path("/");
+
+ if (!saved_params.valid && node) {
+ /* Second stage kernels should have a dt entry. */
+ os_area_get_property(node, &property_rtc_diff);
+ os_area_get_property(node, &property_av_multi_out);
+ }
+
+ if(!saved_params.rtc_diff)
+ saved_params.rtc_diff = SECONDS_FROM_1970_TO_2000;
+
+ if (node) {
+ os_area_set_property(node, &property_rtc_diff);
+ os_area_set_property(node, &property_av_multi_out);
+ of_node_put(node);
+ } else
+ pr_debug("%s:%d of_find_node_by_path failed\n",
+ __func__, __LINE__);
+
+ pr_debug(" <- %s:%d\n", __func__, __LINE__);
+}
+
+/**
+ * ps3_os_area_get_rtc_diff - Returns the rtc diff value.
+ *
+ * The value is the working copy in saved_params, which is stored as s64 and
+ * returned here as u64.
+ */
+
+u64 ps3_os_area_get_rtc_diff(void)
+{
+ return saved_params.rtc_diff;
+}
+EXPORT_SYMBOL_GPL(ps3_os_area_get_rtc_diff);
+
+/**
+ * ps3_os_area_set_rtc_diff - Set the rtc diff value.
+ * @rtc_diff: The new value.
+ *
+ * An asynchronous write is needed to support writing updates from
+ * the timer interrupt context.
+ *
+ * Nothing is queued when the value is unchanged.
+ */
+
+void ps3_os_area_set_rtc_diff(u64 rtc_diff)
+{
+	if (saved_params.rtc_diff == rtc_diff)
+		return;
+
+	saved_params.rtc_diff = rtc_diff;
+	os_area_queue_work();
+}
+EXPORT_SYMBOL_GPL(ps3_os_area_set_rtc_diff);
+
+/**
+ * ps3_os_area_get_av_multi_out - Returns the default video mode.
+ *
+ * The value is the working copy held in saved_params.av_multi_out.
+ */
+
+enum ps3_param_av_multi_out ps3_os_area_get_av_multi_out(void)
+{
+ return saved_params.av_multi_out;
+}
+EXPORT_SYMBOL_GPL(ps3_os_area_get_av_multi_out);
diff --git a/arch/powerpc/platforms/ps3/platform.h b/arch/powerpc/platforms/ps3/platform.h
new file mode 100644
index 000000000..6beecdb0d
--- /dev/null
+++ b/arch/powerpc/platforms/ps3/platform.h
@@ -0,0 +1,253 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * PS3 platform declarations.
+ *
+ * Copyright (C) 2006 Sony Computer Entertainment Inc.
+ * Copyright 2006 Sony Corp.
+ */
+
+#if !defined(_PS3_PLATFORM_H)
+#define _PS3_PLATFORM_H
+
+#include <linux/rtc.h>
+#include <scsi/scsi.h>
+
+#include <asm/ps3.h>
+
+/* htab */
+
+void __init ps3_hpte_init(unsigned long htab_size);
+void __init ps3_map_htab(void);
+
+/* mm */
+
+void __init ps3_mm_init(void);
+void __init ps3_mm_vas_create(unsigned long* htab_size);
+void ps3_mm_vas_destroy(void);
+void ps3_mm_shutdown(void);
+
+/* irq */
+
+void ps3_init_IRQ(void);
+void ps3_shutdown_IRQ(int cpu);
+void __init ps3_register_ipi_debug_brk(unsigned int cpu, unsigned int virq);
+void __init ps3_register_ipi_irq(unsigned int cpu, unsigned int virq);
+
+/* smp */
+
+void __init smp_init_ps3(void);
+#ifdef CONFIG_SMP
+void ps3_smp_cleanup_cpu(int cpu);
+#else
+/* Without CONFIG_SMP the cleanup is an empty inline stub. */
+static inline void ps3_smp_cleanup_cpu(int cpu) { }
+#endif
+
+/* time */
+
+void __init ps3_calibrate_decr(void);
+time64_t __init ps3_get_boot_time(void);
+void ps3_get_rtc_time(struct rtc_time *time);
+int ps3_set_rtc_time(struct rtc_time *time);
+
+/* os area */
+
+void __init ps3_os_area_save_params(void);
+void __init ps3_os_area_init(void);
+
+/* spu */
+
+#if defined(CONFIG_SPU_BASE)
+void ps3_spu_set_platform (void);
+#else
+/* Without CONFIG_SPU_BASE this is an empty inline stub. */
+static inline void ps3_spu_set_platform (void) {}
+#endif
+
+/* repository bus info */
+
+/* Bus type values, as reported through ps3_repository_read_bus_type(). */
+enum ps3_bus_type {
+ PS3_BUS_TYPE_SB = 4,
+ PS3_BUS_TYPE_STORAGE = 5,
+};
+
+/*
+ * Device type values, as reported through ps3_repository_read_dev_type().
+ * The STOR entries reuse the TYPE_* constants (presumably the SCSI device
+ * types from <scsi/scsi.h>, included above); their numeric values are noted
+ * alongside.
+ */
+enum ps3_dev_type {
+ PS3_DEV_TYPE_STOR_DISK = TYPE_DISK, /* 0 */
+ PS3_DEV_TYPE_SB_GELIC = 3,
+ PS3_DEV_TYPE_SB_USB = 4,
+ PS3_DEV_TYPE_STOR_ROM = TYPE_ROM, /* 5 */
+ PS3_DEV_TYPE_SB_GPIO = 6,
+ PS3_DEV_TYPE_STOR_FLASH = TYPE_RBC, /* 14 */
+};
+
+int ps3_repository_read_bus_str(unsigned int bus_index, const char *bus_str,
+ u64 *value);
+int ps3_repository_read_bus_id(unsigned int bus_index, u64 *bus_id);
+int ps3_repository_read_bus_type(unsigned int bus_index,
+ enum ps3_bus_type *bus_type);
+int ps3_repository_read_bus_num_dev(unsigned int bus_index,
+ unsigned int *num_dev);
+
+/* repository bus device info */
+
+/* Interrupt type values, as reported through ps3_repository_read_dev_intr(). */
+enum ps3_interrupt_type {
+ PS3_INTERRUPT_TYPE_EVENT_PORT = 2,
+ PS3_INTERRUPT_TYPE_SB_OHCI = 3,
+ PS3_INTERRUPT_TYPE_SB_EHCI = 4,
+ PS3_INTERRUPT_TYPE_OTHER = 5,
+};
+
+/* Register type values, as reported through ps3_repository_read_dev_reg_type(). */
+enum ps3_reg_type {
+ PS3_REG_TYPE_SB_OHCI = 3,
+ PS3_REG_TYPE_SB_EHCI = 4,
+ PS3_REG_TYPE_SB_GPIO = 5,
+};
+
+int ps3_repository_read_dev_str(unsigned int bus_index,
+ unsigned int dev_index, const char *dev_str, u64 *value);
+int ps3_repository_read_dev_id(unsigned int bus_index, unsigned int dev_index,
+ u64 *dev_id);
+int ps3_repository_read_dev_type(unsigned int bus_index,
+ unsigned int dev_index, enum ps3_dev_type *dev_type);
+int ps3_repository_read_dev_intr(unsigned int bus_index,
+ unsigned int dev_index, unsigned int intr_index,
+ enum ps3_interrupt_type *intr_type, unsigned int *interrupt_id);
+int ps3_repository_read_dev_reg_type(unsigned int bus_index,
+ unsigned int dev_index, unsigned int reg_index,
+ enum ps3_reg_type *reg_type);
+int ps3_repository_read_dev_reg_addr(unsigned int bus_index,
+ unsigned int dev_index, unsigned int reg_index, u64 *bus_addr,
+ u64 *len);
+int ps3_repository_read_dev_reg(unsigned int bus_index,
+ unsigned int dev_index, unsigned int reg_index,
+ enum ps3_reg_type *reg_type, u64 *bus_addr, u64 *len);
+
+/* repository bus enumerators */
+
+/*
+ * Identifies one device in the repository: the bus and device indexes used
+ * to address its nodes, plus the bus/device type and id values read from them.
+ */
+struct ps3_repository_device {
+ unsigned int bus_index;
+ unsigned int dev_index;
+ enum ps3_bus_type bus_type;
+ enum ps3_dev_type dev_type;
+ u64 bus_id;
+ u64 dev_id;
+};
+
+int ps3_repository_find_device(struct ps3_repository_device *repo);
+int ps3_repository_find_device_by_id(struct ps3_repository_device *repo,
+ u64 bus_id, u64 dev_id);
+int __init ps3_repository_find_devices(enum ps3_bus_type bus_type,
+ int (*callback)(const struct ps3_repository_device *repo));
+int __init ps3_repository_find_bus(enum ps3_bus_type bus_type, unsigned int from,
+ unsigned int *bus_index);
+int ps3_repository_find_interrupt(const struct ps3_repository_device *repo,
+ enum ps3_interrupt_type intr_type, unsigned int *interrupt_id);
+int ps3_repository_find_reg(const struct ps3_repository_device *repo,
+ enum ps3_reg_type reg_type, u64 *bus_addr, u64 *len);
+
+/* repository block device info */
+
+int ps3_repository_read_stor_dev_port(unsigned int bus_index,
+ unsigned int dev_index, u64 *port);
+int ps3_repository_read_stor_dev_blk_size(unsigned int bus_index,
+ unsigned int dev_index, u64 *blk_size);
+int ps3_repository_read_stor_dev_num_blocks(unsigned int bus_index,
+ unsigned int dev_index, u64 *num_blocks);
+int ps3_repository_read_stor_dev_num_regions(unsigned int bus_index,
+ unsigned int dev_index, unsigned int *num_regions);
+int ps3_repository_read_stor_dev_region_id(unsigned int bus_index,
+ unsigned int dev_index, unsigned int region_index,
+ unsigned int *region_id);
+int ps3_repository_read_stor_dev_region_size(unsigned int bus_index,
+ unsigned int dev_index, unsigned int region_index, u64 *region_size);
+int ps3_repository_read_stor_dev_region_start(unsigned int bus_index,
+ unsigned int dev_index, unsigned int region_index, u64 *region_start);
+int ps3_repository_read_stor_dev_info(unsigned int bus_index,
+ unsigned int dev_index, u64 *port, u64 *blk_size,
+ u64 *num_blocks, unsigned int *num_regions);
+int ps3_repository_read_stor_dev_region(unsigned int bus_index,
+ unsigned int dev_index, unsigned int region_index,
+ unsigned int *region_id, u64 *region_start, u64 *region_size);
+
+/* repository logical pu and memory info */
+
+int ps3_repository_read_num_pu(u64 *num_pu);
+int ps3_repository_read_pu_id(unsigned int pu_index, u64 *pu_id);
+int ps3_repository_read_rm_base(unsigned int ppe_id, u64 *rm_base);
+int ps3_repository_read_rm_size(unsigned int ppe_id, u64 *rm_size);
+int ps3_repository_read_region_total(u64 *region_total);
+int ps3_repository_read_mm_info(u64 *rm_base, u64 *rm_size,
+ u64 *region_total);
+int ps3_repository_read_highmem_region_count(unsigned int *region_count);
+int ps3_repository_read_highmem_base(unsigned int region_index,
+ u64 *highmem_base);
+int ps3_repository_read_highmem_size(unsigned int region_index,
+ u64 *highmem_size);
+int ps3_repository_read_highmem_info(unsigned int region_index,
+ u64 *highmem_base, u64 *highmem_size);
+
+#if defined (CONFIG_PS3_REPOSITORY_WRITE)
+int ps3_repository_write_highmem_region_count(unsigned int region_count);
+int ps3_repository_write_highmem_base(unsigned int region_index,
+ u64 highmem_base);
+int ps3_repository_write_highmem_size(unsigned int region_index,
+ u64 highmem_size);
+int ps3_repository_write_highmem_info(unsigned int region_index,
+ u64 highmem_base, u64 highmem_size);
+int ps3_repository_delete_highmem_info(unsigned int region_index);
+#else
+/*
+ * Without CONFIG_PS3_REPOSITORY_WRITE the highmem write/delete routines are
+ * inline stubs that do nothing and return 0.
+ */
+static inline int ps3_repository_write_highmem_region_count(
+ unsigned int region_count) {return 0;}
+static inline int ps3_repository_write_highmem_base(unsigned int region_index,
+ u64 highmem_base) {return 0;}
+static inline int ps3_repository_write_highmem_size(unsigned int region_index,
+ u64 highmem_size) {return 0;}
+static inline int ps3_repository_write_highmem_info(unsigned int region_index,
+ u64 highmem_base, u64 highmem_size) {return 0;}
+static inline int ps3_repository_delete_highmem_info(unsigned int region_index)
+ {return 0;}
+#endif
+
+/* repository pme info */
+
+int ps3_repository_read_num_be(unsigned int *num_be);
+int ps3_repository_read_be_node_id(unsigned int be_index, u64 *node_id);
+int ps3_repository_read_be_id(u64 node_id, u64 *be_id);
+int __init ps3_repository_read_tb_freq(u64 node_id, u64 *tb_freq);
+int __init ps3_repository_read_be_tb_freq(unsigned int be_index, u64 *tb_freq);
+
+/* repository performance monitor info */
+
+int ps3_repository_read_lpm_privileges(unsigned int be_index, u64 *lpar,
+ u64 *rights);
+
+/* repository 'Other OS' area */
+
+int ps3_repository_read_boot_dat_addr(u64 *lpar_addr);
+int ps3_repository_read_boot_dat_size(unsigned int *size);
+int ps3_repository_read_boot_dat_info(u64 *lpar_addr, unsigned int *size);
+
+/* repository spu info */
+
+/**
+ * enum ps3_spu_resource_type - Type of spu resource.
+ * @PS3_SPU_RESOURCE_TYPE_SHARED: Logical spu is shared with other partitions.
+ * @PS3_SPU_RESOURCE_TYPE_EXCLUSIVE: Logical spu is not shared with other
+ *	partitions.
+ *
+ * Returned by ps3_repository_read_spu_resource_id().
+ */
+
+enum ps3_spu_resource_type {
+ PS3_SPU_RESOURCE_TYPE_SHARED = 0,
+ PS3_SPU_RESOURCE_TYPE_EXCLUSIVE = 0x8000000000000000UL,
+};
+
+int ps3_repository_read_num_spu_reserved(unsigned int *num_spu_reserved);
+int ps3_repository_read_num_spu_resource_id(unsigned int *num_resource_id);
+int ps3_repository_read_spu_resource_id(unsigned int res_index,
+ enum ps3_spu_resource_type* resource_type, unsigned int *resource_id);
+
+/* repository vuart info */
+
+int __init ps3_repository_read_vuart_av_port(unsigned int *port);
+int __init ps3_repository_read_vuart_sysmgr_port(unsigned int *port);
+
+#endif
diff --git a/arch/powerpc/platforms/ps3/repository.c b/arch/powerpc/platforms/ps3/repository.c
new file mode 100644
index 000000000..1abe33fbe
--- /dev/null
+++ b/arch/powerpc/platforms/ps3/repository.c
@@ -0,0 +1,1380 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * PS3 repository routines.
+ *
+ * Copyright (C) 2006 Sony Computer Entertainment Inc.
+ * Copyright 2006 Sony Corp.
+ */
+
+#include <asm/lv1call.h>
+
+#include "platform.h"
+
+/*
+ * Vendor id values. make_first_field() adds PS3_VENDOR_ID_NONE into the
+ * first field of a node name.
+ */
+enum ps3_vendor_id {
+ PS3_VENDOR_ID_NONE = 0,
+ PS3_VENDOR_ID_SONY = 0x8000000000000000UL,
+};
+
+/*
+ * Logical partition selectors for read_node(). PS3_LPAR_ID_CURRENT is
+ * replaced there with the id from lv1_get_logical_partition_id();
+ * PS3_LPAR_ID_PME is passed to the hypervisor unchanged.
+ */
+enum ps3_lpar_id {
+ PS3_LPAR_ID_CURRENT = 0,
+ PS3_LPAR_ID_PME = 1,
+};
+
+#define dump_field(_a, _b) _dump_field(_a, _b, __func__, __LINE__)
+/*
+ * Print one 64-bit node name field as hex followed by its 8 bytes as text,
+ * with bytes outside 32..126 shown as '.'. Compiles to nothing unless DEBUG
+ * is defined.
+ */
+static void _dump_field(const char *hdr, u64 n, const char *func, int line)
+{
+#if defined(DEBUG)
+	const char *const bytes = (const char *)&n;
+	char text[16];
+	unsigned int pos = 0;
+
+	while (pos < 8) {
+		const char c = bytes[pos];
+
+		text[pos++] = (c >= 32 && c <= 126) ? c : '.';
+	}
+	text[pos] = 0;
+
+	pr_devel("%s:%d: %s%016llx : %s\n", func, line, hdr, n, text);
+#endif
+}
+
+#define dump_node_name(_a, _b, _c, _d, _e) \
+	_dump_node_name(_a, _b, _c, _d, _e, __func__, __LINE__)
+/* Print the lpar id and the four name fields of a repository node. */
+static void _dump_node_name(unsigned int lpar_id, u64 n1, u64 n2, u64 n3,
+	u64 n4, const char *func, int line)
+{
+	const char *const label[4] = { "n1: ", "n2: ", "n3: ", "n4: " };
+	const u64 field[4] = { n1, n2, n3, n4 };
+	unsigned int k;
+
+	pr_devel("%s:%d: lpar: %u\n", func, line, lpar_id);
+
+	for (k = 0; k < 4; k++)
+		_dump_field(label[k], field[k], func, line);
+}
+
+#define dump_node(_a, _b, _c, _d, _e, _f, _g) \
+	_dump_node(_a, _b, _c, _d, _e, _f, _g, __func__, __LINE__)
+/* Print a repository node: lpar id, four name fields, then both values. */
+static void _dump_node(unsigned int lpar_id, u64 n1, u64 n2, u64 n3, u64 n4,
+	u64 v1, u64 v2, const char *func, int line)
+{
+	/* The name part prints exactly what _dump_node_name() prints. */
+	_dump_node_name(lpar_id, n1, n2, n3, n4, func, line);
+
+	pr_devel("%s:%d: v1: %016llx\n", func, line, v1);
+	pr_devel("%s:%d: v2: %016llx\n", func, line, v2);
+}
+
+/**
+ * make_first_field - Make the first field of a repository node name.
+ * @text: Text portion of the field.
+ * @index: Numeric index portion of the field. Use zero for 'don't care'.
+ *
+ * At most sizeof(u64) bytes of @text are copied into a zeroed u64, which is
+ * then shifted right by 32 bits before @index is added.
+ * This routine sets the vendor id to zero (non-vendor specific).
+ * Returns field value.
+ */
+
+static u64 make_first_field(const char *text, u64 index)
+{
+	u64 field = 0;
+	const size_t len = strnlen(text, sizeof(field));
+
+	memcpy((char *)&field, text, len);
+	field >>= 32;
+
+	return PS3_VENDOR_ID_NONE + field + index;
+}
+
+/**
+ * make_field - Make subsequent fields of a repository node name.
+ * @text: Text portion of the field. Use "" for 'don't care'.
+ * @index: Numeric index portion of the field. Use zero for 'don't care'.
+ *
+ * At most sizeof(u64) bytes of @text are copied into a zeroed u64 and
+ * @index is added to it.
+ * Returns field value.
+ */
+
+static u64 make_field(const char *text, u64 index)
+{
+	u64 field = 0;
+	const size_t len = strnlen(text, sizeof(field));
+
+	memcpy((char *)&field, text, len);
+
+	return field + index;
+}
+
+/**
+ * read_node - Read a repository node from raw fields.
+ * @lpar_id: Logical partition to read from. PS3_LPAR_ID_CURRENT is replaced
+ *	with the id returned by lv1_get_logical_partition_id().
+ * @n1: First field of node name.
+ * @n2: Second field of node name. Use zero for 'don't care'.
+ * @n3: Third field of node name. Use zero for 'don't care'.
+ * @n4: Fourth field of node name. Use zero for 'don't care'.
+ * @_v1: Where to store the first repository value (high word). May be NULL.
+ * @_v2: Where to store the second repository value (low word). Optional
+ *	parameter, use NULL for 'don't care'.
+ *
+ * Returns 0 on success. If lv1_read_repository_node() fails, -ENOENT is
+ * returned and neither @_v1 nor @_v2 is written.
+ */
+
+static int read_node(unsigned int lpar_id, u64 n1, u64 n2, u64 n3, u64 n4,
+	u64 *_v1, u64 *_v2)
+{
+	u64 val1;
+	u64 val2;
+	int rc;
+
+	if (lpar_id == PS3_LPAR_ID_CURRENT) {
+		u64 current_id;
+
+		lv1_get_logical_partition_id(&current_id);
+		lpar_id = current_id;
+	}
+
+	rc = lv1_read_repository_node(lpar_id, n1, n2, n3, n4, &val1, &val2);
+
+	if (rc) {
+		pr_warn("%s:%d: lv1_read_repository_node failed: %s\n",
+			__func__, __LINE__, ps3_result(rc));
+		dump_node_name(lpar_id, n1, n2, n3, n4);
+		return -ENOENT;
+	}
+
+	dump_node(lpar_id, n1, n2, n3, n4, val1, val2);
+
+	/* Hand back each value, or note that a non-zero one is dropped. */
+	if (_v1)
+		*_v1 = val1;
+	else if (val1)
+		pr_devel("%s:%d: warning: discarding non-zero v1: %016llx\n",
+			__func__, __LINE__, val1);
+
+	if (_v2)
+		*_v2 = val2;
+	else if (val2)
+		pr_devel("%s:%d: warning: discarding non-zero v2: %016llx\n",
+			__func__, __LINE__, val2);
+
+	return 0;
+}
+
+/**
+ * ps3_repository_read_bus_str - Read a named value of a bus.
+ * @bus_index: Index added to the "bus" field.
+ * @bus_str: Text of the second name field.
+ * @value: Receives the first node value on success.
+ *
+ * Reads from PS3_LPAR_ID_PME. Returns the read_node() result.
+ */
+
+int ps3_repository_read_bus_str(unsigned int bus_index, const char *bus_str,
+	u64 *value)
+{
+	const u64 n1 = make_first_field("bus", bus_index);
+	const u64 n2 = make_field(bus_str, 0);
+
+	return read_node(PS3_LPAR_ID_PME, n1, n2, 0, 0, value, NULL);
+}
+
+/**
+ * ps3_repository_read_bus_id - Read the "id" value of a bus.
+ * @bus_index: Index added to the "bus" field.
+ * @bus_id: Receives the first node value on success.
+ *
+ * Reads from PS3_LPAR_ID_PME. Returns the read_node() result.
+ */
+
+int ps3_repository_read_bus_id(unsigned int bus_index, u64 *bus_id)
+{
+	const u64 n1 = make_first_field("bus", bus_index);
+	const u64 n2 = make_field("id", 0);
+
+	return read_node(PS3_LPAR_ID_PME, n1, n2, 0, 0, bus_id, NULL);
+}
+
+/**
+ * ps3_repository_read_bus_type - Read the "type" value of a bus.
+ * @bus_index: Index added to the "bus" field.
+ * @bus_type: Always written; receives 0 when the read fails.
+ *
+ * Reads from PS3_LPAR_ID_PME. Returns the read_node() result.
+ */
+
+int ps3_repository_read_bus_type(unsigned int bus_index,
+	enum ps3_bus_type *bus_type)
+{
+	u64 raw = 0;
+	const u64 n1 = make_first_field("bus", bus_index);
+	const u64 n2 = make_field("type", 0);
+	const int rc = read_node(PS3_LPAR_ID_PME, n1, n2, 0, 0, &raw, NULL);
+
+	*bus_type = raw;
+	return rc;
+}
+
+/**
+ * ps3_repository_read_bus_num_dev - Read the "num_dev" value of a bus.
+ * @bus_index: Index added to the "bus" field.
+ * @num_dev: Always written; receives 0 when the read fails.
+ *
+ * Reads from PS3_LPAR_ID_PME. Returns the read_node() result.
+ */
+
+int ps3_repository_read_bus_num_dev(unsigned int bus_index,
+	unsigned int *num_dev)
+{
+	u64 raw = 0;
+	const u64 n1 = make_first_field("bus", bus_index);
+	const u64 n2 = make_field("num_dev", 0);
+	const int rc = read_node(PS3_LPAR_ID_PME, n1, n2, 0, 0, &raw, NULL);
+
+	*num_dev = raw;
+	return rc;
+}
+
+/**
+ * ps3_repository_read_dev_str - Read a named value of a device.
+ * @bus_index: Index added to the "bus" field.
+ * @dev_index: Index added to the "dev" field.
+ * @dev_str: Text of the third name field.
+ * @value: Receives the first node value on success.
+ *
+ * Reads from PS3_LPAR_ID_PME. Returns the read_node() result.
+ */
+
+int ps3_repository_read_dev_str(unsigned int bus_index,
+	unsigned int dev_index, const char *dev_str, u64 *value)
+{
+	const u64 n1 = make_first_field("bus", bus_index);
+	const u64 n2 = make_field("dev", dev_index);
+	const u64 n3 = make_field(dev_str, 0);
+
+	return read_node(PS3_LPAR_ID_PME, n1, n2, n3, 0, value, NULL);
+}
+
+/**
+ * ps3_repository_read_dev_id - Read the "id" value of a device.
+ * @bus_index: Index added to the "bus" field.
+ * @dev_index: Index added to the "dev" field.
+ * @dev_id: Receives the first node value on success.
+ *
+ * Reads from PS3_LPAR_ID_PME. Returns the read_node() result.
+ */
+
+int ps3_repository_read_dev_id(unsigned int bus_index, unsigned int dev_index,
+	u64 *dev_id)
+{
+	const u64 n1 = make_first_field("bus", bus_index);
+	const u64 n2 = make_field("dev", dev_index);
+	const u64 n3 = make_field("id", 0);
+
+	return read_node(PS3_LPAR_ID_PME, n1, n2, n3, 0, dev_id, NULL);
+}
+
+/**
+ * ps3_repository_read_dev_type - Read the "type" value of a device.
+ * @bus_index: Index added to the "bus" field.
+ * @dev_index: Index added to the "dev" field.
+ * @dev_type: Always written; receives 0 when the read fails.
+ *
+ * Reads from PS3_LPAR_ID_PME. Returns the read_node() result.
+ */
+
+int ps3_repository_read_dev_type(unsigned int bus_index,
+	unsigned int dev_index, enum ps3_dev_type *dev_type)
+{
+	u64 raw = 0;
+	const u64 n1 = make_first_field("bus", bus_index);
+	const u64 n2 = make_field("dev", dev_index);
+	const u64 n3 = make_field("type", 0);
+	const int rc = read_node(PS3_LPAR_ID_PME, n1, n2, n3, 0, &raw, NULL);
+
+	*dev_type = raw;
+	return rc;
+}
+
+/**
+ * ps3_repository_read_dev_intr - Read an "intr" entry of a device.
+ * @bus_index: Index added to the "bus" field.
+ * @dev_index: Index added to the "dev" field.
+ * @intr_index: Index added to the "intr" field.
+ * @intr_type: Always written from the first node value (0 on failure).
+ * @interrupt_id: Always written from the second node value (0 on failure).
+ *
+ * Reads from PS3_LPAR_ID_PME. Returns the read_node() result.
+ */
+
+int ps3_repository_read_dev_intr(unsigned int bus_index,
+	unsigned int dev_index, unsigned int intr_index,
+	enum ps3_interrupt_type *intr_type, unsigned int *interrupt_id)
+{
+	u64 raw_type = 0;
+	u64 raw_id = 0;
+	const u64 n1 = make_first_field("bus", bus_index);
+	const u64 n2 = make_field("dev", dev_index);
+	const u64 n3 = make_field("intr", intr_index);
+	const int rc = read_node(PS3_LPAR_ID_PME, n1, n2, n3, 0,
+		&raw_type, &raw_id);
+
+	*intr_type = raw_type;
+	*interrupt_id = raw_id;
+	return rc;
+}
+
+/**
+ * ps3_repository_read_dev_reg_type - Read the "type" of a device "reg" entry.
+ * @bus_index: Index added to the "bus" field.
+ * @dev_index: Index added to the "dev" field.
+ * @reg_index: Index added to the "reg" field.
+ * @reg_type: Always written; receives 0 when the read fails.
+ *
+ * Reads from PS3_LPAR_ID_PME. Returns the read_node() result.
+ */
+
+int ps3_repository_read_dev_reg_type(unsigned int bus_index,
+	unsigned int dev_index, unsigned int reg_index,
+	enum ps3_reg_type *reg_type)
+{
+	u64 raw = 0;
+	const u64 n1 = make_first_field("bus", bus_index);
+	const u64 n2 = make_field("dev", dev_index);
+	const u64 n3 = make_field("reg", reg_index);
+	const u64 n4 = make_field("type", 0);
+	const int rc = read_node(PS3_LPAR_ID_PME, n1, n2, n3, n4, &raw, NULL);
+
+	*reg_type = raw;
+	return rc;
+}
+
+/**
+ * ps3_repository_read_dev_reg_addr - Read the "data" of a device "reg" entry.
+ * @bus_index: Index added to the "bus" field.
+ * @dev_index: Index added to the "dev" field.
+ * @reg_index: Index added to the "reg" field.
+ * @bus_addr: Receives the first node value on success.
+ * @len: Receives the second node value on success.
+ *
+ * Reads from PS3_LPAR_ID_PME. Returns the read_node() result.
+ */
+
+int ps3_repository_read_dev_reg_addr(unsigned int bus_index,
+	unsigned int dev_index, unsigned int reg_index, u64 *bus_addr, u64 *len)
+{
+	const u64 n1 = make_first_field("bus", bus_index);
+	const u64 n2 = make_field("dev", dev_index);
+	const u64 n3 = make_field("reg", reg_index);
+	const u64 n4 = make_field("data", 0);
+
+	return read_node(PS3_LPAR_ID_PME, n1, n2, n3, n4, bus_addr, len);
+}
+
+/**
+ * ps3_repository_read_dev_reg - Read type, address and length of a "reg" entry.
+ * @bus_index: Bus index.
+ * @dev_index: Device index.
+ * @reg_index: Register entry index.
+ * @reg_type: Output of ps3_repository_read_dev_reg_type().
+ * @bus_addr: Output of ps3_repository_read_dev_reg_addr().
+ * @len: Output of ps3_repository_read_dev_reg_addr().
+ *
+ * The address is only read when the type read succeeded.
+ * Returns 0 on success, else the error of the first failing read.
+ */
+
+int ps3_repository_read_dev_reg(unsigned int bus_index,
+	unsigned int dev_index, unsigned int reg_index,
+	enum ps3_reg_type *reg_type, u64 *bus_addr, u64 *len)
+{
+	int rc;
+
+	rc = ps3_repository_read_dev_reg_type(bus_index, dev_index, reg_index,
+		reg_type);
+	if (rc)
+		return rc;
+
+	return ps3_repository_read_dev_reg_addr(bus_index, dev_index,
+		reg_index, bus_addr, len);
+}
+
+
+
+/**
+ * ps3_repository_find_device - Fill in type and id of the device at an index.
+ * @repo: In: bus_index and dev_index select the device. Out: on success
+ *	dev_type and dev_id are filled in; untouched on failure.
+ *
+ * Returns 0 on success, -ENODEV when dev_index is not below the bus's device
+ * count, or the error of a failed repository read.
+ */
+
+int ps3_repository_find_device(struct ps3_repository_device *repo)
+{
+	struct ps3_repository_device dev = *repo;
+	unsigned int dev_count;
+	int rc;
+
+	BUG_ON(repo->bus_index > 10);
+	BUG_ON(repo->dev_index > 10);
+
+	rc = ps3_repository_read_bus_num_dev(dev.bus_index, &dev_count);
+	if (rc) {
+		pr_devel("%s:%d read_bus_num_dev failed\n", __func__, __LINE__);
+		return rc;
+	}
+
+	pr_devel("%s:%d: bus_type %u, bus_index %u, bus_id %llu, num_dev %u\n",
+		__func__, __LINE__, dev.bus_type, dev.bus_index, dev.bus_id,
+		dev_count);
+
+	if (dev.dev_index >= dev_count) {
+		pr_devel("%s:%d: no device found\n", __func__, __LINE__);
+		return -ENODEV;
+	}
+
+	rc = ps3_repository_read_dev_type(dev.bus_index, dev.dev_index,
+		&dev.dev_type);
+	if (rc) {
+		pr_devel("%s:%d read_dev_type failed\n", __func__, __LINE__);
+		return rc;
+	}
+
+	rc = ps3_repository_read_dev_id(dev.bus_index, dev.dev_index,
+		&dev.dev_id);
+	if (rc) {
+		pr_devel("%s:%d ps3_repository_read_dev_id failed\n", __func__,
+			__LINE__);
+		return rc;
+	}
+
+	pr_devel("%s:%d: found: dev_type %u, dev_index %u, dev_id %llu\n",
+		__func__, __LINE__, dev.dev_type, dev.dev_index, dev.dev_id);
+
+	/* Only publish the working copy once every read has succeeded. */
+	*repo = dev;
+	return 0;
+}
+
+/**
+ * ps3_repository_find_device_by_id - Find a device by its bus and device id.
+ * @repo: Filled in with index, id and type of the bus and device on success;
+ *	untouched on failure.
+ * @bus_id: Bus id to search for.
+ * @dev_id: Device id to search for on that bus.
+ *
+ * Scans bus indexes 0..9 for @bus_id, then the devices of that bus for
+ * @dev_id.
+ *
+ * Returns 0 on success, -ENODEV when no bus or no device matches, or the
+ * error of a failed repository read.
+ */
+
+int ps3_repository_find_device_by_id(struct ps3_repository_device *repo,
+	u64 bus_id, u64 dev_id)
+{
+	int result = -ENODEV;
+	struct ps3_repository_device tmp;
+	unsigned int num_dev;
+
+	pr_devel(" -> %s:%u: find device by id %llu:%llu\n", __func__, __LINE__,
+		bus_id, dev_id);
+
+	for (tmp.bus_index = 0; tmp.bus_index < 10; tmp.bus_index++) {
+		result = ps3_repository_read_bus_id(tmp.bus_index,
+			&tmp.bus_id);
+		if (result) {
+			pr_devel("%s:%u read_bus_id(%u) failed\n", __func__,
+				__LINE__, tmp.bus_index);
+			return result;
+		}
+
+		if (tmp.bus_id == bus_id)
+			goto found_bus;
+
+		pr_devel("%s:%u: skip, bus_id %llu\n", __func__, __LINE__,
+			tmp.bus_id);
+	}
+	pr_devel(" <- %s:%u: bus not found\n", __func__, __LINE__);
+	/*
+	 * result is 0 here (the last read succeeded), so it must not be
+	 * returned: that would report success without writing *repo.
+	 */
+	return -ENODEV;
+
+found_bus:
+	result = ps3_repository_read_bus_type(tmp.bus_index, &tmp.bus_type);
+	if (result) {
+		pr_devel("%s:%u read_bus_type(%u) failed\n", __func__,
+			__LINE__, tmp.bus_index);
+		return result;
+	}
+
+	result = ps3_repository_read_bus_num_dev(tmp.bus_index, &num_dev);
+	if (result) {
+		pr_devel("%s:%u read_bus_num_dev failed\n", __func__,
+			__LINE__);
+		return result;
+	}
+
+	for (tmp.dev_index = 0; tmp.dev_index < num_dev; tmp.dev_index++) {
+		result = ps3_repository_read_dev_id(tmp.bus_index,
+			tmp.dev_index,
+			&tmp.dev_id);
+		if (result) {
+			pr_devel("%s:%u read_dev_id(%u:%u) failed\n", __func__,
+				__LINE__, tmp.bus_index, tmp.dev_index);
+			return result;
+		}
+
+		if (tmp.dev_id == dev_id)
+			goto found_dev;
+
+		pr_devel("%s:%u: skip, dev_id %llu\n", __func__, __LINE__,
+			tmp.dev_id);
+	}
+	pr_devel(" <- %s:%u: dev not found\n", __func__, __LINE__);
+	/* As above: result is 0 here, so report the miss explicitly. */
+	return -ENODEV;
+
+found_dev:
+	result = ps3_repository_read_dev_type(tmp.bus_index, tmp.dev_index,
+		&tmp.dev_type);
+	if (result) {
+		pr_devel("%s:%u read_dev_type failed\n", __func__, __LINE__);
+		return result;
+	}
+
+	pr_devel(" <- %s:%u: found: type (%u:%u) index (%u:%u) id (%llu:%llu)\n",
+		__func__, __LINE__, tmp.bus_type, tmp.dev_type, tmp.bus_index,
+		tmp.dev_index, tmp.bus_id, tmp.dev_id);
+	*repo = tmp;
+	return 0;
+}
+
+/**
+ * ps3_repository_find_devices - Call a function for each device on a bus.
+ * @bus_type: Type of the bus to look up, searching from bus index 0.
+ * @callback: Called for each device found; a non-zero return stops the walk.
+ *
+ * Only the first bus of @bus_type is walked. Running out of devices
+ * (-ENODEV from ps3_repository_find_device()) ends the walk with success.
+ *
+ * Returns 0 on success, else the error of the bus lookup, of a repository
+ * read, or of @callback.
+ */
+
+int __init ps3_repository_find_devices(enum ps3_bus_type bus_type,
+	int (*callback)(const struct ps3_repository_device *repo))
+{
+	struct ps3_repository_device repo;
+	int rc = 0;
+
+	pr_devel(" -> %s:%d: find bus_type %u\n", __func__, __LINE__, bus_type);
+
+	repo.bus_type = bus_type;
+	rc = ps3_repository_find_bus(repo.bus_type, 0, &repo.bus_index);
+	if (rc) {
+		pr_devel(" <- %s:%u: bus not found\n", __func__, __LINE__);
+		return rc;
+	}
+
+	rc = ps3_repository_read_bus_id(repo.bus_index, &repo.bus_id);
+	if (rc) {
+		pr_devel("%s:%d read_bus_id(%u) failed\n", __func__, __LINE__,
+			repo.bus_index);
+		return rc;
+	}
+
+	for (repo.dev_index = 0; ; repo.dev_index++) {
+		rc = ps3_repository_find_device(&repo);
+		if (rc) {
+			/* -ENODEV just marks the end of the bus. */
+			if (rc == -ENODEV)
+				rc = 0;
+			break;
+		}
+
+		rc = callback(&repo);
+		if (rc) {
+			pr_devel("%s:%d: abort at callback\n", __func__,
+				__LINE__);
+			break;
+		}
+	}
+
+	pr_devel(" <- %s:%d\n", __func__, __LINE__);
+	return rc;
+}
+
+/**
+ * ps3_repository_find_bus - Find the index of the first bus of a given type.
+ * @bus_type: Bus type to look for.
+ * @from: First bus index to examine; indexes up to 9 are scanned.
+ * @bus_index: Receives the matching index, or UINT_MAX on any failure.
+ *
+ * Returns 0 on success, -ENODEV when no bus matches, or the error of a
+ * failed ps3_repository_read_bus_type().
+ */
+
+int __init ps3_repository_find_bus(enum ps3_bus_type bus_type, unsigned int from,
+	unsigned int *bus_index)
+{
+	unsigned int idx;
+
+	/* Failure value; overwritten only when a matching bus is found. */
+	*bus_index = UINT_MAX;
+
+	for (idx = from; idx < 10; idx++) {
+		enum ps3_bus_type found;
+		int rc = ps3_repository_read_bus_type(idx, &found);
+
+		if (rc) {
+			pr_devel("%s:%d read_bus_type failed\n",
+				__func__, __LINE__);
+			return rc;
+		}
+
+		if (found != bus_type)
+			continue;
+
+		*bus_index = idx;
+		return 0;
+	}
+
+	return -ENODEV;
+}
+
+/**
+ * ps3_repository_find_interrupt - Find a device interrupt of a given type.
+ * @repo: Device whose bus_index and dev_index are searched.
+ * @intr_type: Interrupt type to look for.
+ * @interrupt_id: Receives the interrupt id, or UINT_MAX when not found.
+ *
+ * Examines interrupt entries 0..9 of the device.
+ *
+ * Returns 0 on success, -ENODEV when no entry matches, or the error of a
+ * failed ps3_repository_read_dev_intr().
+ */
+
+int ps3_repository_find_interrupt(const struct ps3_repository_device *repo,
+	enum ps3_interrupt_type intr_type, unsigned int *interrupt_id)
+{
+	unsigned int entry;
+
+	pr_devel("%s:%d: find intr_type %u\n", __func__, __LINE__, intr_type);
+
+	*interrupt_id = UINT_MAX;
+
+	for (entry = 0; entry < 10; entry++) {
+		enum ps3_interrupt_type type;
+		unsigned int id;
+		int rc;
+
+		rc = ps3_repository_read_dev_intr(repo->bus_index,
+			repo->dev_index, entry, &type, &id);
+		if (rc) {
+			pr_devel("%s:%d read_dev_intr failed\n",
+				__func__, __LINE__);
+			return rc;
+		}
+
+		if (type != intr_type)
+			continue;
+
+		*interrupt_id = id;
+		pr_devel("%s:%d: found intr_type %u at res_index %u\n",
+			__func__, __LINE__, intr_type, entry);
+		return 0;
+	}
+
+	return -ENODEV;
+}
+
+/**
+ * ps3_repository_find_reg - Find a device register region of a given type.
+ * @repo: Device whose bus_index and dev_index are searched.
+ * @reg_type: Register type to look for.
+ * @bus_addr: Receives the region address, or 0 when not found.
+ * @len: Receives the region length, or 0 when not found.
+ *
+ * Examines register entries 0..9 of the device.
+ *
+ * Returns 0 on success, -ENODEV when no entry matches, or the error of a
+ * failed ps3_repository_read_dev_reg().
+ */
+
+int ps3_repository_find_reg(const struct ps3_repository_device *repo,
+	enum ps3_reg_type reg_type, u64 *bus_addr, u64 *len)
+{
+	unsigned int entry;
+
+	pr_devel("%s:%d: find reg_type %u\n", __func__, __LINE__, reg_type);
+
+	*len = 0;
+	*bus_addr = 0;
+
+	for (entry = 0; entry < 10; entry++) {
+		enum ps3_reg_type type;
+		u64 addr;
+		u64 size;
+		int rc;
+
+		rc = ps3_repository_read_dev_reg(repo->bus_index,
+			repo->dev_index, entry, &type, &addr, &size);
+		if (rc) {
+			pr_devel("%s:%d read_dev_reg failed\n",
+				__func__, __LINE__);
+			return rc;
+		}
+
+		if (type != reg_type)
+			continue;
+
+		*bus_addr = addr;
+		*len = size;
+		pr_devel("%s:%d: found reg_type %u at res_index %u\n",
+			__func__, __LINE__, reg_type, entry);
+		return 0;
+	}
+
+	return -ENODEV;
+}
+
+/**
+ * ps3_repository_read_stor_dev_port - Read the "port" value of a device.
+ * @bus_index: Index added to the "bus" field.
+ * @dev_index: Index added to the "dev" field.
+ * @port: Receives the first node value on success.
+ *
+ * Reads from PS3_LPAR_ID_PME. Returns the read_node() result.
+ */
+
+int ps3_repository_read_stor_dev_port(unsigned int bus_index,
+	unsigned int dev_index, u64 *port)
+{
+	const u64 n1 = make_first_field("bus", bus_index);
+	const u64 n2 = make_field("dev", dev_index);
+	const u64 n3 = make_field("port", 0);
+
+	return read_node(PS3_LPAR_ID_PME, n1, n2, n3, 0, port, NULL);
+}
+
+/**
+ * ps3_repository_read_stor_dev_blk_size - Read the "blk_size" of a device.
+ * @bus_index: Index added to the "bus" field.
+ * @dev_index: Index added to the "dev" field.
+ * @blk_size: Receives the first node value on success.
+ *
+ * Reads from PS3_LPAR_ID_PME. Returns the read_node() result.
+ */
+
+int ps3_repository_read_stor_dev_blk_size(unsigned int bus_index,
+	unsigned int dev_index, u64 *blk_size)
+{
+	const u64 n1 = make_first_field("bus", bus_index);
+	const u64 n2 = make_field("dev", dev_index);
+	const u64 n3 = make_field("blk_size", 0);
+
+	return read_node(PS3_LPAR_ID_PME, n1, n2, n3, 0, blk_size, NULL);
+}
+
+/**
+ * ps3_repository_read_stor_dev_num_blocks - Read the "n_blocks" of a device.
+ * @bus_index: Index added to the "bus" field.
+ * @dev_index: Index added to the "dev" field.
+ * @num_blocks: Receives the first node value on success.
+ *
+ * Reads from PS3_LPAR_ID_PME. Returns the read_node() result.
+ */
+
+int ps3_repository_read_stor_dev_num_blocks(unsigned int bus_index,
+	unsigned int dev_index, u64 *num_blocks)
+{
+	const u64 n1 = make_first_field("bus", bus_index);
+	const u64 n2 = make_field("dev", dev_index);
+	const u64 n3 = make_field("n_blocks", 0);
+
+	return read_node(PS3_LPAR_ID_PME, n1, n2, n3, 0, num_blocks, NULL);
+}
+
+/**
+ * ps3_repository_read_stor_dev_num_regions - Read the "n_regs" of a device.
+ * @bus_index: Index added to the "bus" field.
+ * @dev_index: Index added to the "dev" field.
+ * @num_regions: Always written; receives 0 when the read fails.
+ *
+ * Reads from PS3_LPAR_ID_PME. Returns the read_node() result.
+ */
+
+int ps3_repository_read_stor_dev_num_regions(unsigned int bus_index,
+	unsigned int dev_index, unsigned int *num_regions)
+{
+	u64 raw = 0;
+	const u64 n1 = make_first_field("bus", bus_index);
+	const u64 n2 = make_field("dev", dev_index);
+	const u64 n3 = make_field("n_regs", 0);
+	const int rc = read_node(PS3_LPAR_ID_PME, n1, n2, n3, 0, &raw, NULL);
+
+	*num_regions = raw;
+	return rc;
+}
+
+/**
+ * ps3_repository_read_stor_dev_region_id - Read the "id" of a device region.
+ * @bus_index: Index added to the "bus" field.
+ * @dev_index: Index added to the "dev" field.
+ * @region_index: Index added to the "region" field.
+ * @region_id: Always written; receives 0 when the read fails.
+ *
+ * Reads from PS3_LPAR_ID_PME. Returns the read_node() result.
+ */
+
+int ps3_repository_read_stor_dev_region_id(unsigned int bus_index,
+	unsigned int dev_index, unsigned int region_index,
+	unsigned int *region_id)
+{
+	u64 raw = 0;
+	const u64 n1 = make_first_field("bus", bus_index);
+	const u64 n2 = make_field("dev", dev_index);
+	const u64 n3 = make_field("region", region_index);
+	const u64 n4 = make_field("id", 0);
+	const int rc = read_node(PS3_LPAR_ID_PME, n1, n2, n3, n4, &raw, NULL);
+
+	*region_id = raw;
+	return rc;
+}
+
+/**
+ * ps3_repository_read_stor_dev_region_size - Read the "size" of a region.
+ * @bus_index: Index added to the "bus" field.
+ * @dev_index: Index added to the "dev" field.
+ * @region_index: Index added to the "region" field.
+ * @region_size: Receives the first node value on success.
+ *
+ * Reads from PS3_LPAR_ID_PME. Returns the read_node() result.
+ */
+
+int ps3_repository_read_stor_dev_region_size(unsigned int bus_index,
+	unsigned int dev_index, unsigned int region_index, u64 *region_size)
+{
+	const u64 n1 = make_first_field("bus", bus_index);
+	const u64 n2 = make_field("dev", dev_index);
+	const u64 n3 = make_field("region", region_index);
+	const u64 n4 = make_field("size", 0);
+
+	return read_node(PS3_LPAR_ID_PME, n1, n2, n3, n4, region_size, NULL);
+}
+
+/**
+ * ps3_repository_read_stor_dev_region_start - Read the "start" of a region.
+ * @bus_index: Index added to the "bus" field.
+ * @dev_index: Index added to the "dev" field.
+ * @region_index: Index added to the "region" field.
+ * @region_start: Receives the first node value on success.
+ *
+ * Reads from PS3_LPAR_ID_PME. Returns the read_node() result.
+ */
+
+int ps3_repository_read_stor_dev_region_start(unsigned int bus_index,
+	unsigned int dev_index, unsigned int region_index, u64 *region_start)
+{
+	const u64 n1 = make_first_field("bus", bus_index);
+	const u64 n2 = make_field("dev", dev_index);
+	const u64 n3 = make_field("region", region_index);
+	const u64 n4 = make_field("start", 0);
+
+	return read_node(PS3_LPAR_ID_PME, n1, n2, n3, n4, region_start, NULL);
+}
+
+/**
+ * ps3_repository_read_stor_dev_info - Read port, block size and counts.
+ * @bus_index: Bus index.
+ * @dev_index: Device index.
+ * @port: Output of ps3_repository_read_stor_dev_port().
+ * @blk_size: Output of ps3_repository_read_stor_dev_blk_size().
+ * @num_blocks: Output of ps3_repository_read_stor_dev_num_blocks().
+ * @num_regions: Output of ps3_repository_read_stor_dev_num_regions().
+ *
+ * The reads run in the order above and stop at the first failure.
+ * Returns 0 on success, else the error of the failing read.
+ */
+
+int ps3_repository_read_stor_dev_info(unsigned int bus_index,
+	unsigned int dev_index, u64 *port, u64 *blk_size,
+	u64 *num_blocks, unsigned int *num_regions)
+{
+	int rc;
+
+	rc = ps3_repository_read_stor_dev_port(bus_index, dev_index, port);
+	if (!rc)
+		rc = ps3_repository_read_stor_dev_blk_size(bus_index,
+			dev_index, blk_size);
+	if (!rc)
+		rc = ps3_repository_read_stor_dev_num_blocks(bus_index,
+			dev_index, num_blocks);
+	if (!rc)
+		rc = ps3_repository_read_stor_dev_num_regions(bus_index,
+			dev_index, num_regions);
+
+	return rc;
+}
+
+/**
+ * ps3_repository_read_stor_dev_region - Read id, start and size of a region.
+ * @bus_index: Bus index.
+ * @dev_index: Device index.
+ * @region_index: Region index.
+ * @region_id: Output of ps3_repository_read_stor_dev_region_id().
+ * @region_start: Output of ps3_repository_read_stor_dev_region_start().
+ * @region_size: Output of ps3_repository_read_stor_dev_region_size().
+ *
+ * The reads run in the order above and stop at the first failure.
+ * Returns 0 on success, else the error of the failing read.
+ */
+
+int ps3_repository_read_stor_dev_region(unsigned int bus_index,
+	unsigned int dev_index, unsigned int region_index,
+	unsigned int *region_id, u64 *region_start, u64 *region_size)
+{
+	int rc;
+
+	rc = ps3_repository_read_stor_dev_region_id(bus_index, dev_index,
+		region_index, region_id);
+	if (!rc)
+		rc = ps3_repository_read_stor_dev_region_start(bus_index,
+			dev_index, region_index, region_start);
+	if (!rc)
+		rc = ps3_repository_read_stor_dev_region_size(bus_index,
+			dev_index, region_index, region_size);
+
+	return rc;
+}
+
+/**
+ * ps3_repository_read_num_pu - Number of logical PU processors for this lpar.
+ * @num_pu: Zeroed first, then set from node ("bi", "pun") on success.
+ *
+ * Reads from the current lpar. Returns the read_node() result.
+ */
+
+int ps3_repository_read_num_pu(u64 *num_pu)
+{
+	const u64 n1 = make_first_field("bi", 0);
+	const u64 n2 = make_field("pun", 0);
+
+	*num_pu = 0;
+
+	return read_node(PS3_LPAR_ID_CURRENT, n1, n2, 0, 0, num_pu, NULL);
+}
+
+/**
+ * ps3_repository_read_pu_id - Read the logical PU id.
+ * @pu_index: Zero based index.
+ * @pu_id: The logical PU id.
+ *
+ * Reads from the current lpar. Returns the read_node() result.
+ */
+
+int ps3_repository_read_pu_id(unsigned int pu_index, u64 *pu_id)
+{
+	const u64 n1 = make_first_field("bi", 0);
+	const u64 n2 = make_field("pu", pu_index);
+
+	return read_node(PS3_LPAR_ID_CURRENT, n1, n2, 0, 0, pu_id, NULL);
+}
+
+/**
+ * ps3_repository_read_rm_size - Read the "rm_size" value of a ppe.
+ * @ppe_id: Used unmodified as the third name field.
+ * @rm_size: Receives the first node value on success.
+ *
+ * Reads from the current lpar. Returns the read_node() result.
+ */
+
+int ps3_repository_read_rm_size(unsigned int ppe_id, u64 *rm_size)
+{
+	const u64 n1 = make_first_field("bi", 0);
+	const u64 n2 = make_field("pu", 0);
+	const u64 n4 = make_field("rm_size", 0);
+
+	return read_node(PS3_LPAR_ID_CURRENT, n1, n2, ppe_id, n4,
+		rm_size, NULL);
+}
+
+/**
+ * ps3_repository_read_region_total - Read the "rgntotal" value.
+ * @region_total: Receives the first node value on success.
+ *
+ * Reads from the current lpar. Returns the read_node() result.
+ */
+
+int ps3_repository_read_region_total(u64 *region_total)
+{
+	const u64 n1 = make_first_field("bi", 0);
+	const u64 n2 = make_field("rgntotal", 0);
+
+	return read_node(PS3_LPAR_ID_CURRENT, n1, n2, 0, 0,
+		region_total, NULL);
+}
+
+/**
+ * ps3_repository_read_mm_info - Read mm info for single pu system.
+ * @rm_base: Real mode memory base address. Always set to zero.
+ * @rm_size: Real mode memory size.
+ * @region_total: Maximum memory region size.
+ *
+ * The ppe id comes from lv1_get_logical_ppe_id(). @region_total is only
+ * read when reading @rm_size succeeded.
+ * Returns 0 on success, else the error of the first failing read.
+ */
+
+int ps3_repository_read_mm_info(u64 *rm_base, u64 *rm_size, u64 *region_total)
+{
+	u64 ppe_id;
+	int rc;
+
+	lv1_get_logical_ppe_id(&ppe_id);
+	*rm_base = 0;
+
+	rc = ps3_repository_read_rm_size(ppe_id, rm_size);
+	if (rc)
+		return rc;
+
+	return ps3_repository_read_region_total(region_total);
+}
+
+/**
+ * ps3_repository_read_highmem_region_count - Read the number of highmem regions
+ * @region_count: Always written; receives 0 when the read fails.
+ *
+ * Bootloaders must arrange the repository nodes such that regions are indexed
+ * with a region_index from 0 to region_count-1.
+ *
+ * Reads from the current lpar. Returns the read_node() result.
+ */
+
+int ps3_repository_read_highmem_region_count(unsigned int *region_count)
+{
+	u64 raw = 0;
+	const u64 n1 = make_first_field("highmem", 0);
+	const u64 n2 = make_field("region", 0);
+	const u64 n3 = make_field("count", 0);
+	const int rc = read_node(PS3_LPAR_ID_CURRENT, n1, n2, n3, 0,
+		&raw, NULL);
+
+	*region_count = raw;
+	return rc;
+}
+
+
+/**
+ * ps3_repository_read_highmem_base - Read the "base" of a highmem region.
+ * @region_index: Index added to the "region" field.
+ * @highmem_base: Receives the first node value on success.
+ *
+ * Reads from the current lpar. Returns the read_node() result.
+ */
+
+int ps3_repository_read_highmem_base(unsigned int region_index,
+	u64 *highmem_base)
+{
+	const u64 n1 = make_first_field("highmem", 0);
+	const u64 n2 = make_field("region", region_index);
+	const u64 n3 = make_field("base", 0);
+
+	return read_node(PS3_LPAR_ID_CURRENT, n1, n2, n3, 0,
+		highmem_base, NULL);
+}
+
+/**
+ * ps3_repository_read_highmem_size - Read the "size" of a highmem region.
+ * @region_index: Index added to the "region" field.
+ * @highmem_size: Receives the first node value on success.
+ *
+ * Reads from the current lpar. Returns the read_node() result.
+ */
+
+int ps3_repository_read_highmem_size(unsigned int region_index,
+	u64 *highmem_size)
+{
+	const u64 n1 = make_first_field("highmem", 0);
+	const u64 n2 = make_field("region", region_index);
+	const u64 n3 = make_field("size", 0);
+
+	return read_node(PS3_LPAR_ID_CURRENT, n1, n2, n3, 0,
+		highmem_size, NULL);
+}
+
+/**
+ * ps3_repository_read_highmem_info - Read high memory region info
+ * @region_index: Region index, {0,..,region_count-1}.
+ * @highmem_base: High memory base address. Zeroed before the read.
+ * @highmem_size: High memory size. Only read when the base read succeeded.
+ *
+ * Bootloaders that preallocate highmem regions must place the
+ * region info into the repository at these well known nodes.
+ *
+ * Returns 0 on success, else the error of the first failing read.
+ */
+
+int ps3_repository_read_highmem_info(unsigned int region_index,
+	u64 *highmem_base, u64 *highmem_size)
+{
+	int rc;
+
+	*highmem_base = 0;
+
+	rc = ps3_repository_read_highmem_base(region_index, highmem_base);
+	if (rc)
+		return rc;
+
+	return ps3_repository_read_highmem_size(region_index, highmem_size);
+}
+
+/**
+ * ps3_repository_read_num_spu_reserved - Number of physical spus reserved.
+ * @num_spu_reserved: Number of physical spus. Always written; receives 0
+ *	when the read fails.
+ *
+ * Reads from the current lpar. Returns the read_node() result.
+ */
+
+int ps3_repository_read_num_spu_reserved(unsigned int *num_spu_reserved)
+{
+	u64 raw = 0;
+	const u64 n1 = make_first_field("bi", 0);
+	const u64 n2 = make_field("spun", 0);
+	const int rc = read_node(PS3_LPAR_ID_CURRENT, n1, n2, 0, 0,
+		&raw, NULL);
+
+	*num_spu_reserved = raw;
+	return rc;
+}
+
+/**
+ * ps3_repository_read_num_spu_resource_id - Number of spu resource reservations.
+ * @num_resource_id: Number of spu resource ids. Always written; receives 0
+ *	when the read fails.
+ *
+ * Reads from the current lpar. Returns the read_node() result.
+ */
+
+int ps3_repository_read_num_spu_resource_id(unsigned int *num_resource_id)
+{
+	u64 raw = 0;
+	const u64 n1 = make_first_field("bi", 0);
+	const u64 n2 = make_field("spursvn", 0);
+	const int rc = read_node(PS3_LPAR_ID_CURRENT, n1, n2, 0, 0,
+		&raw, NULL);
+
+	*num_resource_id = raw;
+	return rc;
+}
+
+/**
+ * ps3_repository_read_spu_resource_id - spu resource reservation id value.
+ * @res_index: Resource reservation index. Used unmodified as the third
+ *	name field.
+ * @resource_type: Resource reservation type. Always written (0 on failure).
+ * @resource_id: Resource reservation id. Always written (0 on failure).
+ *
+ * Reads from the current lpar. Returns the read_node() result.
+ */
+
+int ps3_repository_read_spu_resource_id(unsigned int res_index,
+	enum ps3_spu_resource_type *resource_type, unsigned int *resource_id)
+{
+	u64 raw_type = 0;
+	u64 raw_id = 0;
+	const u64 n1 = make_first_field("bi", 0);
+	const u64 n2 = make_field("spursv", 0);
+	const int rc = read_node(PS3_LPAR_ID_CURRENT, n1, n2, res_index, 0,
+		&raw_type, &raw_id);
+
+	*resource_type = raw_type;
+	*resource_id = raw_id;
+	return rc;
+}
+
+/**
+ * ps3_repository_read_boot_dat_address - Read the "boot_dat" "address" value.
+ * @address: Receives the first node value on success.
+ *
+ * Reads from the current lpar. Returns the read_node() result.
+ */
+
+static int ps3_repository_read_boot_dat_address(u64 *address)
+{
+	const u64 n1 = make_first_field("bi", 0);
+	const u64 n2 = make_field("boot_dat", 0);
+	const u64 n3 = make_field("address", 0);
+
+	return read_node(PS3_LPAR_ID_CURRENT, n1, n2, n3, 0, address, NULL);
+}
+
+/**
+ * ps3_repository_read_boot_dat_size - Read the "boot_dat" "size" value.
+ * @size: Always written; receives 0 when the read fails.
+ *
+ * Reads from the current lpar. Returns the read_node() result.
+ */
+
+int ps3_repository_read_boot_dat_size(unsigned int *size)
+{
+	u64 raw = 0;
+	const u64 n1 = make_first_field("bi", 0);
+	const u64 n2 = make_field("boot_dat", 0);
+	const u64 n3 = make_field("size", 0);
+	const int rc = read_node(PS3_LPAR_ID_CURRENT, n1, n2, n3, 0,
+		&raw, NULL);
+
+	*size = raw;
+	return rc;
+}
+
+/**
+ * ps3_repository_read_vuart_av_port - Read the "avset" vuart port value.
+ * @port: Always written; receives 0 when the read fails.
+ *
+ * Reads node ("bi", "vir_uart", "port", "avset") of the current lpar.
+ * Returns the read_node() result.
+ */
+
+int __init ps3_repository_read_vuart_av_port(unsigned int *port)
+{
+	u64 raw = 0;
+	const u64 n1 = make_first_field("bi", 0);
+	const u64 n2 = make_field("vir_uart", 0);
+	const u64 n3 = make_field("port", 0);
+	const u64 n4 = make_field("avset", 0);
+	const int rc = read_node(PS3_LPAR_ID_CURRENT, n1, n2, n3, n4,
+		&raw, NULL);
+
+	*port = raw;
+	return rc;
+}
+
+/*
+ * Read the "bi"/"vir_uart"/"port"/"sysmgr" repository node.  @port is always
+ * written and is 0 unless read_node() stored a value.
+ */
+int __init ps3_repository_read_vuart_sysmgr_port(unsigned int *port)
+{
+	u64 sysmgr_port = 0;
+	int result = read_node(PS3_LPAR_ID_CURRENT,
+			       make_first_field("bi", 0),
+			       make_field("vir_uart", 0),
+			       make_field("port", 0),
+			       make_field("sysmgr", 0),
+			       &sysmgr_port, NULL);
+
+	*port = sysmgr_port;
+	return result;
+}
+
+/**
+ * ps3_repository_read_boot_dat_info - Get address and size of cell_ext_os_area.
+ * @lpar_addr: lpar address of cell_ext_os_area
+ * @size: size of cell_ext_os_area; cleared to 0 before any read is attempted
+ *
+ * The size is only read when the address read succeeded.
+ */
+
+int ps3_repository_read_boot_dat_info(u64 *lpar_addr, unsigned int *size)
+{
+	int result;
+
+	*size = 0;
+
+	result = ps3_repository_read_boot_dat_address(lpar_addr);
+	if (result)
+		return result;
+
+	return ps3_repository_read_boot_dat_size(size);
+}
+
+/**
+ * ps3_repository_read_num_be - Number of physical BE processors in the system.
+ * @num_be: Receives the count.  It is always written, and is 0 unless
+ *	read_node() stored a value.
+ *
+ * Return: the result of read_node() for the PME "ben" node.
+ */
+
+int ps3_repository_read_num_be(unsigned int *num_be)
+{
+	u64 count = 0;
+	int result = read_node(PS3_LPAR_ID_PME,
+			       make_first_field("ben", 0),
+			       0,
+			       0,
+			       0,
+			       &count, NULL);
+
+	*num_be = count;
+	return result;
+}
+
+/**
+ * ps3_repository_read_be_node_id - Read the physical BE processor node id.
+ * @be_index: Zero based index.
+ * @node_id: The BE processor node id.
+ *
+ * Return: the result of read_node() for the PME "be" node at @be_index.
+ */
+
+int ps3_repository_read_be_node_id(unsigned int be_index, u64 *node_id)
+{
+	int result;
+
+	result = read_node(PS3_LPAR_ID_PME,
+			   make_first_field("be", be_index),
+			   0,
+			   0,
+			   0,
+			   node_id, NULL);
+	return result;
+}
+
+/**
+ * ps3_repository_read_be_id - Read the physical BE processor id.
+ * @node_id: The BE processor node id.
+ * @be_id: The BE processor id.
+ *
+ * Return: the result of read_node() for the PME "be"/@node_id node.
+ */
+
+int ps3_repository_read_be_id(u64 node_id, u64 *be_id)
+{
+	int result;
+
+	result = read_node(PS3_LPAR_ID_PME,
+			   make_first_field("be", 0),
+			   node_id,
+			   0,
+			   0,
+			   be_id, NULL);
+	return result;
+}
+
+/* Read the PME "be"/@node_id/"clock" repository node into @tb_freq. */
+int __init ps3_repository_read_tb_freq(u64 node_id, u64 *tb_freq)
+{
+	int result;
+
+	result = read_node(PS3_LPAR_ID_PME,
+			   make_first_field("be", 0),
+			   node_id,
+			   make_field("clock", 0),
+			   0,
+			   tb_freq, NULL);
+	return result;
+}
+
+/*
+ * Look up the node id of BE @be_index, then read that node's "clock" value.
+ * @tb_freq is cleared to 0 first, so it stays 0 when the node id lookup fails.
+ */
+int __init ps3_repository_read_be_tb_freq(unsigned int be_index, u64 *tb_freq)
+{
+	u64 node_id;
+	int result;
+
+	*tb_freq = 0;
+
+	result = ps3_repository_read_be_node_id(be_index, &node_id);
+	if (result)
+		return result;
+
+	return ps3_repository_read_tb_freq(node_id, tb_freq);
+}
+
+/*
+ * Read the PME "be"/<node id>/"lpm"/"priv" node of BE @be_index into @lpar
+ * and @rights.  Both outputs are cleared to 0 first, so they stay 0 when the
+ * node id lookup fails.
+ */
+int ps3_repository_read_lpm_privileges(unsigned int be_index, u64 *lpar,
+	u64 *rights)
+{
+	u64 node_id;
+	int result;
+
+	*lpar = 0;
+	*rights = 0;
+
+	result = ps3_repository_read_be_node_id(be_index, &node_id);
+	if (result)
+		return result;
+
+	return read_node(PS3_LPAR_ID_PME,
+			 make_first_field("be", 0),
+			 node_id,
+			 make_field("lpm", 0),
+			 make_field("priv", 0),
+			 lpar, rights);
+}
+
+#if defined(CONFIG_PS3_REPOSITORY_WRITE)
+
+/*
+ * Create a repository node with lv1_create_repository_node().
+ * Returns 0 on success; any hypervisor failure is logged and reported
+ * as -ENOENT.
+ */
+static int create_node(u64 n1, u64 n2, u64 n3, u64 n4, u64 v1, u64 v2)
+{
+	int status;
+
+	dump_node(0, n1, n2, n3, n4, v1, v2);
+
+	status = lv1_create_repository_node(n1, n2, n3, n4, v1, v2);
+	if (!status)
+		return 0;
+
+	pr_devel("%s:%d: lv1_create_repository_node failed: %s\n",
+		__func__, __LINE__, ps3_result(status));
+	return -ENOENT;
+}
+
+/*
+ * Delete a repository node with lv1_delete_repository_node().
+ * Returns 0 on success; any hypervisor failure is logged and reported
+ * as -ENOENT.
+ */
+static int delete_node(u64 n1, u64 n2, u64 n3, u64 n4)
+{
+	int status;
+
+	dump_node(0, n1, n2, n3, n4, 0, 0);
+
+	status = lv1_delete_repository_node(n1, n2, n3, n4);
+	if (!status)
+		return 0;
+
+	pr_devel("%s:%d: lv1_delete_repository_node failed: %s\n",
+		__func__, __LINE__, ps3_result(status));
+	return -ENOENT;
+}
+
+/*
+ * Store (v1, v2) in a repository node.  Creation is tried first; only when
+ * create_node() fails is lv1_write_repository_node() used instead.
+ * Returns 0 on success, -ENOENT when the write also fails.
+ */
+static int write_node(u64 n1, u64 n2, u64 n3, u64 n4, u64 v1, u64 v2)
+{
+	int status;
+
+	if (!create_node(n1, n2, n3, n4, v1, v2))
+		return 0;
+
+	status = lv1_write_repository_node(n1, n2, n3, n4, v1, v2);
+	if (!status)
+		return 0;
+
+	pr_devel("%s:%d: lv1_write_repository_node failed: %s\n",
+		__func__, __LINE__, ps3_result(status));
+	return -ENOENT;
+}
+
+/* Store @region_count in the "highmem"/"region"/"count" repository node. */
+int ps3_repository_write_highmem_region_count(unsigned int region_count)
+{
+	return write_node(make_first_field("highmem", 0),
+			  make_field("region", 0),
+			  make_field("count", 0),
+			  0,
+			  (u64)region_count, 0);
+}
+
+/* Store @highmem_base in the "highmem"/"region"<region_index>/"base" node. */
+int ps3_repository_write_highmem_base(unsigned int region_index,
+	u64 highmem_base)
+{
+	int result;
+
+	result = write_node(make_first_field("highmem", 0),
+			    make_field("region", region_index),
+			    make_field("base", 0),
+			    0,
+			    highmem_base, 0);
+	return result;
+}
+
+/* Store @highmem_size in the "highmem"/"region"<region_index>/"size" node. */
+int ps3_repository_write_highmem_size(unsigned int region_index,
+	u64 highmem_size)
+{
+	int result;
+
+	result = write_node(make_first_field("highmem", 0),
+			    make_field("region", region_index),
+			    make_field("size", 0),
+			    0,
+			    highmem_size, 0);
+	return result;
+}
+
+/*
+ * Write the base and then the size of highmem region @region_index.
+ * The size is not written when writing the base fails.
+ */
+int ps3_repository_write_highmem_info(unsigned int region_index,
+	u64 highmem_base, u64 highmem_size)
+{
+	int result;
+
+	result = ps3_repository_write_highmem_base(region_index, highmem_base);
+	if (result)
+		return result;
+
+	return ps3_repository_write_highmem_size(region_index, highmem_size);
+}
+
+/* Delete the "highmem"/"region"<region_index>/"base" repository node. */
+static int ps3_repository_delete_highmem_base(unsigned int region_index)
+{
+	int result;
+
+	result = delete_node(make_first_field("highmem", 0),
+			     make_field("region", region_index),
+			     make_field("base", 0),
+			     0);
+	return result;
+}
+
+/* Delete the "highmem"/"region"<region_index>/"size" repository node. */
+static int ps3_repository_delete_highmem_size(unsigned int region_index)
+{
+	int result;
+
+	result = delete_node(make_first_field("highmem", 0),
+			     make_field("region", region_index),
+			     make_field("size", 0),
+			     0);
+	return result;
+}
+
+/*
+ * Delete both the base and the size node of highmem region @region_index.
+ * The size node is deleted even when deleting the base node failed.
+ * Returns 0 when the two results sum to zero, otherwise -1.
+ */
+int ps3_repository_delete_highmem_info(unsigned int region_index)
+{
+	int base_result = ps3_repository_delete_highmem_base(region_index);
+	int size_result = ps3_repository_delete_highmem_size(region_index);
+
+	if (base_result + size_result)
+		return -1;
+
+	return 0;
+}
+
+#endif /* defined(CONFIG_PS3_REPOSITORY_WRITE) */
+
+#if defined(DEBUG)
+
+/**
+ * ps3_repository_dump_resource_info - Log a device's interrupt and register
+ * resources with pr_devel().
+ * @repo: Device to dump; only bus_index and dev_index are read here.
+ *
+ * Probes resource indices 0..9 for interrupts and then 0..9 for registers,
+ * ending each scan at the first read that fails.
+ *
+ * Return: the result of the last ps3_repository_read_dev_reg() call.
+ */
+int __init ps3_repository_dump_resource_info(const struct ps3_repository_device *repo)
+{
+ int result = 0;
+ unsigned int res_index;
+
+ pr_devel(" -> %s:%d: (%u:%u)\n", __func__, __LINE__,
+ repo->bus_index, repo->dev_index);
+
+ for (res_index = 0; res_index < 10; res_index++) {
+ enum ps3_interrupt_type intr_type;
+ unsigned int interrupt_id;
+
+ result = ps3_repository_read_dev_intr(repo->bus_index,
+ repo->dev_index, res_index, &intr_type, &interrupt_id);
+
+ if (result) {
+ /*
+ * NOTE(review): whether the read helper can actually return
+ * LV1_NO_ENTRY is not visible in this chunk - confirm.
+ */
+ if (result != LV1_NO_ENTRY)
+ pr_devel("%s:%d ps3_repository_read_dev_intr"
+ " (%u:%u) failed\n", __func__, __LINE__,
+ repo->bus_index, repo->dev_index);
+ break;
+ }
+
+ pr_devel("%s:%d (%u:%u) intr_type %u, interrupt_id %u\n",
+ __func__, __LINE__, repo->bus_index, repo->dev_index,
+ intr_type, interrupt_id);
+ }
+
+ /* The register scan runs even if the interrupt scan ended on an error. */
+ for (res_index = 0; res_index < 10; res_index++) {
+ enum ps3_reg_type reg_type;
+ u64 bus_addr;
+ u64 len;
+
+ result = ps3_repository_read_dev_reg(repo->bus_index,
+ repo->dev_index, res_index, &reg_type, &bus_addr, &len);
+
+ if (result) {
+ if (result != LV1_NO_ENTRY)
+ pr_devel("%s:%d ps3_repository_read_dev_reg"
+ " (%u:%u) failed\n", __func__, __LINE__,
+ repo->bus_index, repo->dev_index);
+ break;
+ }
+
+ pr_devel("%s:%d (%u:%u) reg_type %u, bus_addr %llxh, len %llxh\n",
+ __func__, __LINE__, repo->bus_index, repo->dev_index,
+ reg_type, bus_addr, len);
+ }
+
+ pr_devel(" <- %s:%d\n", __func__, __LINE__);
+ return result;
+}
+
+/*
+ * Log the storage parameters of device @repo (port, block size, block count,
+ * region count) and then every region's id, start and size with pr_devel().
+ * Region dumping stops at the first region read that fails.
+ * Returns the result of the last repository read performed.
+ */
+static int __init dump_stor_dev_info(struct ps3_repository_device *repo)
+{
+ int result = 0;
+ unsigned int num_regions, region_index;
+ u64 port, blk_size, num_blocks;
+
+ pr_devel(" -> %s:%d: (%u:%u)\n", __func__, __LINE__,
+ repo->bus_index, repo->dev_index);
+
+ result = ps3_repository_read_stor_dev_info(repo->bus_index,
+ repo->dev_index, &port, &blk_size, &num_blocks, &num_regions);
+ if (result) {
+ pr_devel("%s:%d ps3_repository_read_stor_dev_info"
+ " (%u:%u) failed\n", __func__, __LINE__,
+ repo->bus_index, repo->dev_index);
+ goto out;
+ }
+
+ pr_devel("%s:%d (%u:%u): port %llu, blk_size %llu, num_blocks "
+ "%llu, num_regions %u\n",
+ __func__, __LINE__, repo->bus_index, repo->dev_index,
+ port, blk_size, num_blocks, num_regions);
+
+ for (region_index = 0; region_index < num_regions; region_index++) {
+ unsigned int region_id;
+ u64 region_start, region_size;
+
+ result = ps3_repository_read_stor_dev_region(repo->bus_index,
+ repo->dev_index, region_index, &region_id,
+ &region_start, &region_size);
+ if (result) {
+ pr_devel("%s:%d ps3_repository_read_stor_dev_region"
+ " (%u:%u) failed\n", __func__, __LINE__,
+ repo->bus_index, repo->dev_index);
+ break;
+ }
+
+ pr_devel("%s:%d (%u:%u) region_id %u, start %lxh, size %lxh\n",
+ __func__, __LINE__, repo->bus_index, repo->dev_index,
+ region_id, (unsigned long)region_start,
+ (unsigned long)region_size);
+ }
+
+out:
+ pr_devel(" <- %s:%d\n", __func__, __LINE__);
+ return result;
+}
+
+/*
+ * Walk devices 0..@num_dev-1 on bus @repo->bus_index, logging each device's
+ * type and id and then dumping its resources (plus storage details on a
+ * PS3_BUS_TYPE_STORAGE bus).  @repo->dev_index, dev_type and dev_id are
+ * overwritten while iterating.
+ *
+ * A failed dev_type read ends the walk; a failed dev_id read only skips that
+ * device.  Returns the result of the last dev_type/dev_id read; results of
+ * the nested dump helpers are ignored.
+ */
+static int __init dump_device_info(struct ps3_repository_device *repo,
+ unsigned int num_dev)
+{
+ int result = 0;
+
+ pr_devel(" -> %s:%d: bus_%u\n", __func__, __LINE__, repo->bus_index);
+
+ for (repo->dev_index = 0; repo->dev_index < num_dev;
+ repo->dev_index++) {
+
+ result = ps3_repository_read_dev_type(repo->bus_index,
+ repo->dev_index, &repo->dev_type);
+
+ if (result) {
+ pr_devel("%s:%d ps3_repository_read_dev_type"
+ " (%u:%u) failed\n", __func__, __LINE__,
+ repo->bus_index, repo->dev_index);
+ break;
+ }
+
+ result = ps3_repository_read_dev_id(repo->bus_index,
+ repo->dev_index, &repo->dev_id);
+
+ if (result) {
+ pr_devel("%s:%d ps3_repository_read_dev_id"
+ " (%u:%u) failed\n", __func__, __LINE__,
+ repo->bus_index, repo->dev_index);
+ continue;
+ }
+
+ pr_devel("%s:%d (%u:%u): dev_type %u, dev_id %lu\n", __func__,
+ __LINE__, repo->bus_index, repo->dev_index,
+ repo->dev_type, (unsigned long)repo->dev_id);
+
+ ps3_repository_dump_resource_info(repo);
+
+ if (repo->bus_type == PS3_BUS_TYPE_STORAGE)
+ dump_stor_dev_info(repo);
+ }
+
+ pr_devel(" <- %s:%d\n", __func__, __LINE__);
+ return result;
+}
+
+/*
+ * Log every bus with index 0..9 and, through dump_device_info(), the devices
+ * on it.  A failed bus_type read ends the scan; a failed bus_id or num_dev
+ * read only skips that bus.  Returns the result of the last bus-level
+ * repository read; the result of dump_device_info() is ignored.
+ */
+int __init ps3_repository_dump_bus_info(void)
+{
+ int result = 0;
+ struct ps3_repository_device repo;
+
+ pr_devel(" -> %s:%d\n", __func__, __LINE__);
+
+ memset(&repo, 0, sizeof(repo));
+
+ for (repo.bus_index = 0; repo.bus_index < 10; repo.bus_index++) {
+ unsigned int num_dev;
+
+ result = ps3_repository_read_bus_type(repo.bus_index,
+ &repo.bus_type);
+
+ if (result) {
+ pr_devel("%s:%d read_bus_type(%u) failed\n",
+ __func__, __LINE__, repo.bus_index);
+ break;
+ }
+
+ result = ps3_repository_read_bus_id(repo.bus_index,
+ &repo.bus_id);
+
+ if (result) {
+ pr_devel("%s:%d read_bus_id(%u) failed\n",
+ __func__, __LINE__, repo.bus_index);
+ continue;
+ }
+
+ /* Only logged; a mismatch does not stop the dump. */
+ if (repo.bus_index != repo.bus_id)
+ pr_devel("%s:%d bus_index != bus_id\n",
+ __func__, __LINE__);
+
+ result = ps3_repository_read_bus_num_dev(repo.bus_index,
+ &num_dev);
+
+ if (result) {
+ pr_devel("%s:%d read_bus_num_dev(%u) failed\n",
+ __func__, __LINE__, repo.bus_index);
+ continue;
+ }
+
+ pr_devel("%s:%d bus_%u: bus_type %u, bus_id %lu, num_dev %u\n",
+ __func__, __LINE__, repo.bus_index, repo.bus_type,
+ (unsigned long)repo.bus_id, num_dev);
+
+ dump_device_info(&repo, num_dev);
+ }
+
+ pr_devel(" <- %s:%d\n", __func__, __LINE__);
+ return result;
+}
+
+#endif /* defined(DEBUG) */
diff --git a/arch/powerpc/platforms/ps3/setup.c b/arch/powerpc/platforms/ps3/setup.c
new file mode 100644
index 000000000..5144f1135
--- /dev/null
+++ b/arch/powerpc/platforms/ps3/setup.c
@@ -0,0 +1,305 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * PS3 platform setup routines.
+ *
+ * Copyright (C) 2006 Sony Computer Entertainment Inc.
+ * Copyright 2006 Sony Corp.
+ */
+
+#include <linux/kernel.h>
+#include <linux/delay.h>
+#include <linux/fs.h>
+#include <linux/root_dev.h>
+#include <linux/console.h>
+#include <linux/export.h>
+#include <linux/memblock.h>
+#include <linux/of.h>
+
+#include <asm/machdep.h>
+#include <asm/firmware.h>
+#include <asm/time.h>
+#include <asm/iommu.h>
+#include <asm/udbg.h>
+#include <asm/lv1call.h>
+#include <asm/ps3gpu.h>
+
+#include "platform.h"
+
+/* DBG() maps to udbg_printf when DEBUG is defined, otherwise to pr_debug. */
+#if defined(DEBUG)
+#define DBG udbg_printf
+#else
+#define DBG pr_debug
+#endif
+
+/* mutex synchronizing GPU accesses and video mode changes */
+DEFINE_MUTEX(ps3_gpu_mutex);
+EXPORT_SYMBOL_GPL(ps3_gpu_mutex);
+
+/*
+ * Firmware version as read by ps3_setup_arch(), in raw form and as the
+ * "major.minor.rev" string exposed through the fw-version sysfs attribute.
+ */
+static union ps3_firmware_version ps3_firmware_version;
+static char ps3_firmware_version_str[16];
+
+/* Copy the cached firmware version into @v. */
+void ps3_get_firmware_version(union ps3_firmware_version *v)
+{
+ *v = ps3_firmware_version;
+}
+EXPORT_SYMBOL_GPL(ps3_get_firmware_version);
+
+/*
+ * Compare the cached firmware version with @major.@minor.@rev.
+ * Returns 1 when the cached raw value is greater, -1 when it is smaller and
+ * 0 when both are equal.
+ */
+int ps3_compare_firmware_version(u16 major, u16 minor, u16 rev)
+{
+	union ps3_firmware_version wanted;
+
+	wanted.pad = 0;
+	wanted.major = major;
+	wanted.minor = minor;
+	wanted.rev = rev;
+
+	if (ps3_firmware_version.raw > wanted.raw)
+		return 1;
+	if (ps3_firmware_version.raw < wanted.raw)
+		return -1;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ps3_compare_firmware_version);
+
+/* Idle hook installed as ppc_md.power_save by ps3_setup_arch(). */
+static void ps3_power_save(void)
+{
+ /*
+ * lv1_pause() puts the PPE thread into inactive state until an
+ * irq on an unmasked plug exists. MSR[EE] has no effect.
+ * flags: 0 = wake on DEC interrupt, 1 = ignore DEC interrupt.
+ */
+
+ lv1_pause(0);
+}
+
+/* Machine restart hook: stop the other cpus, then ask the system manager. */
+static void __noreturn ps3_restart(char *cmd)
+{
+ DBG("%s:%d cmd '%s'\n", __func__, __LINE__, cmd);
+
+ smp_send_stop();
+ ps3_sys_manager_restart(); /* never returns */
+}
+
+/* Installed as pm_power_off by ps3_probe(): stop the other cpus, power off. */
+static void ps3_power_off(void)
+{
+ DBG("%s:%d\n", __func__, __LINE__);
+
+ smp_send_stop();
+ ps3_sys_manager_power_off(); /* never returns */
+}
+
+/* Machine halt hook: stop the other cpus, then ask the system manager. */
+static void __noreturn ps3_halt(void)
+{
+ DBG("%s:%d\n", __func__, __LINE__);
+
+ smp_send_stop();
+ ps3_sys_manager_halt(); /* never returns */
+}
+
+/*
+ * Machine panic hook: stop the other cpus, tell the user to power-cycle by
+ * hand, flush the kernel log and then park this thread in lv1_pause(1)
+ * forever.
+ */
+static void ps3_panic(char *str)
+{
+ DBG("%s:%d %s\n", __func__, __LINE__, str);
+
+ smp_send_stop();
+ printk("\n");
+ printk(" System does not reboot automatically.\n");
+ printk(" Please press POWER button.\n");
+ printk("\n");
+ panic_flush_kmsg_end();
+
+ while(1)
+ lv1_pause(1);
+}
+
+#if defined(CONFIG_FB_PS3) || defined(CONFIG_FB_PS3_MODULE) || \
+ defined(CONFIG_PS3_FLASH) || defined(CONFIG_PS3_FLASH_MODULE)
+/*
+ * Reserve @p->size bytes aligned to @p->align with memblock_alloc() and
+ * record the result in @p->address.  A zero size is a no-op; an allocation
+ * failure panics.
+ */
+static void __init prealloc(struct ps3_prealloc *p)
+{
+ if (!p->size)
+ return;
+
+ p->address = memblock_alloc(p->size, p->align);
+ if (!p->address)
+ panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
+ __func__, p->size, p->align);
+
+ printk(KERN_INFO "%s: %lu bytes at %p\n", p->name, p->size,
+ p->address);
+}
+#endif
+
+#if defined(CONFIG_FB_PS3) || defined(CONFIG_FB_PS3_MODULE)
+/*
+ * Boot-time reservation for ps3fb video memory.  The default size comes from
+ * CONFIG_FB_PS3_DEFAULT_SIZE_M (in MiB) and can be overridden with the
+ * "ps3fb" early parameter.
+ */
+struct ps3_prealloc ps3fb_videomemory = {
+ .name = "ps3fb videomemory",
+ .size = CONFIG_FB_PS3_DEFAULT_SIZE_M*1024*1024,
+ .align = 1024*1024 /* the GPU requires 1 MiB alignment */
+};
+EXPORT_SYMBOL_GPL(ps3fb_videomemory);
+#define prealloc_ps3fb_videomemory() prealloc(&ps3fb_videomemory)
+
+/*
+ * "ps3fb" early parameter: parse a size with memparse() and round it up to
+ * the video memory alignment.  Returns 1 when no value was supplied.
+ */
+static int __init early_parse_ps3fb(char *p)
+{
+ if (!p)
+ return 1;
+
+ ps3fb_videomemory.size = ALIGN(memparse(p, &p),
+ ps3fb_videomemory.align);
+ return 0;
+}
+early_param("ps3fb", early_parse_ps3fb);
+#else
+#define prealloc_ps3fb_videomemory() do { } while (0)
+#endif
+
+#if defined(CONFIG_PS3_FLASH) || defined(CONFIG_PS3_FLASH_MODULE)
+/*
+ * Boot-time reservation for the ps3flash bounce buffer: 256 KiB, 256 KiB
+ * aligned.  "ps3flash=off" sets the size to 0, which makes prealloc() skip it.
+ */
+struct ps3_prealloc ps3flash_bounce_buffer = {
+ .name = "ps3flash bounce buffer",
+ .size = 256*1024,
+ .align = 256*1024
+};
+EXPORT_SYMBOL_GPL(ps3flash_bounce_buffer);
+#define prealloc_ps3flash_bounce_buffer() prealloc(&ps3flash_bounce_buffer)
+
+/*
+ * "ps3flash" early parameter: the value "off" disables the bounce buffer
+ * reservation; any other value is accepted and ignored.  Returns 1 when no
+ * value was supplied.
+ */
+static int __init early_parse_ps3flash(char *p)
+{
+ if (!p)
+ return 1;
+
+ if (!strcmp(p, "off"))
+ ps3flash_bounce_buffer.size = 0;
+
+ return 0;
+}
+early_param("ps3flash", early_parse_ps3flash);
+#else
+#define prealloc_ps3flash_bounce_buffer() do { } while (0)
+#endif
+
+/*
+ * Program DABR/DABRX through the hypervisor.
+ * Returns 0 on success and -1 when lv1_set_dabr() reports any error.
+ */
+static int ps3_set_dabr(unsigned long dabr, unsigned long dabrx)
+{
+	const unsigned long hv_allowed =
+		DABRX_BTI | DABRX_KERNEL | DABRX_USER;
+
+	/* Have to set at least one bit in the DABRX */
+	if (dabr == 0 && dabrx == 0)
+		dabrx = DABRX_USER;
+
+	/* hypervisor only allows us to set BTI, Kernel and user */
+	dabrx &= hv_allowed;
+
+	if (lv1_set_dabr(dabr, dabrx))
+		return -1;
+
+	return 0;
+}
+
+/*
+ * sysfs show() handler for the "fw-version" attribute: emits the cached
+ * firmware version string (no trailing newline, as before).
+ *
+ * sysfs_emit() is used instead of sprintf() because it knows @buf is a
+ * PAGE_SIZE sysfs buffer and bounds the output accordingly.
+ *
+ * Returns the number of bytes written to @buf.
+ */
+static ssize_t ps3_fw_version_show(struct kobject *kobj,
+	struct kobj_attribute *attr, char *buf)
+{
+	return sysfs_emit(buf, "%s", ps3_firmware_version_str);
+}
+
+/*
+ * Create the "ps3" kobject below firmware_kobj and add the read-only
+ * "fw-version" attribute to it.
+ *
+ * Returns 0 on success, -ENOMEM when the kobject cannot be created, or the
+ * error reported by sysfs_create_file() (the kobject is dropped again in
+ * that case).
+ */
+static int __init ps3_setup_sysfs(void)
+{
+	static struct kobj_attribute attr = __ATTR(fw-version, S_IRUGO,
+		ps3_fw_version_show, NULL);
+	struct kobject *kobj;
+	int result;
+
+	kobj = kobject_create_and_add("ps3", firmware_kobj);
+
+	if (!kobj) {
+		pr_warn("%s:%d: kobject_create_and_add failed.\n", __func__,
+			__LINE__);
+		return -ENOMEM;
+	}
+
+	result = sysfs_create_file(kobj, &attr.attr);
+
+	if (result) {
+		pr_warn("%s:%d: sysfs_create_file failed.\n", __func__,
+			__LINE__);
+		kobject_put(kobj);
+		/* Report the real cause rather than a blanket -ENOMEM. */
+		return result;
+	}
+
+	return 0;
+}
+core_initcall(ps3_setup_sysfs);
+
+/*
+ * Machine setup_arch hook.  Reads and caches the firmware version, selects
+ * the spu platform ops, installs the SMP ops (CONFIG_SMP), makes the boot
+ * time memory reservations, installs the idle hook and initialises the
+ * os area.
+ */
+static void __init ps3_setup_arch(void)
+{
+ u64 tmp;
+
+ DBG(" -> %s:%d\n", __func__, __LINE__);
+
+ /* The second output is not used; the call's return value is ignored. */
+ lv1_get_version_info(&ps3_firmware_version.raw, &tmp);
+
+ snprintf(ps3_firmware_version_str, sizeof(ps3_firmware_version_str),
+ "%u.%u.%u", ps3_firmware_version.major,
+ ps3_firmware_version.minor, ps3_firmware_version.rev);
+
+ printk(KERN_INFO "PS3 firmware version %s\n", ps3_firmware_version_str);
+
+ ps3_spu_set_platform();
+
+#ifdef CONFIG_SMP
+ smp_init_ps3();
+#endif
+
+ prealloc_ps3fb_videomemory();
+ prealloc_ps3flash_bounce_buffer();
+
+ ppc_md.power_save = ps3_power_save;
+ ps3_os_area_init();
+
+ DBG(" <- %s:%d\n", __func__, __LINE__);
+}
+
+/* Machine progress hook: print @hex and the (possibly NULL) message @s. */
+static void __init ps3_progress(char *s, unsigned short hex)
+{
+ printk("*** %04x : %s\n", hex, s ? s : "");
+}
+
+/*
+ * Early memory setup: ps3_mm_init(), then create the virtual address space
+ * and hand the hash table size it reports to ps3_hpte_init().
+ */
+void __init ps3_early_mm_init(void)
+{
+ unsigned long htab_size;
+
+ ps3_mm_init();
+ ps3_mm_vas_create(&htab_size);
+ ps3_hpte_init(htab_size);
+}
+
+/*
+ * Machine probe hook: save the os area parameters and install the power-off
+ * handler.  Always returns 1.
+ */
+static int __init ps3_probe(void)
+{
+ DBG(" -> %s:%d\n", __func__, __LINE__);
+
+ ps3_os_area_save_params();
+
+ pm_power_off = ps3_power_off;
+
+ DBG(" <- %s:%d\n", __func__, __LINE__);
+ return 1;
+}
+
+#if defined(CONFIG_KEXEC_CORE)
+/*
+ * kexec cpu-down hook: tear down the current cpu's IPI resources and shut
+ * down its interrupts.  @crash_shutdown and @secondary are not used.
+ */
+static void ps3_kexec_cpu_down(int crash_shutdown, int secondary)
+{
+ int cpu = smp_processor_id();
+
+ DBG(" -> %s:%d: (%d)\n", __func__, __LINE__, cpu);
+
+ ps3_smp_cleanup_cpu(cpu);
+ ps3_shutdown_IRQ(cpu);
+
+ DBG(" <- %s:%d\n", __func__, __LINE__);
+}
+#endif
+
+/*
+ * Machine description for the PS3: matches the "sony,ps3" compatible and
+ * wires up the platform hooks.  Power-off is not listed here; ps3_probe()
+ * installs it through pm_power_off.
+ */
+define_machine(ps3) {
+ .name = "PS3",
+ .compatible = "sony,ps3",
+ .probe = ps3_probe,
+ .setup_arch = ps3_setup_arch,
+ .init_IRQ = ps3_init_IRQ,
+ .panic = ps3_panic,
+ .get_boot_time = ps3_get_boot_time,
+ .set_dabr = ps3_set_dabr,
+ .calibrate_decr = ps3_calibrate_decr,
+ .progress = ps3_progress,
+ .restart = ps3_restart,
+ .halt = ps3_halt,
+#if defined(CONFIG_KEXEC_CORE)
+ .kexec_cpu_down = ps3_kexec_cpu_down,
+#endif
+};
diff --git a/arch/powerpc/platforms/ps3/smp.c b/arch/powerpc/platforms/ps3/smp.c
new file mode 100644
index 000000000..852957560
--- /dev/null
+++ b/arch/powerpc/platforms/ps3/smp.c
@@ -0,0 +1,120 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * PS3 SMP routines.
+ *
+ * Copyright (C) 2006 Sony Computer Entertainment Inc.
+ * Copyright 2006 Sony Corp.
+ */
+
+#include <linux/kernel.h>
+#include <linux/smp.h>
+
+#include <asm/machdep.h>
+#include <asm/udbg.h>
+
+#include "platform.h"
+
+/* DBG() maps to udbg_printf when DEBUG is defined, otherwise to pr_debug. */
+#if defined(DEBUG)
+#define DBG udbg_printf
+#else
+#define DBG pr_debug
+#endif
+
+/**
+ * ps3_ipi_virqs - a per cpu array of virqs for ipi use
+ *
+ * Indexed by PPC_MSG_* value; MSG_COUNT is the number of slots per cpu.
+ */
+
+#define MSG_COUNT 4
+static DEFINE_PER_CPU(unsigned int [MSG_COUNT], ps3_ipi_virqs);
+
+/*
+ * smp_ops message_pass hook: raise IPI @msg on @cpu by sending an event to
+ * the virq stored for that (cpu, msg) pair.
+ *
+ * @msg is signed, so it is range-checked on both sides before it is used as
+ * an index into the per-cpu virq array.  Out-of-range messages are logged
+ * and dropped, as are send failures.
+ */
+static void ps3_smp_message_pass(int cpu, int msg)
+{
+	int result;
+	unsigned int virq;
+
+	if (msg < 0 || msg >= MSG_COUNT) {
+		DBG("%s:%d: bad msg: %d\n", __func__, __LINE__, msg);
+		return;
+	}
+
+	virq = per_cpu(ps3_ipi_virqs, cpu)[msg];
+	result = ps3_send_event_locally(virq);
+
+	if (result)
+		DBG("%s:%d: ps3_send_event_locally(%d, %d) failed"
+			" (%d)\n", __func__, __LINE__, cpu, msg, result);
+}
+
+/*
+ * smp_ops probe hook: for cpus 0 and 1 (the count is hard-coded), create one
+ * event receive port per PPC_MSG_* value, request it as that message's IPI
+ * and register it as an IPI irq.  The NMI slot is additionally registered as
+ * the debug-break IPI.
+ *
+ * Failures are not reported to the caller: a slot whose port setup fails is
+ * skipped, and a slot whose IPI request fails is reset to 0.
+ */
+static void __init ps3_smp_probe(void)
+{
+ int cpu;
+
+ for (cpu = 0; cpu < 2; cpu++) {
+ int result;
+ unsigned int *virqs = per_cpu(ps3_ipi_virqs, cpu);
+ int i;
+
+ DBG(" -> %s:%d: (%d)\n", __func__, __LINE__, cpu);
+
+ /*
+ * Check assumptions on ps3_ipi_virqs[] indexing. If this
+ * check fails, then a different mapping of PPC_MSG_
+ * to index needs to be setup.
+ */
+
+ BUILD_BUG_ON(PPC_MSG_CALL_FUNCTION != 0);
+ BUILD_BUG_ON(PPC_MSG_RESCHEDULE != 1);
+ BUILD_BUG_ON(PPC_MSG_TICK_BROADCAST != 2);
+ BUILD_BUG_ON(PPC_MSG_NMI_IPI != 3);
+
+ for (i = 0; i < MSG_COUNT; i++) {
+ result = ps3_event_receive_port_setup(cpu, &virqs[i]);
+
+ if (result)
+ continue;
+
+ DBG("%s:%d: (%d, %d) => virq %u\n",
+ __func__, __LINE__, cpu, i, virqs[i]);
+
+ result = smp_request_message_ipi(virqs[i], i);
+
+ if (result)
+ virqs[i] = 0;
+ else
+ ps3_register_ipi_irq(cpu, virqs[i]);
+ }
+
+ /* Called unconditionally, whatever happened to the NMI slot above. */
+ ps3_register_ipi_debug_brk(cpu, virqs[PPC_MSG_NMI_IPI]);
+
+ DBG(" <- %s:%d: (%d)\n", __func__, __LINE__, cpu);
+ }
+}
+
+/*
+ * Destroy the event receive port behind each of @cpu's IPI virqs and reset
+ * the stored virq numbers to 0.  Every slot is processed, including slots
+ * that already hold 0.
+ */
+void ps3_smp_cleanup_cpu(int cpu)
+{
+ unsigned int *virqs = per_cpu(ps3_ipi_virqs, cpu);
+ int i;
+
+ DBG(" -> %s:%d: (%d)\n", __func__, __LINE__, cpu);
+
+ for (i = 0; i < MSG_COUNT; i++) {
+ /* Can't call free_irq from interrupt context. */
+ ps3_event_receive_port_destroy(virqs[i]);
+ virqs[i] = 0;
+ }
+
+ DBG(" <- %s:%d: (%d)\n", __func__, __LINE__, cpu);
+}
+
+/* PS3 SMP operations; cpu kick uses the generic powerpc implementation. */
+static struct smp_ops_t ps3_smp_ops = {
+ .probe = ps3_smp_probe,
+ .message_pass = ps3_smp_message_pass,
+ .kick_cpu = smp_generic_kick_cpu,
+};
+
+/* Install ps3_smp_ops as the active smp_ops. */
+void __init smp_init_ps3(void)
+{
+ DBG(" -> %s\n", __func__);
+ smp_ops = &ps3_smp_ops;
+ DBG(" <- %s\n", __func__);
+}
diff --git a/arch/powerpc/platforms/ps3/spu.c b/arch/powerpc/platforms/ps3/spu.c
new file mode 100644
index 000000000..4a2520ec6
--- /dev/null
+++ b/arch/powerpc/platforms/ps3/spu.c
@@ -0,0 +1,619 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * PS3 Platform spu routines.
+ *
+ * Copyright (C) 2006 Sony Computer Entertainment Inc.
+ * Copyright 2006 Sony Corp.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/mmzone.h>
+#include <linux/export.h>
+#include <linux/io.h>
+#include <linux/mm.h>
+
+#include <asm/spu.h>
+#include <asm/spu_priv1.h>
+#include <asm/lv1call.h>
+#include <asm/ps3.h>
+
+#include "../cell/spufs/spufs.h"
+#include "platform.h"
+
+/* spu_management_ops */
+
+/**
+ * enum spe_type - Type of spe to create.
+ * @SPE_TYPE_LOGICAL: Standard logical spe.
+ *
+ * For use with lv1_construct_logical_spe(). The current HV does not support
+ * any types other than those listed.
+ */
+
+enum spe_type {
+ SPE_TYPE_LOGICAL = 0,
+};
+
+/**
+ * struct spe_shadow - logical spe shadow register area.
+ *
+ * Read-only shadow of spe registers.  The padding arrays place each register
+ * at the byte offset given in its trailing comment; the whole area is
+ * 0x1000 bytes.
+ */
+
+struct spe_shadow {
+ u8 padding_0140[0x0140];
+ u64 int_status_class0_RW; /* 0x0140 */
+ u64 int_status_class1_RW; /* 0x0148 */
+ u64 int_status_class2_RW; /* 0x0150 */
+ u8 padding_0158[0x0610-0x0158];
+ u64 mfc_dsisr_RW; /* 0x0610 */
+ u8 padding_0618[0x0620-0x0618];
+ u64 mfc_dar_RW; /* 0x0620 */
+ u8 padding_0628[0x0800-0x0628];
+ u64 mfc_dsipr_R; /* 0x0800 */
+ u8 padding_0808[0x0810-0x0808];
+ u64 mfc_lscrr_R; /* 0x0810 */
+ u8 padding_0818[0x0c00-0x0818];
+ u64 mfc_cer_R; /* 0x0c00 */
+ u8 padding_0c08[0x0f00-0x0c08];
+ u64 spe_execution_status; /* 0x0f00 */
+ u8 padding_0f08[0x1000-0x0f08];
+};
+
+/**
+ * enum spe_ex_state - Logical spe execution state.
+ * @SPE_EX_STATE_UNEXECUTABLE: Uninitialized.
+ * @SPE_EX_STATE_EXECUTABLE: Enabled, not ready.
+ * @SPE_EX_STATE_EXECUTED: Ready for use.
+ *
+ * The execution state (status) of the logical spe as reported in
+ * struct spe_shadow:spe_execution_status.
+ */
+
+enum spe_ex_state {
+ SPE_EX_STATE_UNEXECUTABLE = 0,
+ SPE_EX_STATE_EXECUTABLE = 2,
+ SPE_EX_STATE_EXECUTED = 3,
+};
+
+/**
+ * struct priv1_cache - Cached values of priv1 registers.
+ * @masks: Array of cached spe interrupt masks, indexed by class.
+ * @sr1: Cached mfc_sr1 register.
+ * @tclass_id: Cached mfc_tclass_id register.
+ */
+
+struct priv1_cache {
+ u64 masks[3];
+ u64 sr1;
+ u64 tclass_id;
+};
+
+/**
+ * struct spu_pdata - Platform state variables.
+ * @spe_id: HV spe id returned by lv1_construct_logical_spe().
+ * @resource_id: HV spe resource id returned by
+ * ps3_repository_read_spu_resource_id().
+ * @priv2_addr: lpar address of spe priv2 area returned by
+ * lv1_construct_logical_spe().
+ * @shadow_addr: lpar address of spe register shadow area returned by
+ * lv1_construct_logical_spe().
+ * @shadow: Virtual (ioremap) address of spe register shadow area.
+ * @cache: Cached values of priv1 registers.
+ */
+
+struct spu_pdata {
+ u64 spe_id;
+ u64 resource_id;
+ u64 priv2_addr;
+ u64 shadow_addr;
+ struct spe_shadow __iomem *shadow;
+ struct priv1_cache cache;
+};
+
+/* Typed accessor for the PS3 platform data stored in @spu->pdata. */
+static struct spu_pdata *spu_pdata(struct spu *spu)
+{
+ return spu->pdata;
+}
+
+/*
+ * dump_areas() logs an spe id and four addresses with pr_debug(), tagging
+ * each line with the caller's function name and line number.
+ * Note that @spe_id is taken as unsigned int, so a wider id is truncated in
+ * the log output.
+ */
+#define dump_areas(_a, _b, _c, _d, _e) \
+ _dump_areas(_a, _b, _c, _d, _e, __func__, __LINE__)
+static void _dump_areas(unsigned int spe_id, unsigned long priv2,
+ unsigned long problem, unsigned long ls, unsigned long shadow,
+ const char* func, int line)
+{
+ pr_debug("%s:%d: spe_id: %xh (%u)\n", func, line, spe_id, spe_id);
+ pr_debug("%s:%d: priv2: %lxh\n", func, line, priv2);
+ pr_debug("%s:%d: problem: %lxh\n", func, line, problem);
+ pr_debug("%s:%d: ls: %lxh\n", func, line, ls);
+ pr_debug("%s:%d: shadow: %lxh\n", func, line, shadow);
+}
+
+/* Return the HV spe id of the spu passed as @arg (a struct spu pointer). */
+u64 ps3_get_spe_id(void *arg)
+{
+ return spu_pdata(arg)->spe_id;
+}
+EXPORT_SYMBOL_GPL(ps3_get_spe_id);
+
+/*
+ * Return the id left in @id by lv1_get_virtual_address_space_id_of_ppe().
+ *
+ * NOTE(review): both hypervisor calls write the same variable and neither
+ * return value is checked, so a failure of the second call would return
+ * whatever the first call (or the uninitialised stack) left behind - confirm
+ * this is intended.
+ */
+static unsigned long __init get_vas_id(void)
+{
+ u64 id;
+
+ lv1_get_logical_ppe_id(&id);
+ lv1_get_virtual_address_space_id_of_ppe(&id);
+
+ return id;
+}
+
+/*
+ * Ask the hypervisor to construct a logical spe for @spu.
+ *
+ * On success the priv2 and shadow lpar addresses and the spe id are stored
+ * in the spu's platform data, and the problem-state and local-store
+ * addresses in @spu itself.  On failure @spu->problem_phys and
+ * @spu->local_store_phys are left untouched, so no uninitialised stack
+ * values are copied into them.
+ *
+ * Returns 0 on success or the lv1_construct_logical_spe() error.
+ */
+static int __init construct_spu(struct spu *spu)
+{
+	int result;
+	u64 unused;
+	u64 problem_phys;
+	u64 local_store_phys;
+
+	result = lv1_construct_logical_spe(PAGE_SHIFT, PAGE_SHIFT, PAGE_SHIFT,
+		PAGE_SHIFT, PAGE_SHIFT, get_vas_id(), SPE_TYPE_LOGICAL,
+		&spu_pdata(spu)->priv2_addr, &problem_phys,
+		&local_store_phys, &unused,
+		&spu_pdata(spu)->shadow_addr,
+		&spu_pdata(spu)->spe_id);
+
+	if (result) {
+		pr_debug("%s:%d: lv1_construct_logical_spe failed: %s\n",
+			__func__, __LINE__, ps3_result(result));
+		return result;
+	}
+
+	/* Publish the addresses only after the call has succeeded. */
+	spu->problem_phys = problem_phys;
+	spu->local_store_phys = local_store_phys;
+
+	return 0;
+}
+
+/*
+ * iounmap() all four spu mappings (priv2, problem, local store, shadow).
+ * The stored pointers are not cleared afterwards.
+ */
+static void spu_unmap(struct spu *spu)
+{
+ iounmap(spu->priv2);
+ iounmap(spu->problem);
+ iounmap((__force u8 __iomem *)spu->local_store);
+ iounmap(spu_pdata(spu)->shadow);
+}
+
+/**
+ * setup_areas - Map the spu regions into the address space.
+ * @spu: spu whose shadow, local store, problem and priv2 areas are mapped,
+ *	in that order.
+ *
+ * The current HV requires the spu shadow regs to be mapped with the
+ * PTE page protection bits set as read-only.
+ *
+ * Return: 0 on success.  If any mapping fails, spu_unmap() is called and
+ * -ENOMEM is returned.
+ */
+
+static int __init setup_areas(struct spu *spu)
+{
+	unsigned long shadow_flags = pgprot_val(pgprot_noncached_wc(PAGE_KERNEL_RO));
+
+	spu_pdata(spu)->shadow = ioremap_prot(spu_pdata(spu)->shadow_addr,
+		sizeof(struct spe_shadow), shadow_flags);
+	if (!spu_pdata(spu)->shadow) {
+		pr_debug("%s:%d: ioremap shadow failed\n", __func__, __LINE__);
+		goto fail_ioremap;
+	}
+
+	spu->local_store = (__force void *)ioremap_wc(spu->local_store_phys, LS_SIZE);
+
+	if (!spu->local_store) {
+		pr_debug("%s:%d: ioremap local_store failed\n",
+			__func__, __LINE__);
+		goto fail_ioremap;
+	}
+
+	spu->problem = ioremap(spu->problem_phys,
+		sizeof(struct spu_problem));
+
+	if (!spu->problem) {
+		pr_debug("%s:%d: ioremap problem failed\n", __func__, __LINE__);
+		goto fail_ioremap;
+	}
+
+	spu->priv2 = ioremap(spu_pdata(spu)->priv2_addr,
+		sizeof(struct spu_priv2));
+
+	if (!spu->priv2) {
+		pr_debug("%s:%d: ioremap priv2 failed\n", __func__, __LINE__);
+		goto fail_ioremap;
+	}
+
+	/* Log the lpar addresses first, then the matching virtual addresses. */
+	dump_areas(spu_pdata(spu)->spe_id, spu_pdata(spu)->priv2_addr,
+		spu->problem_phys, spu->local_store_phys,
+		spu_pdata(spu)->shadow_addr);
+	dump_areas(spu_pdata(spu)->spe_id, (unsigned long)spu->priv2,
+		(unsigned long)spu->problem, (unsigned long)spu->local_store,
+		(unsigned long)spu_pdata(spu)->shadow);
+
+	return 0;
+
+fail_ioremap:
+	spu_unmap(spu);
+
+	return -ENOMEM;
+}
+
+/*
+ * Set up the class 0, 1 and 2 spe interrupts of @spu, in that order.
+ * If one setup fails, the irqs set up before it are destroyed in reverse
+ * order, all three irq slots are reset to 0 and the error is returned.
+ */
+static int __init setup_interrupts(struct spu *spu)
+{
+	unsigned int class;
+	int result = 0;
+
+	for (class = 0; class < 3; class++) {
+		result = ps3_spe_irq_setup(PS3_BINDING_CPU_ANY,
+			spu_pdata(spu)->spe_id, class, &spu->irqs[class]);
+
+		if (result)
+			goto fail_alloc;
+	}
+
+	return result;
+
+fail_alloc:
+	/* 'class' is the index that failed; unwind everything below it. */
+	while (class--)
+		ps3_spe_irq_destroy(spu->irqs[class]);
+
+	spu->irqs[0] = spu->irqs[1] = spu->irqs[2] = 0;
+	return result;
+}
+
+/*
+ * Enable the logical spe, map its areas and set up its interrupts.
+ *
+ * On failure everything done here is undone before returning: the areas are
+ * unmapped (by setup_areas() itself or at fail_interrupts) and the spe is
+ * disabled again.  Returns 0 on success or the error of the failing step.
+ */
+static int __init enable_spu(struct spu *spu)
+{
+ int result;
+
+ result = lv1_enable_logical_spe(spu_pdata(spu)->spe_id,
+ spu_pdata(spu)->resource_id);
+
+ if (result) {
+ pr_debug("%s:%d: lv1_enable_logical_spe failed: %s\n",
+ __func__, __LINE__, ps3_result(result));
+ goto fail_enable;
+ }
+
+ result = setup_areas(spu);
+
+ if (result)
+ goto fail_areas;
+
+ result = setup_interrupts(spu);
+
+ if (result)
+ goto fail_interrupts;
+
+ return 0;
+
+fail_interrupts:
+ spu_unmap(spu);
+fail_areas:
+ lv1_disable_logical_spe(spu_pdata(spu)->spe_id, 0);
+fail_enable:
+ return result;
+}
+
+/*
+ * spu_management_ops destroy hook: disable the logical spe, destroy its
+ * three irqs, unmap its areas, destruct the spe and free the platform data.
+ * A hypervisor failure while disabling or destructing triggers BUG_ON().
+ * Always returns 0.
+ */
+static int ps3_destroy_spu(struct spu *spu)
+{
+ int result;
+
+ pr_debug("%s:%d spu_%d\n", __func__, __LINE__, spu->number);
+
+ result = lv1_disable_logical_spe(spu_pdata(spu)->spe_id, 0);
+ BUG_ON(result);
+
+ ps3_spe_irq_destroy(spu->irqs[2]);
+ ps3_spe_irq_destroy(spu->irqs[1]);
+ ps3_spe_irq_destroy(spu->irqs[0]);
+
+ spu->irqs[0] = spu->irqs[1] = spu->irqs[2] = 0;
+
+ spu_unmap(spu);
+
+ result = lv1_destruct_logical_spe(spu_pdata(spu)->spe_id);
+ BUG_ON(result);
+
+ kfree(spu->pdata);
+ spu->pdata = NULL;
+
+ return 0;
+}
+
+/*
+ * spu_management_ops create hook: allocate the platform data for @spu,
+ * construct and enable the logical spe, then wait until its shadow area
+ * reports SPE_EX_STATE_EXECUTED.
+ *
+ * @data carries the HV spe resource id as an integer cast to a pointer.
+ *
+ * The error path undoes only what has actually been done.  enable_spu()
+ * already unmaps and disables on failure, so only the constructed spe and
+ * the platform data are left to release; calling ps3_destroy_spu() there
+ * would repeat the disable (under BUG_ON) and unmap stale mappings again.
+ *
+ * Returns 0 on success, -ENOMEM on allocation failure, or the error of the
+ * failing construct/enable step.
+ */
+static int __init ps3_create_spu(struct spu *spu, void *data)
+{
+	int result;
+
+	pr_debug("%s:%d spu_%d\n", __func__, __LINE__, spu->number);
+
+	spu->pdata = kzalloc(sizeof(struct spu_pdata),
+		GFP_KERNEL);
+
+	if (!spu->pdata) {
+		result = -ENOMEM;
+		goto fail_malloc;
+	}
+
+	spu_pdata(spu)->resource_id = (unsigned long)data;
+
+	/* Init cached reg values to HV defaults. */
+
+	spu_pdata(spu)->cache.sr1 = 0x33;
+
+	result = construct_spu(spu);
+
+	if (result)
+		goto fail_construct;
+
+	/* For now, just go ahead and enable it. */
+
+	result = enable_spu(spu);
+
+	if (result)
+		goto fail_enable;
+
+	/* Make sure the spu is in SPE_EX_STATE_EXECUTED. */
+
+	/* need something better here!!! */
+	while (in_be64(&spu_pdata(spu)->shadow->spe_execution_status)
+		!= SPE_EX_STATE_EXECUTED)
+		(void)0;
+
+	return result;
+
+fail_enable:
+	/* The spe was constructed but is no longer enabled or mapped. */
+	if (lv1_destruct_logical_spe(spu_pdata(spu)->spe_id))
+		pr_debug("%s:%d: lv1_destruct_logical_spe failed\n",
+			__func__, __LINE__);
+fail_construct:
+	kfree(spu->pdata);
+	spu->pdata = NULL;
+fail_malloc:
+	return result;
+}
+
+/*
+ * spu_management_ops enumerate hook: call @fn once for every spu resource
+ * reservation of type PS3_SPU_RESOURCE_TYPE_EXCLUSIVE, passing the resource
+ * id cast to a pointer.
+ *
+ * A failure to read the reservation count is handled explicitly before the
+ * loop, so it can neither be overwritten by a later read nor lead to
+ * iterating over an unreliable count.
+ *
+ * Returns the number of resource ids read from the repository (all types,
+ * not only the exclusive ones), or the first error from a repository read
+ * or from @fn.
+ */
+static int __init ps3_enumerate_spus(int (*fn)(void *data))
+{
+	int result;
+	unsigned int num_resource_id;
+	unsigned int i;
+
+	result = ps3_repository_read_num_spu_resource_id(&num_resource_id);
+
+	pr_debug("%s:%d: num_resource_id %u\n", __func__, __LINE__,
+		num_resource_id);
+
+	if (result)
+		goto fail;
+
+	/*
+	 * For now, just create logical spus equal to the number
+	 * of physical spus reserved for the partition.
+	 */
+
+	for (i = 0; i < num_resource_id; i++) {
+		enum ps3_spu_resource_type resource_type;
+		unsigned int resource_id;
+
+		result = ps3_repository_read_spu_resource_id(i,
+			&resource_type, &resource_id);
+
+		if (result)
+			goto fail;
+
+		if (resource_type == PS3_SPU_RESOURCE_TYPE_EXCLUSIVE) {
+			result = fn((void*)(unsigned long)resource_id);
+
+			if (result)
+				goto fail;
+		}
+	}
+
+	return num_resource_id;
+
+fail:
+	printk(KERN_WARNING "%s:%d: Error initializing spus\n",
+		__func__, __LINE__);
+	return result;
+}
+
+/* spu_management_ops init_affinity hook: nothing to do, always returns 0. */
+static int ps3_init_affinity(void)
+{
+ return 0;
+}
+
+/**
+ * ps3_enable_spu - Enable SPU run control.
+ * @ctx: spu context (unused; this hook is currently empty).
+ *
+ * An outstanding enhancement for the PS3 would be to add a guard to check
+ * for incorrect access to the spu problem state when the spu context is
+ * disabled. This check could be implemented with a flag added to the spu
+ * context that would inhibit mapping problem state pages, and a routine
+ * to unmap spu problem state pages. When the spu is enabled with
+ * ps3_enable_spu() the flag would be set allowing pages to be mapped,
+ * and when the spu is disabled with ps3_disable_spu() the flag would be
+ * cleared and the mapped problem state pages would be unmapped.
+ */
+
+static void ps3_enable_spu(struct spu_context *ctx)
+{
+}
+
+/* spu_management_ops disable hook: stop the context via its runcntl_stop op. */
+static void ps3_disable_spu(struct spu_context *ctx)
+{
+ ctx->ops->runcntl_stop(ctx);
+}
+
+/* PS3 implementation of the spu management operations. */
+static const struct spu_management_ops spu_management_ps3_ops = {
+ .enumerate_spus = ps3_enumerate_spus,
+ .create_spu = ps3_create_spu,
+ .destroy_spu = ps3_destroy_spu,
+ .enable_spu = ps3_enable_spu,
+ .disable_spu = ps3_disable_spu,
+ .init_affinity = ps3_init_affinity,
+};
+
+/* spu_priv1_ops */
+
+/* AND @mask into the interrupt mask of @class (read-modify-write). */
+static void int_mask_and(struct spu *spu, int class, u64 mask)
+{
+	/* are these serialized by caller??? */
+	const u64 prev = spu_int_mask_get(spu, class);
+
+	spu_int_mask_set(spu, class, prev & mask);
+}
+
+/* OR @mask into the interrupt mask of @class (read-modify-write). */
+static void int_mask_or(struct spu *spu, int class, u64 mask)
+{
+	const u64 prev = spu_int_mask_get(spu, class);
+
+	spu_int_mask_set(spu, class, prev | mask);
+}
+
+/*
+ * Record @mask in the per-spu cache for @class, then hand the cached
+ * value to the hypervisor.
+ */
+static void int_mask_set(struct spu *spu, int class, u64 mask)
+{
+	spu_pdata(spu)->cache.masks[class] = mask;
+
+	lv1_set_spe_interrupt_mask(spu_pdata(spu)->spe_id, class,
+		spu_pdata(spu)->cache.masks[class]);
+}
+
+/* Return the cached interrupt mask for @class, as stored by int_mask_set(). */
+static u64 int_mask_get(struct spu *spu, int class)
+{
+	return spu_pdata(spu)->cache.masks[class];
+}
+
+/* Ask the hypervisor to clear the @stat bits of the @class interrupt status. */
+static void int_stat_clear(struct spu *spu, int class, u64 stat)
+{
+	/* Note that MFC_DSISR will be cleared when class1[MF] is set. */
+
+	lv1_clear_spe_interrupt_status(spu_pdata(spu)->spe_id, class, stat,
+		0);
+}
+
+/*
+ * Fetch the @class interrupt status from the hypervisor. The status code
+ * of the hypervisor call is not examined.
+ */
+static u64 int_stat_get(struct spu *spu, int class)
+{
+	u64 status;
+
+	lv1_get_spe_interrupt_status(spu_pdata(spu)->spe_id, class, &status);
+
+	return status;
+}
+
+/* cpu_affinity_set hook: not supported here, so the request is ignored. */
+static void cpu_affinity_set(struct spu *spu, int cpu)
+{
+	/* No support. */
+}
+
+/* Read mfc_dar_RW from the spu's shadow area. */
+static u64 mfc_dar_get(struct spu *spu)
+{
+	return in_be64(&spu_pdata(spu)->shadow->mfc_dar_RW);
+}
+
+/* mfc_dsisr_set hook: a no-op, @dsisr is ignored. */
+static void mfc_dsisr_set(struct spu *spu, u64 dsisr)
+{
+	/* Nothing to do, cleared in int_stat_clear(). */
+}
+
+/* Read mfc_dsisr_RW from the spu's shadow area. */
+static u64 mfc_dsisr_get(struct spu *spu)
+{
+	return in_be64(&spu_pdata(spu)->shadow->mfc_dsisr_RW);
+}
+
+/* mfc_sdr_setup hook: a no-op. */
+static void mfc_sdr_setup(struct spu *spu)
+{
+	/* Nothing to do. */
+}
+
+/*
+ * Store @sr1 in the per-spu cache and write it to the mfc_sr1_RW register
+ * through the hypervisor.
+ *
+ * Only the local-storage-decode and problem-state bits may differ from the
+ * cached value; a difference in any other bit triggers BUG_ON().
+ */
+static void mfc_sr1_set(struct spu *spu, u64 sr1)
+{
+	/* Check bits allowed by HV. */
+
+	static const u64 fixed_bits = ~(MFC_STATE1_LOCAL_STORAGE_DECODE_MASK
+		| MFC_STATE1_PROBLEM_STATE_MASK);
+
+	BUG_ON((sr1 ^ spu_pdata(spu)->cache.sr1) & fixed_bits);
+
+	spu_pdata(spu)->cache.sr1 = sr1;
+
+	lv1_set_spe_privilege_state_area_1_register(spu_pdata(spu)->spe_id,
+		offsetof(struct spu_priv1, mfc_sr1_RW),
+		spu_pdata(spu)->cache.sr1);
+}
+
+/* Return the cached SR1 value, as stored by mfc_sr1_set(). */
+static u64 mfc_sr1_get(struct spu *spu)
+{
+	return spu_pdata(spu)->cache.sr1;
+}
+
+/*
+ * Store @tclass_id in the per-spu cache and write the cached value to the
+ * mfc_tclass_id_RW register through the hypervisor.
+ */
+static void mfc_tclass_id_set(struct spu *spu, u64 tclass_id)
+{
+	spu_pdata(spu)->cache.tclass_id = tclass_id;
+
+	lv1_set_spe_privilege_state_area_1_register(spu_pdata(spu)->spe_id,
+		offsetof(struct spu_priv1, mfc_tclass_id_RW),
+		spu_pdata(spu)->cache.tclass_id);
+}
+
+/* Return the cached tclass id, as stored by mfc_tclass_id_set(). */
+static u64 mfc_tclass_id_get(struct spu *spu)
+{
+	return spu_pdata(spu)->cache.tclass_id;
+}
+
+/* tlb_invalidate hook: a no-op. */
+static void tlb_invalidate(struct spu *spu)
+{
+	/* Nothing to do. */
+}
+
+/* Resource allocation group ids are not supported; @id is ignored. */
+static void resource_allocation_groupID_set(struct spu *spu, u64 id)
+{
+	/* No support. */
+}
+
+/* Resource allocation group ids are not supported; always returns 0. */
+static u64 resource_allocation_groupID_get(struct spu *spu)
+{
+	/* No support. */
+	return 0;
+}
+
+/* Resource allocation enable is not supported; @enable is ignored. */
+static void resource_allocation_enable_set(struct spu *spu, u64 enable)
+{
+	/* No support. */
+}
+
+/* Resource allocation enable is not supported; always returns 0. */
+static u64 resource_allocation_enable_get(struct spu *spu)
+{
+	/* No support. */
+	return 0;
+}
+
+/* PS3 spu priv1 accessors, installed by ps3_spu_set_platform(). */
+static const struct spu_priv1_ops spu_priv1_ps3_ops = {
+	/* interrupt mask and status */
+	.int_mask_and		= int_mask_and,
+	.int_mask_or		= int_mask_or,
+	.int_mask_set		= int_mask_set,
+	.int_mask_get		= int_mask_get,
+	.int_stat_clear		= int_stat_clear,
+	.int_stat_get		= int_stat_get,
+	.cpu_affinity_set	= cpu_affinity_set,
+	/* mfc registers */
+	.mfc_dar_get		= mfc_dar_get,
+	.mfc_dsisr_set		= mfc_dsisr_set,
+	.mfc_dsisr_get		= mfc_dsisr_get,
+	.mfc_sdr_setup		= mfc_sdr_setup,
+	.mfc_sr1_set		= mfc_sr1_set,
+	.mfc_sr1_get		= mfc_sr1_get,
+	.mfc_tclass_id_set	= mfc_tclass_id_set,
+	.mfc_tclass_id_get	= mfc_tclass_id_get,
+	.tlb_invalidate		= tlb_invalidate,
+	/* resource allocation (no support, see the stubs) */
+	.resource_allocation_groupID_set = resource_allocation_groupID_set,
+	.resource_allocation_groupID_get = resource_allocation_groupID_get,
+	.resource_allocation_enable_set = resource_allocation_enable_set,
+	.resource_allocation_enable_get = resource_allocation_enable_get,
+};
+
+/* Point the global spu management and priv1 op tables at the PS3 versions. */
+void ps3_spu_set_platform(void)
+{
+	spu_management_ops = &spu_management_ps3_ops;
+	spu_priv1_ops = &spu_priv1_ps3_ops;
+}
diff --git a/arch/powerpc/platforms/ps3/system-bus.c b/arch/powerpc/platforms/ps3/system-bus.c
new file mode 100644
index 000000000..d6b5f5ecd
--- /dev/null
+++ b/arch/powerpc/platforms/ps3/system-bus.c
@@ -0,0 +1,803 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * PS3 system bus driver.
+ *
+ * Copyright (C) 2006 Sony Computer Entertainment Inc.
+ * Copyright 2006 Sony Corp.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/export.h>
+#include <linux/dma-map-ops.h>
+#include <linux/err.h>
+#include <linux/slab.h>
+
+#include <asm/udbg.h>
+#include <asm/lv1call.h>
+#include <asm/firmware.h>
+#include <asm/cell-regs.h>
+
+#include "platform.h"
+
+/*
+ * Root device of the system bus: registered in ps3_system_bus_init() and
+ * used as the default parent in ps3_system_bus_device_register().
+ */
+static struct device ps3_system_bus = {
+	.init_name	= "ps3_system",
+};
+
+/* FIXME: need device usage counters! */
+/* Open counts used by the ps3_{open,close}_hv_device_*() helpers below. */
+static struct {
+	struct mutex mutex;	/* held around every counter update */
+	int sb_11;		/* bus 1, dev 1: usb 0 */
+	int sb_12;		/* bus 1, dev 2: usb 0 (sic; TODO confirm which usb port) */
+	int gpu;		/* gpu opens */
+} usage_hack;
+
+/* Non-zero when @dev has exactly the given bus and device ids. */
+static int ps3_is_device(struct ps3_system_bus_device *dev, u64 bus_id,
+	u64 dev_id)
+{
+	if (dev->bus_id != bus_id)
+		return 0;
+
+	return dev->dev_id == dev_id;
+}
+
+/*
+ * Open the hypervisor device behind a system bus device.
+ *
+ * Devices 1.1 and 1.2 are usage counted: only the first open calls
+ * lv1_open_device(), later opens just bump the counter and return 0.
+ * If lv1_open_device() fails the counter is rolled back, so a later open
+ * attempt retries instead of treating the device as already open.
+ *
+ * Returns 0 on success, -EPERM if the hypervisor refused the open.
+ */
+static int ps3_open_hv_device_sb(struct ps3_system_bus_device *dev)
+{
+	int *usage = NULL;
+	int result = 0;
+
+	BUG_ON(!dev->bus_id);
+	mutex_lock(&usage_hack.mutex);
+
+	if (ps3_is_device(dev, 1, 1))
+		usage = &usage_hack.sb_11;
+	else if (ps3_is_device(dev, 1, 2))
+		usage = &usage_hack.sb_12;
+
+	/* Already opened by an earlier user of a counted device. */
+	if (usage && ++(*usage) > 1)
+		goto done;
+
+	result = lv1_open_device(dev->bus_id, dev->dev_id, 0);
+
+	if (result) {
+		pr_warn("%s:%d: lv1_open_device dev=%u.%u(%s) failed: %s\n",
+			__func__, __LINE__, dev->match_id, dev->match_sub_id,
+			dev_name(&dev->core), ps3_result(result));
+		/* Nothing was opened: undo the count taken above. */
+		if (usage)
+			--(*usage);
+		result = -EPERM;
+	}
+
+done:
+	mutex_unlock(&usage_hack.mutex);
+	return result;
+}
+
+/*
+ * Close the hypervisor device behind a system bus device.
+ *
+ * For the usage counted devices 1.1 and 1.2 the counter is decremented and
+ * lv1_close_device() is only called once it reaches zero. A failing
+ * lv1_close_device() triggers BUG_ON(). Returns 0.
+ */
+static int ps3_close_hv_device_sb(struct ps3_system_bus_device *dev)
+{
+	int *usage = NULL;
+	int result = 0;
+
+	BUG_ON(!dev->bus_id);
+	mutex_lock(&usage_hack.mutex);
+
+	if (ps3_is_device(dev, 1, 1))
+		usage = &usage_hack.sb_11;
+	else if (ps3_is_device(dev, 1, 2))
+		usage = &usage_hack.sb_12;
+
+	/* Other users remain: leave the device open. */
+	if (usage && --(*usage) != 0)
+		goto done;
+
+	result = lv1_close_device(dev->bus_id, dev->dev_id);
+	BUG_ON(result);
+
+done:
+	mutex_unlock(&usage_hack.mutex);
+	return result;
+}
+
+/*
+ * Open the gpu, usage counted: only the first open calls lv1_gpu_open(),
+ * later opens just bump the counter and return 0. If lv1_gpu_open() fails
+ * the counter is rolled back, so a later open attempt retries instead of
+ * treating the gpu as already open.
+ *
+ * Returns 0 on success, -EPERM if the hypervisor refused the open.
+ */
+static int ps3_open_hv_device_gpu(struct ps3_system_bus_device *dev)
+{
+	int result = 0;
+
+	mutex_lock(&usage_hack.mutex);
+
+	/* Already opened by an earlier user. */
+	if (++usage_hack.gpu > 1)
+		goto done;
+
+	result = lv1_gpu_open(0);
+
+	if (result) {
+		pr_warn("%s:%d: lv1_gpu_open failed: %s\n", __func__,
+			__LINE__, ps3_result(result));
+		/* Nothing was opened: undo the count taken above. */
+		usage_hack.gpu--;
+		result = -EPERM;
+	}
+
+done:
+	mutex_unlock(&usage_hack.mutex);
+	return result;
+}
+
+/*
+ * Drop one gpu usage count and call lv1_gpu_close() once the count reaches
+ * zero. A failing lv1_gpu_close() triggers BUG_ON(). Returns 0.
+ */
+static int ps3_close_hv_device_gpu(struct ps3_system_bus_device *dev)
+{
+	int result = 0;
+
+	mutex_lock(&usage_hack.mutex);
+
+	/* Other users remain: leave the gpu open. */
+	if (--usage_hack.gpu != 0)
+		goto done;
+
+	result = lv1_gpu_close();
+	BUG_ON(result);
+
+done:
+	mutex_unlock(&usage_hack.mutex);
+	return result;
+}
+
+/**
+ * ps3_open_hv_device - Open the hypervisor device for a system bus device.
+ *
+ * Dispatches on dev->match_id to the system bus or gpu open helper and
+ * returns that helper's result. The av settings and system manager ids,
+ * and any unknown id, hit BUG().
+ */
+int ps3_open_hv_device(struct ps3_system_bus_device *dev)
+{
+	BUG_ON(!dev);
+	pr_debug("%s:%d: match_id: %u\n", __func__, __LINE__, dev->match_id);
+
+	switch (dev->match_id) {
+	case PS3_MATCH_ID_EHCI:
+	case PS3_MATCH_ID_OHCI:
+	case PS3_MATCH_ID_GELIC:
+	case PS3_MATCH_ID_STOR_DISK:
+	case PS3_MATCH_ID_STOR_ROM:
+	case PS3_MATCH_ID_STOR_FLASH:
+		return ps3_open_hv_device_sb(dev);
+
+	case PS3_MATCH_ID_SOUND:
+	case PS3_MATCH_ID_GPU:
+		return ps3_open_hv_device_gpu(dev);
+
+	case PS3_MATCH_ID_AV_SETTINGS:
+	case PS3_MATCH_ID_SYSTEM_MANAGER:
+		pr_debug("%s:%d: unsupported match_id: %u\n", __func__,
+			__LINE__, dev->match_id);
+		pr_debug("%s:%d: bus_id: %llu\n", __func__, __LINE__,
+			dev->bus_id);
+		BUG();
+		return -EINVAL;
+
+	default:
+		pr_debug("%s:%d: unknown match_id: %u\n", __func__, __LINE__,
+			dev->match_id);
+		BUG();
+		return -ENODEV;
+	}
+}
+EXPORT_SYMBOL_GPL(ps3_open_hv_device);
+
+/**
+ * ps3_close_hv_device - Close the hypervisor device for a system bus device.
+ *
+ * Dispatches on dev->match_id to the system bus or gpu close helper and
+ * returns that helper's result. The av settings and system manager ids,
+ * and any unknown id, hit BUG().
+ */
+int ps3_close_hv_device(struct ps3_system_bus_device *dev)
+{
+	BUG_ON(!dev);
+	pr_debug("%s:%d: match_id: %u\n", __func__, __LINE__, dev->match_id);
+
+	switch (dev->match_id) {
+	case PS3_MATCH_ID_EHCI:
+	case PS3_MATCH_ID_OHCI:
+	case PS3_MATCH_ID_GELIC:
+	case PS3_MATCH_ID_STOR_DISK:
+	case PS3_MATCH_ID_STOR_ROM:
+	case PS3_MATCH_ID_STOR_FLASH:
+		return ps3_close_hv_device_sb(dev);
+
+	case PS3_MATCH_ID_SOUND:
+	case PS3_MATCH_ID_GPU:
+		return ps3_close_hv_device_gpu(dev);
+
+	case PS3_MATCH_ID_AV_SETTINGS:
+	case PS3_MATCH_ID_SYSTEM_MANAGER:
+		pr_debug("%s:%d: unsupported match_id: %u\n", __func__,
+			__LINE__, dev->match_id);
+		pr_debug("%s:%d: bus_id: %llu\n", __func__, __LINE__,
+			dev->bus_id);
+		BUG();
+		return -EINVAL;
+
+	default:
+		pr_debug("%s:%d: unknown match_id: %u\n", __func__, __LINE__,
+			dev->match_id);
+		BUG();
+		return -ENODEV;
+	}
+}
+EXPORT_SYMBOL_GPL(ps3_close_hv_device);
+
+/*
+ * Debug dump of an mmio region. dump_mmio_region() passes the caller's
+ * function name and line number so the output is attributed to the caller.
+ */
+#define dump_mmio_region(_a) _dump_mmio_region(_a, __func__, __LINE__)
+static void _dump_mmio_region(const struct ps3_mmio_region *r,
+	const char *func, int line)
+{
+	const struct ps3_system_bus_device *owner = r->dev;
+
+	pr_debug("%s:%d: dev %llu:%llu\n", func, line, owner->bus_id,
+		owner->dev_id);
+	pr_debug("%s:%d: bus_addr %lxh\n", func, line, r->bus_addr);
+	pr_debug("%s:%d: len %lxh\n", func, line, r->len);
+	pr_debug("%s:%d: lpar_addr %lxh\n", func, line, r->lpar_addr);
+}
+
+/*
+ * Map the region's bus address range through the hypervisor and record the
+ * resulting lpar address in @r. On failure r->lpar_addr is set to 0.
+ * Returns the result of lv1_map_device_mmio_region().
+ */
+static int ps3_sb_mmio_region_create(struct ps3_mmio_region *r)
+{
+	u64 lpar_addr;
+	int result = lv1_map_device_mmio_region(r->dev->bus_id,
+		r->dev->dev_id, r->bus_addr, r->len, r->page_size,
+		&lpar_addr);
+
+	if (result) {
+		pr_debug("%s:%d: lv1_map_device_mmio_region failed: %s\n",
+			__func__, __LINE__, ps3_result(result));
+		r->lpar_addr = 0;
+	} else {
+		r->lpar_addr = lpar_addr;
+	}
+
+	dump_mmio_region(r);
+	return result;
+}
+
+/* ioc0 create op: does nothing and returns 0. */
+static int ps3_ioc0_mmio_region_create(struct ps3_mmio_region *r)
+{
+	/* device specific; do nothing currently */
+	return 0;
+}
+
+/**
+ * ps3_mmio_region_create - Create an mmio region via its create op.
+ *
+ * @r must have been set up with ps3_mmio_region_init(), which selects
+ * r->mmio_ops. Returns whatever the create op returns.
+ */
+int ps3_mmio_region_create(struct ps3_mmio_region *r)
+{
+	const struct ps3_mmio_region_ops *ops = r->mmio_ops;
+
+	return ops->create(r);
+}
+EXPORT_SYMBOL_GPL(ps3_mmio_region_create);
+
+/*
+ * Unmap the region through the hypervisor. r->lpar_addr is reset to 0
+ * whether or not the unmap succeeded. Returns the result of
+ * lv1_unmap_device_mmio_region().
+ */
+static int ps3_sb_free_mmio_region(struct ps3_mmio_region *r)
+{
+	int result;
+
+	dump_mmio_region(r);
+
+	result = lv1_unmap_device_mmio_region(r->dev->bus_id, r->dev->dev_id,
+		r->lpar_addr);
+	if (result) {
+		pr_debug("%s:%d: lv1_unmap_device_mmio_region failed: %s\n",
+			__func__, __LINE__, ps3_result(result));
+	}
+
+	r->lpar_addr = 0;
+	return result;
+}
+
+/* ioc0 free op: does nothing and returns 0. */
+static int ps3_ioc0_free_mmio_region(struct ps3_mmio_region *r)
+{
+	/* device specific; do nothing currently */
+	return 0;
+}
+
+
+/**
+ * ps3_free_mmio_region - Release an mmio region via its free op.
+ *
+ * Returns whatever the free op selected by ps3_mmio_region_init() returns.
+ */
+int ps3_free_mmio_region(struct ps3_mmio_region *r)
+{
+	const struct ps3_mmio_region_ops *ops = r->mmio_ops;
+
+	return ops->free(r);
+}
+EXPORT_SYMBOL_GPL(ps3_free_mmio_region);
+
+/* mmio region ops selected for PS3_DEVICE_TYPE_SB devices. */
+static const struct ps3_mmio_region_ops ps3_mmio_sb_region_ops = {
+	.create	= ps3_sb_mmio_region_create,
+	.free	= ps3_sb_free_mmio_region,
+};
+
+/* mmio region ops selected for PS3_DEVICE_TYPE_IOC0 devices. */
+static const struct ps3_mmio_region_ops ps3_mmio_ioc0_region_ops = {
+	.create	= ps3_ioc0_mmio_region_create,
+	.free	= ps3_ioc0_free_mmio_region,
+};
+
+/**
+ * ps3_mmio_region_init - Fill in an mmio region descriptor.
+ * @dev: Owning device; its dev_type selects the region ops.
+ * @r: Region descriptor to initialize.
+ * @bus_addr: Bus address of the region.
+ * @len: Length of the region.
+ * @page_size: Page size used for the mapping.
+ *
+ * Returns 0. Device types other than SB and IOC0 hit BUG().
+ */
+int ps3_mmio_region_init(struct ps3_system_bus_device *dev,
+	struct ps3_mmio_region *r, unsigned long bus_addr, unsigned long len,
+	enum ps3_mmio_page_size page_size)
+{
+	r->dev = dev;
+	r->bus_addr = bus_addr;
+	r->len = len;
+	r->page_size = page_size;
+
+	switch (dev->dev_type) {
+	case PS3_DEVICE_TYPE_SB:
+		r->mmio_ops = &ps3_mmio_sb_region_ops;
+		return 0;
+	case PS3_DEVICE_TYPE_IOC0:
+		r->mmio_ops = &ps3_mmio_ioc0_region_ops;
+		return 0;
+	default:
+		BUG();
+		return -EINVAL;
+	}
+}
+EXPORT_SYMBOL_GPL(ps3_mmio_region_init);
+
+/*
+ * Bus match callback. A driver matches a device when the match ids are
+ * equal and, if the device has a non-zero match_sub_id, the sub ids are
+ * equal as well. Returns non-zero on a match.
+ */
+static int ps3_system_bus_match(struct device *_dev,
+	struct device_driver *_drv)
+{
+	struct ps3_system_bus_driver *drv = ps3_drv_to_system_bus_drv(_drv);
+	struct ps3_system_bus_device *dev = ps3_dev_to_system_bus_dev(_dev);
+	int result = dev->match_id == drv->match_id;
+
+	if (dev->match_sub_id)
+		result = result && dev->match_sub_id == drv->match_sub_id;
+
+	if (!result) {
+		pr_debug("%s:%d: dev=%u.%u(%s), drv=%u.%u(%s): miss\n",
+			__func__, __LINE__,
+			dev->match_id, dev->match_sub_id, dev_name(&dev->core),
+			drv->match_id, drv->match_sub_id, drv->core.name);
+		return result;
+	}
+
+	pr_info("%s:%d: dev=%u.%u(%s), drv=%u.%u(%s): match\n",
+		__func__, __LINE__,
+		dev->match_id, dev->match_sub_id, dev_name(&dev->core),
+		drv->match_id, drv->match_sub_id, drv->core.name);
+
+	return result;
+}
+
+/*
+ * Bus probe callback: forward to the driver's probe method. Returns the
+ * method's result, or 0 if the driver has no probe method.
+ */
+static int ps3_system_bus_probe(struct device *_dev)
+{
+	struct ps3_system_bus_device *dev = ps3_dev_to_system_bus_dev(_dev);
+	struct ps3_system_bus_driver *drv;
+	int result = 0;
+
+	BUG_ON(!dev);
+	dev_dbg(_dev, "%s:%d\n", __func__, __LINE__);
+
+	drv = ps3_system_bus_dev_to_system_bus_drv(dev);
+	BUG_ON(!drv);
+
+	if (!drv->probe)
+		pr_debug("%s:%d: %s no probe method\n", __func__, __LINE__,
+			dev_name(&dev->core));
+	else
+		result = drv->probe(dev);
+
+	pr_debug(" <- %s:%d: %s\n", __func__, __LINE__, dev_name(&dev->core));
+	return result;
+}
+
+/* Bus remove callback: forward to the driver's remove method, if any. */
+static void ps3_system_bus_remove(struct device *_dev)
+{
+	struct ps3_system_bus_device *dev = ps3_dev_to_system_bus_dev(_dev);
+	struct ps3_system_bus_driver *drv;
+
+	BUG_ON(!dev);
+	dev_dbg(_dev, "%s:%d\n", __func__, __LINE__);
+
+	drv = ps3_system_bus_dev_to_system_bus_drv(dev);
+	BUG_ON(!drv);
+
+	if (!drv->remove)
+		dev_dbg(&dev->core, "%s:%d %s: no remove method\n",
+			__func__, __LINE__, drv->core.name);
+	else
+		drv->remove(dev);
+
+	pr_debug(" <- %s:%d: %s\n", __func__, __LINE__, dev_name(&dev->core));
+}
+
+/*
+ * Bus shutdown callback. Does nothing for a device without a bound driver.
+ * Otherwise calls the driver's shutdown method, falling back to its remove
+ * method; a driver that has neither hits BUG().
+ */
+static void ps3_system_bus_shutdown(struct device *_dev)
+{
+	struct ps3_system_bus_device *dev = ps3_dev_to_system_bus_dev(_dev);
+	struct ps3_system_bus_driver *drv;
+
+	BUG_ON(!dev);
+
+	dev_dbg(&dev->core, " -> %s:%d: match_id %d\n", __func__, __LINE__,
+		dev->match_id);
+
+	if (!dev->core.driver) {
+		dev_dbg(&dev->core, "%s:%d: no driver bound\n", __func__,
+			__LINE__);
+		return;
+	}
+
+	drv = ps3_system_bus_dev_to_system_bus_drv(dev);
+	BUG_ON(!drv);
+
+	dev_dbg(&dev->core, "%s:%d: %s -> %s\n", __func__, __LINE__,
+		dev_name(&dev->core), drv->core.name);
+
+	if (drv->shutdown) {
+		drv->shutdown(dev);
+	} else if (drv->remove) {
+		dev_dbg(&dev->core, "%s:%d %s: no shutdown, calling remove\n",
+			__func__, __LINE__, drv->core.name);
+		drv->remove(dev);
+	} else {
+		dev_dbg(&dev->core, "%s:%d %s: no shutdown method\n",
+			__func__, __LINE__, drv->core.name);
+		BUG();
+	}
+
+	dev_dbg(&dev->core, " <- %s:%d\n", __func__, __LINE__);
+}
+
+/*
+ * Bus uevent callback: add MODALIAS=ps3:<match_id>:<match_sub_id> to the
+ * event environment. Returns -ENOMEM if the variable could not be added.
+ */
+static int ps3_system_bus_uevent(const struct device *_dev, struct kobj_uevent_env *env)
+{
+	struct ps3_system_bus_device *dev = ps3_dev_to_system_bus_dev(_dev);
+
+	return add_uevent_var(env, "MODALIAS=ps3:%d:%d", dev->match_id,
+		dev->match_sub_id) ? -ENOMEM : 0;
+}
+
+/*
+ * sysfs "modalias" attribute: prints ps3:<match_id>:<match_sub_id>.
+ *
+ * sysfs_emit() bounds the output to the sysfs page and returns the number
+ * of characters written, replacing the open-coded snprintf() clamp.
+ */
+static ssize_t modalias_show(struct device *_dev, struct device_attribute *a,
+	char *buf)
+{
+	struct ps3_system_bus_device *dev = ps3_dev_to_system_bus_dev(_dev);
+
+	return sysfs_emit(buf, "ps3:%d:%d\n", dev->match_id,
+		dev->match_sub_id);
+}
+static DEVICE_ATTR_RO(modalias);
+
+/* Default device attributes, hooked up via ps3_system_bus_type.dev_groups. */
+static struct attribute *ps3_system_bus_dev_attrs[] = {
+	&dev_attr_modalias.attr,
+	NULL,	/* terminator */
+};
+ATTRIBUTE_GROUPS(ps3_system_bus_dev);
+
+/* The PS3 system bus, registered in ps3_system_bus_init(). */
+static struct bus_type ps3_system_bus_type = {
+	.name		= "ps3_system_bus",
+	.match		= ps3_system_bus_match,
+	.uevent		= ps3_system_bus_uevent,
+	.probe		= ps3_system_bus_probe,
+	.remove		= ps3_system_bus_remove,
+	.shutdown	= ps3_system_bus_shutdown,
+	.dev_groups	= ps3_system_bus_dev_groups,
+};
+
+/*
+ * Register the root device and the bus type. Returns -ENODEV when the LV1
+ * firmware feature is absent; a registration failure hits BUG_ON().
+ */
+static int __init ps3_system_bus_init(void)
+{
+	int result;
+
+	if (!firmware_has_feature(FW_FEATURE_PS3_LV1))
+		return -ENODEV;
+
+	pr_debug(" -> %s:%d\n", __func__, __LINE__);
+
+	/* Must be ready before any ps3_{open,close}_hv_device() call. */
+	mutex_init(&usage_hack.mutex);
+
+	result = device_register(&ps3_system_bus);
+	BUG_ON(result);
+
+	result = bus_register(&ps3_system_bus_type);
+	BUG_ON(result);
+
+	pr_debug(" <- %s:%d\n", __func__, __LINE__);
+	return result;
+}
+
+core_initcall(ps3_system_bus_init);
+
+/* Allocates a contiguous real buffer and creates mappings over it.
+ * Returns the virtual address of the buffer and sets dma_handle
+ * to the dma address (mapping) of the first page.
+ *
+ * Returns NULL, with *dma_handle set to 0, if the pages cannot be
+ * allocated or mapped.
+ */
+static void * ps3_alloc_coherent(struct device *_dev, size_t size,
+	dma_addr_t *dma_handle, gfp_t flag,
+	unsigned long attrs)
+{
+	int result;
+	struct ps3_system_bus_device *dev = ps3_dev_to_system_bus_dev(_dev);
+	unsigned long virt_addr;
+
+	/* Always hand out zeroed pages, never DMA-zone or highmem ones. */
+	flag &= ~(__GFP_DMA | __GFP_HIGHMEM);
+	flag |= __GFP_ZERO;
+
+	virt_addr = __get_free_pages(flag, get_order(size));
+
+	if (!virt_addr) {
+		pr_debug("%s:%d: get_free_pages failed\n", __func__, __LINE__);
+		goto clean_none;
+	}
+
+	result = ps3_dma_map(dev->d_region, virt_addr, size, dma_handle,
+		CBE_IOPTE_PP_W | CBE_IOPTE_PP_R |
+		CBE_IOPTE_SO_RW | CBE_IOPTE_M);
+
+	if (result) {
+		pr_debug("%s:%d: ps3_dma_map failed (%d)\n",
+			__func__, __LINE__, result);
+		BUG_ON("check region type");
+		goto clean_alloc;
+	}
+
+	return (void*)virt_addr;
+
+clean_alloc:
+	free_pages(virt_addr, get_order(size));
+clean_none:
+	/*
+	 * Clear the caller's handle. Assigning NULL to the dma_handle
+	 * parameter itself only changed the local copy.
+	 */
+	*dma_handle = 0;
+	return NULL;
+}
+
+/*
+ * Undo ps3_alloc_coherent(): drop the dma mapping, then free the pages.
+ * The result of ps3_dma_unmap() is not checked.
+ */
+static void ps3_free_coherent(struct device *_dev, size_t size, void *vaddr,
+	dma_addr_t dma_handle, unsigned long attrs)
+{
+	struct ps3_system_bus_device *dev = ps3_dev_to_system_bus_dev(_dev);
+	const unsigned long pages = (unsigned long)vaddr;
+
+	ps3_dma_unmap(dev->d_region, dma_handle, size);
+	free_pages(pages, get_order(size));
+}
+
+/* Creates TCEs for a user provided buffer. The user buffer must be
+ * contiguous real kernel storage (not vmalloc). The address passed here
+ * comprises a page address and offset into that page. The dma_addr_t
+ * returned will point to the same byte within the page as was passed in.
+ *
+ * Returns DMA_MAPPING_ERROR if the mapping could not be created, so that
+ * callers can detect the failure with dma_mapping_error().
+ */
+
+static dma_addr_t ps3_sb_map_page(struct device *_dev, struct page *page,
+	unsigned long offset, size_t size, enum dma_data_direction direction,
+	unsigned long attrs)
+{
+	struct ps3_system_bus_device *dev = ps3_dev_to_system_bus_dev(_dev);
+	int result;
+	dma_addr_t bus_addr;
+	void *ptr = page_address(page) + offset;
+
+	/* The sb mapping is always read/write, whatever @direction says. */
+	result = ps3_dma_map(dev->d_region, (unsigned long)ptr, size,
+		&bus_addr,
+		CBE_IOPTE_PP_R | CBE_IOPTE_PP_W |
+		CBE_IOPTE_SO_RW | CBE_IOPTE_M);
+
+	if (result) {
+		pr_debug("%s:%d: ps3_dma_map failed (%d)\n",
+			__func__, __LINE__, result);
+		/* bus_addr is not meaningful here; report the failure. */
+		return DMA_MAPPING_ERROR;
+	}
+
+	return bus_addr;
+}
+
+/*
+ * map_page op for ioc0 devices. The IOPTE protection flags are derived
+ * from @direction; DMA_NONE (or any other value) hits BUG().
+ *
+ * Returns the bus address of the mapping, or DMA_MAPPING_ERROR if the
+ * mapping could not be created, so that callers can detect the failure
+ * with dma_mapping_error().
+ */
+static dma_addr_t ps3_ioc0_map_page(struct device *_dev, struct page *page,
+	unsigned long offset, size_t size,
+	enum dma_data_direction direction,
+	unsigned long attrs)
+{
+	struct ps3_system_bus_device *dev = ps3_dev_to_system_bus_dev(_dev);
+	int result;
+	dma_addr_t bus_addr;
+	u64 iopte_flag;
+	void *ptr = page_address(page) + offset;
+
+	iopte_flag = CBE_IOPTE_M;
+	switch (direction) {
+	case DMA_BIDIRECTIONAL:
+		iopte_flag |= CBE_IOPTE_PP_R | CBE_IOPTE_PP_W | CBE_IOPTE_SO_RW;
+		break;
+	case DMA_TO_DEVICE:
+		iopte_flag |= CBE_IOPTE_PP_R | CBE_IOPTE_SO_R;
+		break;
+	case DMA_FROM_DEVICE:
+		iopte_flag |= CBE_IOPTE_PP_W | CBE_IOPTE_SO_RW;
+		break;
+	default:
+		/* not happened */
+		BUG();
+	}
+	result = ps3_dma_map(dev->d_region, (unsigned long)ptr, size,
+		&bus_addr, iopte_flag);
+
+	if (result) {
+		pr_debug("%s:%d: ps3_dma_map failed (%d)\n",
+			__func__, __LINE__, result);
+		/* bus_addr is not meaningful here; report the failure. */
+		return DMA_MAPPING_ERROR;
+	}
+	return bus_addr;
+}
+
+/*
+ * unmap_page op shared by the sb and ioc0 dma ops: drop the mapping from
+ * the device's dma region. A failure is only logged.
+ */
+static void ps3_unmap_page(struct device *_dev, dma_addr_t dma_addr,
+	size_t size, enum dma_data_direction direction, unsigned long attrs)
+{
+	struct ps3_system_bus_device *dev = ps3_dev_to_system_bus_dev(_dev);
+	int result = ps3_dma_unmap(dev->d_region, dma_addr, size);
+
+	if (!result)
+		return;
+
+	pr_debug("%s:%d: ps3_dma_unmap failed (%d)\n",
+		__func__, __LINE__, result);
+}
+
+/*
+ * map_sg op for sb devices. Without CONFIG_PS3_DYNAMIC_DMA every entry is
+ * mapped individually with ps3_dma_map(); returns @nents on success and
+ * -EINVAL as soon as one entry fails. With CONFIG_PS3_DYNAMIC_DMA this is
+ * not implemented.
+ */
+static int ps3_sb_map_sg(struct device *_dev, struct scatterlist *sgl,
+	int nents, enum dma_data_direction direction, unsigned long attrs)
+{
+#if defined(CONFIG_PS3_DYNAMIC_DMA)
+	BUG_ON("do");
+	return -EPERM;
+#else
+	struct ps3_system_bus_device *dev = ps3_dev_to_system_bus_dev(_dev);
+	struct scatterlist *sg;
+	int result;
+	int i;
+
+	for_each_sg(sgl, sg, nents, i) {
+		result = ps3_dma_map(dev->d_region, sg_phys(sg), sg->length,
+			&sg->dma_address, 0);
+		if (result) {
+			pr_debug("%s:%d: ps3_dma_map failed (%d)\n",
+				__func__, __LINE__, result);
+			return -EINVAL;
+		}
+
+		sg->dma_length = sg->length;
+	}
+
+	return nents;
+#endif
+}
+
+/* map_sg op for ioc0 devices: not implemented, hits BUG(). */
+static int ps3_ioc0_map_sg(struct device *_dev, struct scatterlist *sg,
+	int nents, enum dma_data_direction direction,
+	unsigned long attrs)
+{
+	BUG();
+	return -EINVAL;
+}
+
+/*
+ * unmap_sg op for sb devices: a no-op unless CONFIG_PS3_DYNAMIC_DMA is set,
+ * in which case it is not implemented.
+ */
+static void ps3_sb_unmap_sg(struct device *_dev, struct scatterlist *sg,
+	int nents, enum dma_data_direction direction, unsigned long attrs)
+{
+#if defined(CONFIG_PS3_DYNAMIC_DMA)
+	BUG_ON("do");
+#endif
+}
+
+/* unmap_sg op for ioc0 devices: not implemented, hits BUG(). */
+static void ps3_ioc0_unmap_sg(struct device *_dev, struct scatterlist *sg,
+	int nents, enum dma_data_direction direction,
+	unsigned long attrs)
+{
+	BUG();
+}
+
+/* dma_supported op: accept any mask that covers at least 32 bits. */
+static int ps3_dma_supported(struct device *_dev, u64 mask)
+{
+	const u64 min_mask = DMA_BIT_MASK(32);
+
+	return mask >= min_mask;
+}
+
+/* dma ops assigned to PS3_DEVICE_TYPE_SB devices at registration. */
+static const struct dma_map_ops ps3_sb_dma_ops = {
+	.alloc		= ps3_alloc_coherent,
+	.free		= ps3_free_coherent,
+	.map_sg		= ps3_sb_map_sg,
+	.unmap_sg	= ps3_sb_unmap_sg,
+	.dma_supported	= ps3_dma_supported,
+	.map_page	= ps3_sb_map_page,
+	.unmap_page	= ps3_unmap_page,
+	.mmap		= dma_common_mmap,
+	.get_sgtable	= dma_common_get_sgtable,
+	.alloc_pages	= dma_common_alloc_pages,
+	.free_pages	= dma_common_free_pages,
+};
+
+/* dma ops assigned to PS3_DEVICE_TYPE_IOC0 devices at registration. */
+static const struct dma_map_ops ps3_ioc0_dma_ops = {
+	.alloc		= ps3_alloc_coherent,
+	.free		= ps3_free_coherent,
+	.map_sg		= ps3_ioc0_map_sg,
+	.unmap_sg	= ps3_ioc0_unmap_sg,
+	.dma_supported	= ps3_dma_supported,
+	.map_page	= ps3_ioc0_map_page,
+	.unmap_page	= ps3_unmap_page,
+	.mmap		= dma_common_mmap,
+	.get_sgtable	= dma_common_get_sgtable,
+	.alloc_pages	= dma_common_alloc_pages,
+	.free_pages	= dma_common_free_pages,
+};
+
+/**
+ * ps3_system_bus_release_device - remove a device from the system bus
+ *
+ * Installed as the release callback of every registered device; frees the
+ * ps3_system_bus_device that embeds @_dev.
+ */
+
+static void ps3_system_bus_release_device(struct device *_dev)
+{
+	kfree(ps3_dev_to_system_bus_dev(_dev));
+}
+
+/**
+ * ps3_system_bus_device_register - add a device to the system bus
+ *
+ * ps3_system_bus_device_register() expects the dev object to be allocated
+ * dynamically by the caller. The system bus takes ownership of the dev
+ * object and frees the object in ps3_system_bus_release_device().
+ *
+ * The device is named after its type plus a per-type running number, and
+ * sb and ioc0 devices also get their dma ops here. An unknown dev_type hits
+ * BUG(). Returns the result of device_register().
+ */
+
+int ps3_system_bus_device_register(struct ps3_system_bus_device *dev)
+{
+	static unsigned int dev_ioc0_count;
+	static unsigned int dev_sb_count;
+	static unsigned int dev_vuart_count;
+	static unsigned int dev_lpm_count;
+	struct device *core = &dev->core;
+
+	if (!core->parent)
+		core->parent = &ps3_system_bus;
+	core->bus = &ps3_system_bus_type;
+	core->release = ps3_system_bus_release_device;
+
+	switch (dev->dev_type) {
+	case PS3_DEVICE_TYPE_IOC0:
+		core->dma_ops = &ps3_ioc0_dma_ops;
+		dev_set_name(core, "ioc0_%02x", ++dev_ioc0_count);
+		break;
+	case PS3_DEVICE_TYPE_SB:
+		core->dma_ops = &ps3_sb_dma_ops;
+		dev_set_name(core, "sb_%02x", ++dev_sb_count);
+		break;
+	case PS3_DEVICE_TYPE_VUART:
+		dev_set_name(core, "vuart_%02x", ++dev_vuart_count);
+		break;
+	case PS3_DEVICE_TYPE_LPM:
+		dev_set_name(core, "lpm_%02x", ++dev_lpm_count);
+		break;
+	default:
+		BUG();
+	}
+
+	core->of_node = NULL;
+	set_dev_node(core, 0);
+
+	pr_debug("%s:%d add %s\n", __func__, __LINE__, dev_name(core));
+
+	return device_register(core);
+}
+
+EXPORT_SYMBOL_GPL(ps3_system_bus_device_register);
+
+/**
+ * ps3_system_bus_driver_register - register a driver on the system bus
+ *
+ * Returns -ENODEV when the LV1 firmware feature is absent, otherwise the
+ * result of driver_register().
+ */
+int ps3_system_bus_driver_register(struct ps3_system_bus_driver *drv)
+{
+	struct device_driver *core = &drv->core;
+	int result;
+
+	pr_debug(" -> %s:%d: %s\n", __func__, __LINE__, core->name);
+
+	if (!firmware_has_feature(FW_FEATURE_PS3_LV1))
+		return -ENODEV;
+
+	core->bus = &ps3_system_bus_type;
+	result = driver_register(core);
+
+	pr_debug(" <- %s:%d: %s\n", __func__, __LINE__, core->name);
+	return result;
+}
+
+EXPORT_SYMBOL_GPL(ps3_system_bus_driver_register);
+
+/**
+ * ps3_system_bus_driver_unregister - unregister a system bus driver
+ */
+void ps3_system_bus_driver_unregister(struct ps3_system_bus_driver *drv)
+{
+	struct device_driver *core = &drv->core;
+
+	pr_debug(" -> %s:%d: %s\n", __func__, __LINE__, core->name);
+	driver_unregister(core);
+	pr_debug(" <- %s:%d: %s\n", __func__, __LINE__, core->name);
+}
+
+EXPORT_SYMBOL_GPL(ps3_system_bus_driver_unregister);
diff --git a/arch/powerpc/platforms/ps3/time.c b/arch/powerpc/platforms/ps3/time.c
new file mode 100644
index 000000000..c9bfc113a
--- /dev/null
+++ b/arch/powerpc/platforms/ps3/time.c
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * PS3 time and rtc routines.
+ *
+ * Copyright (C) 2006 Sony Computer Entertainment Inc.
+ * Copyright 2006 Sony Corp.
+ */
+
+#include <linux/kernel.h>
+#include <linux/platform_device.h>
+#include <linux/rtc.h>
+
+#include <asm/firmware.h>
+#include <asm/lv1call.h>
+#include <asm/ps3.h>
+
+#include "platform.h"
+
+/*
+ * Set ppc_tb_freq from the repository's timebase frequency and derive
+ * ppc_proc_freq as 40 times that value. A failed repository read hits
+ * BUG_ON().
+ */
+void __init ps3_calibrate_decr(void)
+{
+	u64 tb_freq;
+	int result = ps3_repository_read_be_tb_freq(0, &tb_freq);
+
+	BUG_ON(result);
+
+	ppc_tb_freq = tb_freq;
+	ppc_proc_freq = ppc_tb_freq * 40;
+}
+
+/*
+ * Return the rtc value reported by lv1_get_rtc(); the timebase value it
+ * also reports is discarded. A failed call hits BUG_ON().
+ */
+static u64 read_rtc(void)
+{
+	u64 rtc_val;
+	u64 tb_val;
+	int result = lv1_get_rtc(&rtc_val, &tb_val);
+
+	BUG_ON(result);
+
+	return rtc_val;
+}
+
+/* Boot time: the hypervisor rtc value plus the os area's rtc diff. */
+time64_t __init ps3_get_boot_time(void)
+{
+	return read_rtc() + ps3_os_area_get_rtc_diff();
+}
+
+/*
+ * Register the "rtc-ps3" platform device. Returns -ENODEV when the LV1
+ * firmware feature is absent, otherwise 0 or the registration error.
+ */
+static int __init ps3_rtc_init(void)
+{
+	if (!firmware_has_feature(FW_FEATURE_PS3_LV1))
+		return -ENODEV;
+
+	return PTR_ERR_OR_ZERO(platform_device_register_simple("rtc-ps3", -1,
+		NULL, 0));
+}
+device_initcall(ps3_rtc_init);
diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
new file mode 100644
index 000000000..4ebf2ef28
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -0,0 +1,186 @@
+# SPDX-License-Identifier: GPL-2.0
+config PPC_PSERIES
+ depends on PPC64 && PPC_BOOK3S
+ bool "IBM pSeries & new (POWER5-based) iSeries"
+ select HAVE_PCSPKR_PLATFORM
+ select MPIC
+ select OF_DYNAMIC
+ select FORCE_PCI
+ select PCI_MSI
+ select GENERIC_ALLOCATOR
+ select PPC_XICS
+ select PPC_XIVE_SPAPR
+ select PPC_ICP_NATIVE
+ select PPC_ICP_HV
+ select PPC_ICS_RTAS
+ select PPC_I8259
+ select PPC_RTAS
+ select PPC_RTAS_DAEMON
+ select RTAS_ERROR_LOGGING
+ select PPC_UDBG_16550
+ select PPC_DOORBELL
+ select HOTPLUG_CPU
+ select FORCE_SMP
+ select SWIOTLB
+ select ARCH_SUPPORTS_PER_VMA_LOCK
+ default y
+
+config PARAVIRT
+ bool
+
+config PARAVIRT_SPINLOCKS
+ bool
+
+config PARAVIRT_TIME_ACCOUNTING
+ select PARAVIRT
+ bool
+
+config PPC_SPLPAR
+ bool "Support for shared-processor logical partitions"
+ depends on PPC_PSERIES
+ select PARAVIRT_SPINLOCKS if PPC_QUEUED_SPINLOCKS
+ select PARAVIRT_TIME_ACCOUNTING if VIRT_CPU_ACCOUNTING_GEN
+ default y
+ help
+ Enabling this option will make the kernel run more efficiently
+ on logically-partitioned pSeries systems which use shared
+ processors, that is, which share physical processors between
+ two or more partitions.
+
+ Say Y if you are unsure.
+
+config DTL
+ bool "Dispatch Trace Log"
+ depends on PPC_SPLPAR && DEBUG_FS
+ help
+ SPLPAR machines can log hypervisor preempt & dispatch events to a
+ kernel buffer. Saying Y here will enable logging these events,
+ which are accessible through a debugfs file.
+
+ Say N if you are unsure.
+
+config PSERIES_ENERGY
+ tristate "pSeries energy management capabilities driver"
+ depends on PPC_PSERIES
+ default y
+ help
+ Provides interface to platform energy management capabilities
+ on supported PSERIES platforms.
+ Provides: /sys/devices/system/cpu/pseries_(de)activation_hint_list
+ and /sys/devices/system/cpu/cpuN/pseries_(de)activation_hint
+
+config IO_EVENT_IRQ
+ bool "IO Event Interrupt support"
+ depends on PPC_PSERIES
+ default y
+ help
+ Select this option, if you want to enable support for IO Event
+ interrupts. IO event interrupt is a mechanism provided by RTAS
+ to return information about hardware error and non-error events
+ which may need OS attention. RTAS returns events for multiple
+ event types and scopes. Device drivers can register their handlers
+ to receive events.
+
+ This option will only enable the IO event platform code. You
+ will still need to enable or compile the actual drivers
+ that use this infrastructure to handle IO event interrupts.
+
+ Say Y if you are unsure.
+
+config LPARCFG
+ bool "LPAR Configuration Data"
+ depends on PPC_PSERIES
+ help
+ Provide system capacity information via human readable
+ <key word>=<value> pairs through a /proc/ppc64/lparcfg interface.
+
+config PPC_PSERIES_DEBUG
+ depends on PPC_PSERIES && PPC_EARLY_DEBUG
+ bool "Enable extra debug logging in platforms/pseries"
+ default y
+ help
+ Say Y here if you want the pseries core to produce a bunch of
+ debug messages to the system log. Select this if you are having a
+ problem with the pseries core and want to see more of what is
+ going on. This does not enable debugging in lpar.c, which must
+ be manually done due to its verbosity.
+
+config PPC_SMLPAR
+ bool "Support for shared-memory logical partitions"
+ depends on PPC_PSERIES
+ select LPARCFG
+ help
+ Select this option to enable shared memory partition support.
+ With this option a system running in an LPAR can be given more
+ memory than physically available and will allow firmware to
+ balance memory across many LPARs.
+
+config CMM
+ tristate "Collaborative memory management"
+ depends on PPC_SMLPAR
+ select MEMORY_BALLOON
+ default y
+ help
+ Select this option, if you want to enable the kernel interface
+ to reduce the memory size of the system. This is accomplished
+ by allocating pages of memory and putting them "on hold". This only
+ makes sense for a system running in an LPAR where the unused pages
+ will be reused for other LPARs. The interface allows firmware to
+ balance memory across many LPARs.
+
+config HV_PERF_CTRS
+ bool "Hypervisor supplied PMU events (24x7 & GPCI)"
+ default y
+ depends on PERF_EVENTS && PPC_PSERIES
+ help
+ Enable access to hypervisor supplied counters in perf. Currently,
+ this enables code that uses the hcall GetPerfCounterInfo and 24x7
+ interfaces to retrieve counters. GPCI exists on Power 6 and later
+ systems. 24x7 is available on Power 8 and later systems.
+
+ If unsure, select Y.
+
+config IBMVIO
+ depends on PPC_PSERIES
+ bool
+ default y
+
+config IBMEBUS
+ depends on PPC_PSERIES && !CPU_LITTLE_ENDIAN
+ bool "Support for GX bus based adapters"
+ help
+ Bus device driver for GX bus based adapters.
+
+config PSERIES_PLPKS
+ depends on PPC_PSERIES
+ select NLS
+ bool
+ # PowerVM provides an isolated Platform Keystore (PKS) storage
+ # allocation for each LPAR with individually managed access
+ # controls to store sensitive information securely. It can be
+ # used to store asymmetric public keys or secrets as required
+ # by different usecases.
+ #
+ # This option is selected by in-kernel consumers that require
+ # access to the PKS.
+
+config PAPR_SCM
+ depends on PPC_PSERIES && MEMORY_HOTPLUG && LIBNVDIMM
+ tristate "Support for the PAPR Storage Class Memory interface"
+ help
+ Enable access to hypervisor provided storage class memory.
+
+config PPC_SVM
+ bool "Secure virtual machine (SVM) support for POWER"
+ depends on PPC_PSERIES
+ select SWIOTLB
+ select ARCH_HAS_MEM_ENCRYPT
+ select ARCH_HAS_FORCE_DMA_UNENCRYPTED
+ select ARCH_HAS_CC_PLATFORM
+ help
+ There are certain POWER platforms which support secure guests using
+ the Protected Execution Facility, with the help of an Ultravisor
+ executing below the hypervisor layer. This enables support for
+ those guests.
+
+ If unsure, say "N".
diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile
new file mode 100644
index 000000000..53c3b91af
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/Makefile
@@ -0,0 +1,39 @@
+# SPDX-License-Identifier: GPL-2.0
+ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC)
+ccflags-$(CONFIG_PPC_PSERIES_DEBUG) += -DDEBUG
+
+obj-y := lpar.o hvCall.o nvram.o reconfig.o \
+ of_helpers.o rtas-work-area.o papr-sysparm.o \
+ setup.o iommu.o event_sources.o ras.o \
+ firmware.o power.o dlpar.o mobility.o rng.o \
+ pci.o pci_dlpar.o eeh_pseries.o msi.o \
+ papr_platform_attributes.o dtl.o
+obj-$(CONFIG_SMP) += smp.o
+obj-$(CONFIG_KEXEC_CORE) += kexec.o
+obj-$(CONFIG_PSERIES_ENERGY) += pseries_energy.o
+
+obj-$(CONFIG_HOTPLUG_CPU) += hotplug-cpu.o
+obj-$(CONFIG_MEMORY_HOTPLUG) += hotplug-memory.o pmem.o
+
+obj-$(CONFIG_HVC_CONSOLE) += hvconsole.o
+obj-$(CONFIG_HVCS) += hvcserver.o
+obj-$(CONFIG_HCALL_STATS) += hvCall_inst.o
+obj-$(CONFIG_CMM) += cmm.o
+obj-$(CONFIG_IO_EVENT_IRQ) += io_event_irq.o
+obj-$(CONFIG_LPARCFG) += lparcfg.o
+obj-$(CONFIG_IBMVIO) += vio.o
+obj-$(CONFIG_IBMEBUS) += ibmebus.o
+obj-$(CONFIG_PAPR_SCM) += papr_scm.o
+obj-$(CONFIG_PPC_SPLPAR) += vphn.o
+obj-$(CONFIG_PPC_SVM) += svm.o
+obj-$(CONFIG_FA_DUMP) += rtas-fadump.o
+obj-$(CONFIG_PSERIES_PLPKS) += plpks.o
+obj-$(CONFIG_PPC_SECURE_BOOT) += plpks-secvar.o
+obj-$(CONFIG_SUSPEND) += suspend.o
+obj-$(CONFIG_PPC_VAS) += vas.o vas-sysfs.o
+
+obj-$(CONFIG_ARCH_HAS_CC_PLATFORM) += cc_platform.o
+
+# nothing that operates in real mode is safe for KASAN
+KASAN_SANITIZE_ras.o := n
+KASAN_SANITIZE_kexec.o := n
diff --git a/arch/powerpc/platforms/pseries/cc_platform.c b/arch/powerpc/platforms/pseries/cc_platform.c
new file mode 100644
index 000000000..e8021af83
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/cc_platform.c
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Confidential Computing Platform Capability checks
+ *
+ * Copyright (C) 2021 Advanced Micro Devices, Inc.
+ *
+ * Author: Tom Lendacky <thomas.lendacky@amd.com>
+ */
+
+#include <linux/export.h>
+#include <linux/cc_platform.h>
+
+#include <asm/machdep.h>
+#include <asm/svm.h>
+
+/*
+ * Report whether the confidential-computing attribute @attr is active.
+ * Only CC_ATTR_MEM_ENCRYPT is handled here: it follows is_secure_guest().
+ * Every other attribute is reported as absent.
+ */
+bool cc_platform_has(enum cc_attr attr)
+{
+	if (attr == CC_ATTR_MEM_ENCRYPT)
+		return is_secure_guest();
+
+	return false;
+}
+EXPORT_SYMBOL_GPL(cc_platform_has);
diff --git a/arch/powerpc/platforms/pseries/cmm.c b/arch/powerpc/platforms/pseries/cmm.c
new file mode 100644
index 000000000..5f4037c1d
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/cmm.c
@@ -0,0 +1,663 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Collaborative memory management interface.
+ *
+ * Copyright (C) 2008 IBM Corporation
+ * Author(s): Brian King (brking@linux.vnet.ibm.com),
+ */
+
+#include <linux/ctype.h>
+#include <linux/delay.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/gfp.h>
+#include <linux/kthread.h>
+#include <linux/module.h>
+#include <linux/oom.h>
+#include <linux/reboot.h>
+#include <linux/sched.h>
+#include <linux/stringify.h>
+#include <linux/swap.h>
+#include <linux/device.h>
+#include <linux/balloon_compaction.h>
+#include <asm/firmware.h>
+#include <asm/hvcall.h>
+#include <asm/mmu.h>
+#include <linux/uaccess.h>
+#include <linux/memory.h>
+#include <asm/plpar_wrappers.h>
+
+#include "pseries.h"
+
+#define CMM_DRIVER_VERSION "1.0.0"
+#define CMM_DEFAULT_DELAY 1
+#define CMM_HOTPLUG_DELAY 5
+#define CMM_DEBUG 0
+#define CMM_DISABLE 0
+#define CMM_OOM_KB 1024
+#define CMM_MIN_MEM_MB 256
+#define KB2PAGES(_p) ((_p)>>(PAGE_SHIFT-10))
+#define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10))
+
+#define CMM_MEM_HOTPLUG_PRI 1
+
+static unsigned int delay = CMM_DEFAULT_DELAY;
+static unsigned int hotplug_delay = CMM_HOTPLUG_DELAY;
+static unsigned int oom_kb = CMM_OOM_KB;
+static unsigned int cmm_debug = CMM_DEBUG;
+static unsigned int cmm_disabled = CMM_DISABLE;
+static unsigned long min_mem_mb = CMM_MIN_MEM_MB;
+static bool __read_mostly simulate;
+static unsigned long simulate_loan_target_kb;
+static struct device cmm_dev;
+
+MODULE_AUTHOR("Brian King <brking@linux.vnet.ibm.com>");
+MODULE_DESCRIPTION("IBM System p Collaborative Memory Manager");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(CMM_DRIVER_VERSION);
+
+module_param_named(delay, delay, uint, 0644);
+MODULE_PARM_DESC(delay, "Delay (in seconds) between polls to query hypervisor paging requests. "
+ "[Default=" __stringify(CMM_DEFAULT_DELAY) "]");
+module_param_named(hotplug_delay, hotplug_delay, uint, 0644);
+MODULE_PARM_DESC(hotplug_delay, "Delay (in seconds) after memory hotplug remove "
+ "before loaning resumes. "
+ "[Default=" __stringify(CMM_HOTPLUG_DELAY) "]");
+module_param_named(oom_kb, oom_kb, uint, 0644);
+MODULE_PARM_DESC(oom_kb, "Amount of memory in kb to free on OOM. "
+ "[Default=" __stringify(CMM_OOM_KB) "]");
+module_param_named(min_mem_mb, min_mem_mb, ulong, 0644);
+MODULE_PARM_DESC(min_mem_mb, "Minimum amount of memory (in MB) to not balloon. "
+ "[Default=" __stringify(CMM_MIN_MEM_MB) "]");
+module_param_named(debug, cmm_debug, uint, 0644);
+MODULE_PARM_DESC(debug, "Enable module debugging logging. Set to 1 to enable. "
+ "[Default=" __stringify(CMM_DEBUG) "]");
+module_param_named(simulate, simulate, bool, 0444);
+MODULE_PARM_DESC(simulate, "Enable simulation mode (no communication with hw).");
+
+#define cmm_dbg(...) if (cmm_debug) { printk(KERN_INFO "cmm: "__VA_ARGS__); }
+
+static atomic_long_t loaned_pages;
+static unsigned long loaned_pages_target;
+static unsigned long oom_freed_pages;
+
+static DEFINE_MUTEX(hotplug_mutex);
+static int hotplug_occurred; /* protected by the hotplug mutex */
+
+static struct task_struct *cmm_thread_ptr;
+static struct balloon_dev_info b_dev_info;
+
+/*
+ * Mark every cmo_page_sz-sized chunk of @page as loaned via H_PAGE_INIT.
+ *
+ * Returns 0 on success (or immediately when the simulate parameter is set),
+ * otherwise the non-zero return code of the hcall that failed.
+ */
+static long plpar_page_set_loaned(struct page *page)
+{
+ const unsigned long vpa = page_to_phys(page);
+ unsigned long cmo_page_sz = cmo_get_page_size();
+ long rc = 0;
+ int i;
+
+ if (unlikely(simulate))
+ return 0;
+
+ /* Walk the page in cmo_page_sz steps; stop at the first failing hcall. */
+ for (i = 0; !rc && i < PAGE_SIZE; i += cmo_page_sz)
+ rc = plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_LOANED, vpa + i, 0);
+
+ /*
+ * On failure i is one step past the failing chunk.  Step back onto it
+ * and set the chunks before it active again; the return codes of these
+ * undo hcalls are ignored.
+ */
+ for (i -= cmo_page_sz; rc && i != 0; i -= cmo_page_sz)
+ plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_ACTIVE,
+ vpa + i - cmo_page_sz, 0);
+
+ return rc;
+}
+
+/*
+ * Mark every cmo_page_sz-sized chunk of @page as active via H_PAGE_INIT.
+ *
+ * Returns 0 on success (or immediately when the simulate parameter is set),
+ * otherwise the non-zero return code of the hcall that failed.
+ */
+static long plpar_page_set_active(struct page *page)
+{
+ const unsigned long vpa = page_to_phys(page);
+ unsigned long cmo_page_sz = cmo_get_page_size();
+ long rc = 0;
+ int i;
+
+ if (unlikely(simulate))
+ return 0;
+
+ /* Walk the page in cmo_page_sz steps; stop at the first failing hcall. */
+ for (i = 0; !rc && i < PAGE_SIZE; i += cmo_page_sz)
+ rc = plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_ACTIVE, vpa + i, 0);
+
+ /*
+ * On failure i is one step past the failing chunk.  Step back onto it
+ * and set the chunks before it loaned again; the return codes of these
+ * undo hcalls are ignored.
+ */
+ for (i -= cmo_page_sz; rc && i != 0; i -= cmo_page_sz)
+ plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_LOANED,
+ vpa + i - cmo_page_sz, 0);
+
+ return rc;
+}
+
+/**
+ * cmm_alloc_pages - Allocate pages and mark them as loaned
+ * @nr: number of pages to allocate
+ *
+ * Stops early when a memory hotplug operation is running or has been
+ * recorded, when no balloon page can be allocated, or when a page cannot
+ * be set to loaned.
+ *
+ * Return value:
+ * number of pages requested to be allocated which were not
+ **/
+static long cmm_alloc_pages(long nr)
+{
+	struct page *pg;
+	long err;
+	int hotplugged;
+
+	cmm_dbg("Begin request for %ld pages\n", nr);
+
+	for (; nr; nr--) {
+		/* Bail out if hotplug holds the mutex or has left its mark. */
+		if (!mutex_trylock(&hotplug_mutex))
+			break;
+		hotplugged = hotplug_occurred;
+		mutex_unlock(&hotplug_mutex);
+		if (hotplugged)
+			break;
+
+		pg = balloon_page_alloc();
+		if (!pg)
+			break;
+
+		err = plpar_page_set_loaned(pg);
+		if (err) {
+			pr_err("%s: Can not set page to loaned. rc=%ld\n", __func__, err);
+			__free_page(pg);
+			break;
+		}
+
+		balloon_page_enqueue(&b_dev_info, pg);
+		atomic_long_inc(&loaned_pages);
+		adjust_managed_page_count(pg, -1);
+	}
+
+	cmm_dbg("End request with %ld pages unfulfilled\n", nr);
+	return nr;
+}
+
+/**
+ * cmm_free_pages - Free pages and mark them as active
+ * @nr: number of pages to free
+ *
+ * Stops early once the balloon has no more pages to hand back.  The result
+ * of setting a page active is not checked.
+ *
+ * Return value:
+ * number of pages requested to be freed which were not
+ **/
+static long cmm_free_pages(long nr)
+{
+	struct page *pg;
+
+	cmm_dbg("Begin free of %ld pages.\n", nr);
+	for (; nr; nr--) {
+		pg = balloon_page_dequeue(&b_dev_info);
+		if (!pg)
+			break;
+
+		plpar_page_set_active(pg);
+		adjust_managed_page_count(pg, 1);
+		__free_page(pg);
+		atomic_long_dec(&loaned_pages);
+	}
+	cmm_dbg("End request with %ld pages unfulfilled\n", nr);
+	return nr;
+}
+
+/**
+ * cmm_oom_notify - OOM notifier
+ * @self: notifier block struct
+ * @dummy: not used
+ * @parm: returned - number of pages freed
+ *
+ * Gives back up to oom_kb worth of loaned pages and resets the loan target
+ * to the number of pages still loaned.
+ *
+ * Return value:
+ * NOTIFY_OK
+ **/
+static int cmm_oom_notify(struct notifier_block *self,
+			  unsigned long dummy, void *parm)
+{
+	unsigned long *freed = parm;
+	/*
+	 * oom_kb is a writable module parameter; sample it once so the
+	 * request and the accounting below use the same value.
+	 */
+	const long requested = KB2PAGES(oom_kb);
+	long released;
+
+	cmm_dbg("OOM processing started\n");
+	released = requested - cmm_free_pages(requested);
+	loaned_pages_target = atomic_long_read(&loaned_pages);
+	*freed += released;
+	oom_freed_pages += released;
+	cmm_dbg("OOM processing complete\n");
+	return NOTIFY_OK;
+}
+
+/**
+ * cmm_get_mpp - Read memory performance parameters
+ *
+ * Makes hcall to query the current page loan request from the hypervisor
+ * (or uses simulate_loan_target_kb in simulation mode) and stores the
+ * resulting clamped value in loaned_pages_target.  If the hcall fails,
+ * loaned_pages_target is left unchanged.
+ *
+ * Return value:
+ * nothing
+ **/
+static void cmm_get_mpp(void)
+{
+ const long __loaned_pages = atomic_long_read(&loaned_pages);
+ const long total_pages = totalram_pages() + __loaned_pages;
+ int rc;
+ struct hvcall_mpp_data mpp_data;
+ signed long active_pages_target, page_loan_request, target;
+ signed long min_mem_pages = (min_mem_mb * 1024 * 1024) / PAGE_SIZE;
+
+ if (likely(!simulate)) {
+ rc = h_get_mpp(&mpp_data);
+ if (rc != H_SUCCESS)
+ return;
+ /* loan_request is divided by PAGE_SIZE to get a signed page delta. */
+ page_loan_request = div_s64((s64)mpp_data.loan_request,
+ PAGE_SIZE);
+ target = page_loan_request + __loaned_pages;
+ } else {
+ target = KB2PAGES(simulate_loan_target_kb);
+ page_loan_request = target - __loaned_pages;
+ }
+
+ /* No loaning at all when below the configured minimum memory. */
+ if (target < 0 || total_pages < min_mem_pages)
+ target = 0;
+
+ /* Pages given back on OOM are subtracted from the target. */
+ if (target > oom_freed_pages)
+ target -= oom_freed_pages;
+ else
+ target = 0;
+
+ active_pages_target = total_pages - target;
+
+ /* Never loan so much that fewer than min_mem_pages stay active. */
+ if (min_mem_pages > active_pages_target)
+ target = total_pages - min_mem_pages;
+
+ if (target < 0)
+ target = 0;
+
+ loaned_pages_target = target;
+
+ cmm_dbg("delta = %ld, loaned = %lu, target = %lu, oom = %lu, totalram = %lu\n",
+ page_loan_request, __loaned_pages, loaned_pages_target,
+ oom_freed_pages, totalram_pages());
+}
+
+/* OOM notifier block; dispatches to cmm_oom_notify(). */
+static struct notifier_block cmm_oom_nb = {
+ .notifier_call = cmm_oom_notify
+};
+
+/**
+ * cmm_thread - CMM task thread
+ * @dummy: not used
+ *
+ * Every "delay" seconds: refresh loaned_pages_target via cmm_get_mpp() and
+ * loan or free pages to move loaned_pages towards it.  Loaning is paused
+ * while a memory hotplug operation holds hotplug_mutex and for
+ * "hotplug_delay" seconds after one has been recorded.
+ *
+ * Return value:
+ * 0
+ **/
+static int cmm_thread(void *dummy)
+{
+ unsigned long timeleft;
+ long __loaned_pages;
+
+ while (1) {
+ timeleft = msleep_interruptible(delay * 1000);
+
+ /* Leave when asked to stop or when the sleep did not run to completion. */
+ if (kthread_should_stop() || timeleft)
+ break;
+
+ if (mutex_trylock(&hotplug_mutex)) {
+ if (hotplug_occurred) {
+ /* Consume the flag, then back off before loaning again. */
+ hotplug_occurred = 0;
+ mutex_unlock(&hotplug_mutex);
+ cmm_dbg("Hotplug operation has occurred, "
+ "loaning activity suspended "
+ "for %d seconds.\n",
+ hotplug_delay);
+ timeleft = msleep_interruptible(hotplug_delay *
+ 1000);
+ if (kthread_should_stop() || timeleft)
+ break;
+ continue;
+ }
+ mutex_unlock(&hotplug_mutex);
+ } else {
+ /* Mutex is held elsewhere (see cmm_memory_cb); retry next period. */
+ cmm_dbg("Hotplug operation in progress, activity "
+ "suspended\n");
+ continue;
+ }
+
+ cmm_get_mpp();
+
+ __loaned_pages = atomic_long_read(&loaned_pages);
+ if (loaned_pages_target > __loaned_pages) {
+ /* If not every page could be loaned, fall back to the old count as target. */
+ if (cmm_alloc_pages(loaned_pages_target - __loaned_pages))
+ loaned_pages_target = __loaned_pages;
+ } else if (loaned_pages_target < __loaned_pages)
+ cmm_free_pages(__loaned_pages - loaned_pages_target);
+ }
+ return 0;
+}
+
+/*
+ * CMM_SHOW - define a read-only (0444) device attribute @name whose show
+ * callback formats @args with @format.  sysfs_emit() is used instead of
+ * sprintf() so the output is bounded to the sysfs page buffer.
+ */
+#define CMM_SHOW(name, format, args...)			\
+	static ssize_t show_##name(struct device *dev,	\
+				   struct device_attribute *attr,	\
+				   char *buf)			\
+	{							\
+		return sysfs_emit(buf, format, ##args);		\
+	}							\
+	static DEVICE_ATTR(name, 0444, show_##name, NULL)
+
+CMM_SHOW(loaned_kb, "%lu\n", PAGES2KB(atomic_long_read(&loaned_pages)));
+CMM_SHOW(loaned_target_kb, "%lu\n", PAGES2KB(loaned_pages_target));
+
+/*
+ * sysfs show callback for oom_freed_kb: prints oom_freed_pages converted
+ * to KB.  Uses sysfs_emit() so the write is bounded to the sysfs buffer.
+ */
+static ssize_t show_oom_pages(struct device *dev,
+			      struct device_attribute *attr, char *buf)
+{
+	return sysfs_emit(buf, "%lu\n", PAGES2KB(oom_freed_pages));
+}
+
+/*
+ * sysfs store callback for oom_freed_kb: writing 0 resets the OOM-freed
+ * counter.
+ *
+ * Returns @count on success, -EPERM without CAP_SYS_ADMIN, the kstrtoul()
+ * error for input that is not a decimal number, and -EBADMSG for any
+ * non-zero value.
+ */
+static ssize_t store_oom_pages(struct device *dev,
+			       struct device_attribute *attr,
+			       const char *buf, size_t count)
+{
+	unsigned long val;
+	int rc;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	/* kstrtoul() rejects garbage instead of silently parsing it as 0. */
+	rc = kstrtoul(buf, 10, &val);
+	if (rc)
+		return rc;
+	if (val != 0)
+		return -EBADMSG;
+
+	oom_freed_pages = 0;
+	return count;
+}
+
+/* oom_freed_kb: readable by all, writable by owner (store accepts only 0). */
+static DEVICE_ATTR(oom_freed_kb, 0644,
+ show_oom_pages, store_oom_pages);
+
+/* Attributes that cmm_sysfs_register() creates unconditionally. */
+static struct device_attribute *cmm_attrs[] = {
+ &dev_attr_loaned_kb,
+ &dev_attr_loaned_target_kb,
+ &dev_attr_oom_freed_kb,
+};
+
+/* Backed directly by simulate_loan_target_kb; only created in simulate mode. */
+static DEVICE_ULONG_ATTR(simulate_loan_target_kb, 0644,
+ simulate_loan_target_kb);
+
+/* Subsystem ("cmm") that the cmm device is registered on. */
+static struct bus_type cmm_subsys = {
+ .name = "cmm",
+ .dev_name = "cmm",
+};
+
+/*
+ * Device release callback.  Intentionally empty: cmm_dev is a static
+ * object in this file, so there is nothing to free here.
+ */
+static void cmm_release_device(struct device *dev)
+{
+}
+
+/**
+ * cmm_sysfs_register - Register with sysfs
+ * @dev: device to register on the cmm subsystem
+ *
+ * Registers the cmm subsystem and @dev, creates the attributes listed in
+ * cmm_attrs and, in simulate mode, the simulate_loan_target_kb attribute.
+ * Everything set up so far is torn down again on failure.
+ *
+ * Return value:
+ * 0 on success / other on failure
+ **/
+static int cmm_sysfs_register(struct device *dev)
+{
+	int i, rc;
+
+	rc = subsys_system_register(&cmm_subsys, NULL);
+	if (rc)
+		return rc;
+
+	dev->id = 0;
+	dev->bus = &cmm_subsys;
+	dev->release = cmm_release_device;
+
+	rc = device_register(dev);
+	if (rc) {
+		/*
+		 * device_register() initialises a reference even when it
+		 * fails; drop it with put_device() so the driver core can
+		 * clean up what it set up.
+		 */
+		put_device(dev);
+		goto subsys_unregister;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(cmm_attrs); i++) {
+		rc = device_create_file(dev, cmm_attrs[i]);
+		if (rc)
+			goto fail;
+	}
+
+	if (!simulate)
+		return 0;
+	rc = device_create_file(dev, &dev_attr_simulate_loan_target_kb.attr);
+	if (rc)
+		goto fail;
+	return 0;
+
+fail:
+	/* i is the number of cmm_attrs entries that were created. */
+	while (--i >= 0)
+		device_remove_file(dev, cmm_attrs[i]);
+	device_unregister(dev);
+subsys_unregister:
+	bus_unregister(&cmm_subsys);
+	return rc;
+}
+
+/**
+ * cmm_unregister_sysfs - Unregister from sysfs
+ * @dev: device previously registered with cmm_sysfs_register()
+ *
+ * Removes the cmm_attrs files, then unregisters the device and the
+ * cmm subsystem.
+ **/
+static void cmm_unregister_sysfs(struct device *dev)
+{
+	int idx = 0;
+
+	while (idx < ARRAY_SIZE(cmm_attrs)) {
+		device_remove_file(dev, cmm_attrs[idx]);
+		idx++;
+	}
+
+	device_unregister(dev);
+	bus_unregister(&cmm_subsys);
+}
+
+/**
+ * cmm_reboot_notifier - Make sure pages are not still marked as "loaned"
+ * @nb: notifier block struct
+ * @action: reboot action; only SYS_RESTART is acted upon
+ * @unused: not used
+ *
+ * On restart, stops the CMM thread (if any) and frees every loaned page.
+ *
+ * Return value:
+ * NOTIFY_DONE
+ **/
+static int cmm_reboot_notifier(struct notifier_block *nb,
+			       unsigned long action, void *unused)
+{
+	if (action != SYS_RESTART)
+		return NOTIFY_DONE;
+
+	if (cmm_thread_ptr)
+		kthread_stop(cmm_thread_ptr);
+	cmm_thread_ptr = NULL;
+	cmm_free_pages(atomic_long_read(&loaned_pages));
+
+	return NOTIFY_DONE;
+}
+
+/* Reboot notifier block; dispatches to cmm_reboot_notifier(). */
+static struct notifier_block cmm_reboot_nb = {
+ .notifier_call = cmm_reboot_notifier,
+};
+
+/**
+ * cmm_memory_cb - Handle memory hotplug notifier calls
+ * @self: notifier block struct
+ * @action: action to take
+ * @arg: struct memory_notify data for handler (not used here)
+ *
+ * MEM_GOING_OFFLINE takes hotplug_mutex and records that a hotplug
+ * operation occurred; the mutex stays held until the matching MEM_OFFLINE
+ * or MEM_CANCEL_OFFLINE call releases it.  The loaning code uses
+ * mutex_trylock() on the same mutex and backs off while it is held.
+ * Online-related actions are ignored.
+ *
+ * Return value:
+ * NOTIFY_OK
+ *
+ **/
+static int cmm_memory_cb(struct notifier_block *self,
+ unsigned long action, void *arg)
+{
+ switch (action) {
+ case MEM_GOING_OFFLINE:
+ /* Deliberately left locked; released by the cases below. */
+ mutex_lock(&hotplug_mutex);
+ hotplug_occurred = 1;
+ break;
+ case MEM_OFFLINE:
+ case MEM_CANCEL_OFFLINE:
+ mutex_unlock(&hotplug_mutex);
+ cmm_dbg("Memory offline operation complete.\n");
+ break;
+ case MEM_GOING_ONLINE:
+ case MEM_ONLINE:
+ case MEM_CANCEL_ONLINE:
+ break;
+ }
+
+ return NOTIFY_OK;
+}
+
+/* Memory hotplug notifier block; dispatches to cmm_memory_cb(). */
+static struct notifier_block cmm_mem_nb = {
+ .notifier_call = cmm_memory_cb,
+ .priority = CMM_MEM_HOTPLUG_PRI
+};
+
+#ifdef CONFIG_BALLOON_COMPACTION
+/*
+ * Balloon compaction callback: move a loaned page from @page to @newpage.
+ *
+ * Returns MIGRATEPAGE_SUCCESS, or -EBUSY when @newpage cannot be set to
+ * loaned (in which case nothing has been changed).
+ */
+static int cmm_migratepage(struct balloon_dev_info *b_dev_info,
+			   struct page *newpage, struct page *page,
+			   enum migrate_mode mode)
+{
+	unsigned long flags;
+
+	/*
+	 * loan/"inflate" the newpage first.
+	 *
+	 * We might race against the cmm_thread who might discover after our
+	 * loan request that another page is to be unloaned. However, once
+	 * the cmm_thread runs again later, this error will automatically
+	 * be corrected.
+	 */
+	if (plpar_page_set_loaned(newpage)) {
+		/* Unlikely, but possible. Tell the caller not to retry now. */
+		pr_err_ratelimited("%s: Cannot set page to loaned.\n", __func__);
+		return -EBUSY;
+	}
+
+	/* balloon page list reference */
+	get_page(newpage);
+
+	/*
+	 * When we migrate a page to a different zone, we have to fixup the
+	 * count of both involved zones as we adjusted the managed page count
+	 * when inflating.
+	 */
+	if (page_zone(page) != page_zone(newpage)) {
+		adjust_managed_page_count(page, 1);
+		adjust_managed_page_count(newpage, -1);
+	}
+
+	spin_lock_irqsave(&b_dev_info->pages_lock, flags);
+	balloon_page_insert(b_dev_info, newpage);
+	balloon_page_delete(page);
+	b_dev_info->isolated_pages--;
+	spin_unlock_irqrestore(&b_dev_info->pages_lock, flags);
+
+	/*
+	 * activate/"deflate" the old page. We ignore any errors just like the
+	 * other callers.
+	 */
+	plpar_page_set_active(page);
+
+	/* balloon page list reference */
+	put_page(page);
+
+	return MIGRATEPAGE_SUCCESS;
+}
+
+/* Initialise b_dev_info and install cmm_migratepage() as its migrate hook. */
+static void cmm_balloon_compaction_init(void)
+{
+ balloon_devinfo_init(&b_dev_info);
+ b_dev_info.migratepage = cmm_migratepage;
+}
+#else /* CONFIG_BALLOON_COMPACTION */
+/* Stub used when CONFIG_BALLOON_COMPACTION is not set: nothing to set up. */
+static void cmm_balloon_compaction_init(void)
+{
+}
+#endif /* CONFIG_BALLOON_COMPACTION */
+
+/**
+ * cmm_init - Module initialization
+ *
+ * Registers the OOM, reboot and memory hotplug notifiers and the sysfs
+ * interface, then starts the CMM thread unless CMM is disabled.  Each
+ * failure unwinds exactly the steps that had succeeded before it.
+ *
+ * Return value:
+ * 0 on success / other on failure
+ **/
+static int cmm_init(void)
+{
+	int rc;
+
+	if (!firmware_has_feature(FW_FEATURE_CMO) && !simulate)
+		return -EOPNOTSUPP;
+
+	cmm_balloon_compaction_init();
+
+	rc = register_oom_notifier(&cmm_oom_nb);
+	if (rc < 0)
+		goto out_balloon_compaction;
+
+	rc = register_reboot_notifier(&cmm_reboot_nb);
+	if (rc)
+		goto out_oom_notifier;
+
+	rc = cmm_sysfs_register(&cmm_dev);
+	if (rc)
+		goto out_reboot_notifier;
+
+	rc = register_memory_notifier(&cmm_mem_nb);
+	if (rc)
+		goto out_unregister_sysfs;
+
+	if (cmm_disabled)
+		return 0;
+
+	cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread");
+	if (IS_ERR(cmm_thread_ptr)) {
+		rc = PTR_ERR(cmm_thread_ptr);
+		/* Never leave an ERR_PTR where kthread_stop() could find it. */
+		cmm_thread_ptr = NULL;
+		goto out_unregister_notifier;
+	}
+
+	return 0;
+out_unregister_notifier:
+	unregister_memory_notifier(&cmm_mem_nb);
+out_unregister_sysfs:
+	cmm_unregister_sysfs(&cmm_dev);
+out_reboot_notifier:
+	unregister_reboot_notifier(&cmm_reboot_nb);
+out_oom_notifier:
+	unregister_oom_notifier(&cmm_oom_nb);
+out_balloon_compaction:
+	return rc;
+}
+
+/**
+ * cmm_exit - Module exit
+ *
+ * Stops the CMM thread first, then unregisters the notifiers, frees all
+ * pages that are still loaned, and finally removes the sysfs interface.
+ *
+ * Return value:
+ * nothing
+ **/
+static void cmm_exit(void)
+{
+ if (cmm_thread_ptr)
+ kthread_stop(cmm_thread_ptr);
+ unregister_oom_notifier(&cmm_oom_nb);
+ unregister_reboot_notifier(&cmm_reboot_nb);
+ unregister_memory_notifier(&cmm_mem_nb);
+ /* Hand back every page still accounted in loaned_pages. */
+ cmm_free_pages(atomic_long_read(&loaned_pages));
+ cmm_unregister_sysfs(&cmm_dev);
+}
+
+/**
+ * cmm_set_disable - Disable/Enable CMM
+ * @val: new parameter value as a string; only 0 and 1 are accepted
+ * @kp: kernel parameter descriptor (not used)
+ *
+ * Disabling stops the CMM thread and frees all loaned pages; enabling
+ * starts the thread again.  cmm_disabled is only updated on success.
+ *
+ * Return value:
+ * 0 on success / other on failure
+ **/
+static int cmm_set_disable(const char *val, const struct kernel_param *kp)
+{
+	int disable = simple_strtoul(val, NULL, 10);
+	struct task_struct *task;
+
+	if (disable != 0 && disable != 1)
+		return -EINVAL;
+
+	if (disable && !cmm_disabled) {
+		if (cmm_thread_ptr)
+			kthread_stop(cmm_thread_ptr);
+		cmm_thread_ptr = NULL;
+		cmm_free_pages(atomic_long_read(&loaned_pages));
+	} else if (!disable && cmm_disabled) {
+		/*
+		 * Publish the task only once it is known to be valid, so an
+		 * ERR_PTR is never stored where a later kthread_stop() call
+		 * would dereference it.
+		 */
+		task = kthread_run(cmm_thread, NULL, "cmmthread");
+		if (IS_ERR(task))
+			return PTR_ERR(task);
+		cmm_thread_ptr = task;
+	}
+
+	cmm_disabled = disable;
+	return 0;
+}
+
+module_param_call(disable, cmm_set_disable, param_get_uint,
+ &cmm_disabled, 0644);
+MODULE_PARM_DESC(disable, "Disable CMM. Set to 1 to disable. "
+ "[Default=" __stringify(CMM_DISABLE) "]");
+
+module_init(cmm_init);
+module_exit(cmm_exit);
diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c
new file mode 100644
index 000000000..47f8eabd1
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/dlpar.c
@@ -0,0 +1,583 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Support for dynamic reconfiguration for PCI, Memory, and CPU
+ * Hotplug and Dynamic Logical Partitioning on RPA platforms.
+ *
+ * Copyright (C) 2009 Nathan Fontenot
+ * Copyright (C) 2009 IBM Corporation
+ */
+
+#define pr_fmt(fmt) "dlpar: " fmt
+
+#include <linux/kernel.h>
+#include <linux/notifier.h>
+#include <linux/spinlock.h>
+#include <linux/cpu.h>
+#include <linux/slab.h>
+#include <linux/of.h>
+
+#include "of_helpers.h"
+#include "pseries.h"
+
+#include <asm/machdep.h>
+#include <linux/uaccess.h>
+#include <asm/rtas.h>
+#include <asm/rtas-work-area.h>
+
+static struct workqueue_struct *pseries_hp_wq;
+
+/* One queued hotplug request: the work item plus the error log it handles. */
+struct pseries_hp_work {
+ struct work_struct work;
+ struct pseries_hp_errorlog *errlog; /* freed by pseries_hp_work_fn() */
+};
+
+/*
+ * Header at the start of the configure-connector work area.  All fields are
+ * big-endian; the *_offset fields are byte offsets from the start of this
+ * structure (see dlpar_parse_cc_property() and dlpar_parse_cc_node()).
+ */
+struct cc_workarea {
+ __be32 drc_index;
+ __be32 zero;
+ __be32 name_offset;
+ __be32 prop_length;
+ __be32 prop_offset;
+};
+
+/* Free a property built by dlpar_parse_cc_property(): name, value, then @prop itself. */
+void dlpar_free_cc_property(struct property *prop)
+{
+ kfree(prop->name);
+ kfree(prop->value);
+ kfree(prop);
+}
+
+/*
+ * Build a struct property from the name and value that @ccwa currently
+ * describes.  Name and value are copied into fresh allocations.
+ *
+ * Returns the new property, or NULL if any allocation fails.
+ */
+static struct property *dlpar_parse_cc_property(struct cc_workarea *ccwa)
+{
+	char *base = (char *)ccwa;
+	struct property *prop;
+
+	prop = kzalloc(sizeof(*prop), GFP_KERNEL);
+	if (!prop)
+		return NULL;
+
+	prop->name = kstrdup(base + be32_to_cpu(ccwa->name_offset), GFP_KERNEL);
+	if (!prop->name)
+		goto err_free;
+
+	prop->length = be32_to_cpu(ccwa->prop_length);
+	prop->value = kmemdup(base + be32_to_cpu(ccwa->prop_offset),
+			      prop->length, GFP_KERNEL);
+	if (!prop->value)
+		goto err_free;
+
+	return prop;
+
+err_free:
+	dlpar_free_cc_property(prop);
+	return NULL;
+}
+
+/*
+ * Allocate a device node named after the string at @ccwa's name_offset,
+ * flag it OF_DYNAMIC and initialise it.
+ *
+ * Returns the new node, or NULL if an allocation fails.
+ */
+static struct device_node *dlpar_parse_cc_node(struct cc_workarea *ccwa)
+{
+	struct device_node *node;
+
+	node = kzalloc(sizeof(*node), GFP_KERNEL);
+	if (!node)
+		return NULL;
+
+	node->full_name = kstrdup((const char *)ccwa +
+				  be32_to_cpu(ccwa->name_offset), GFP_KERNEL);
+	if (node->full_name) {
+		of_node_set_flag(node, OF_DYNAMIC);
+		of_node_init(node);
+		return node;
+	}
+
+	kfree(node);
+	return NULL;
+}
+
+/* Free every property hanging off @dn, then its name and the node itself. */
+static void dlpar_free_one_cc_node(struct device_node *dn)
+{
+	struct property *head;
+
+	while ((head = dn->properties) != NULL) {
+		dn->properties = head->next;
+		dlpar_free_cc_property(head);
+	}
+
+	kfree(dn->full_name);
+	kfree(dn);
+}
+
+/*
+ * Recursively free a tree built by dlpar_configure_connector(): children
+ * and siblings of @dn are freed before @dn itself.
+ */
+void dlpar_free_cc_nodes(struct device_node *dn)
+{
+ if (dn->child)
+ dlpar_free_cc_nodes(dn->child);
+
+ if (dn->sibling)
+ dlpar_free_cc_nodes(dn->sibling);
+
+ dlpar_free_one_cc_node(dn);
+}
+
+#define COMPLETE 0
+#define NEXT_SIBLING 1
+#define NEXT_CHILD 2
+#define NEXT_PROPERTY 3
+#define PREV_PARENT 4
+#define MORE_MEMORY 5
+#define ERR_CFG_USE -9003
+
+/*
+ * Build a device-node tree for the connector @drc_index by calling the
+ * ibm,configure-connector RTAS function repeatedly until it reports
+ * COMPLETE.  Each call's return code says what the work area now holds
+ * (sibling node, child node, property) or that the walk moves back to
+ * the parent.
+ *
+ * @drc_index is stored into the work area unchanged, i.e. the caller
+ * passes it already big-endian.  @parent becomes the parent of the first
+ * node created.
+ *
+ * Returns the first node of the new tree, or NULL when the RTAS function
+ * is not available, an allocation fails, or an unexpected code comes
+ * back (any partially built tree is freed).
+ *
+ * NOTE(review): NEXT_SIBLING, NEXT_PROPERTY and PREV_PARENT dereference
+ * last_dn without a NULL check, so they rely on a NEXT_CHILD having been
+ * reported first - confirm this is guaranteed by firmware.
+ */
+struct device_node *dlpar_configure_connector(__be32 drc_index,
+ struct device_node *parent)
+{
+ struct device_node *dn;
+ struct device_node *first_dn = NULL;
+ struct device_node *last_dn = NULL;
+ struct property *property;
+ struct property *last_property = NULL;
+ struct cc_workarea *ccwa;
+ struct rtas_work_area *work_area;
+ char *data_buf;
+ int cc_token;
+ int rc = -1;
+
+ cc_token = rtas_function_token(RTAS_FN_IBM_CONFIGURE_CONNECTOR);
+ if (cc_token == RTAS_UNKNOWN_SERVICE)
+ return NULL;
+
+ work_area = rtas_work_area_alloc(SZ_4K);
+ data_buf = rtas_work_area_raw_buf(work_area);
+
+ /* The work area starts with the cc_workarea header. */
+ ccwa = (struct cc_workarea *)&data_buf[0];
+ ccwa->drc_index = drc_index;
+ ccwa->zero = 0;
+
+ do {
+ /* Repeat the call for as long as rtas_busy_delay() asks for a retry. */
+ do {
+ rc = rtas_call(cc_token, 2, 1, NULL,
+ rtas_work_area_phys(work_area), NULL);
+ } while (rtas_busy_delay(rc));
+
+ switch (rc) {
+ case COMPLETE:
+ break;
+
+ case NEXT_SIBLING:
+ dn = dlpar_parse_cc_node(ccwa);
+ if (!dn)
+ goto cc_error;
+
+ dn->parent = last_dn->parent;
+ last_dn->sibling = dn;
+ last_dn = dn;
+ break;
+
+ case NEXT_CHILD:
+ dn = dlpar_parse_cc_node(ccwa);
+ if (!dn)
+ goto cc_error;
+
+ /* The very first node hangs off the caller-supplied parent. */
+ if (!first_dn) {
+ dn->parent = parent;
+ first_dn = dn;
+ } else {
+ dn->parent = last_dn;
+ if (last_dn)
+ last_dn->child = dn;
+ }
+
+ last_dn = dn;
+ break;
+
+ case NEXT_PROPERTY:
+ property = dlpar_parse_cc_property(ccwa);
+ if (!property)
+ goto cc_error;
+
+ /* Start the node's list, or append after the last property seen. */
+ if (!last_dn->properties)
+ last_dn->properties = property;
+ else
+ last_property->next = property;
+
+ last_property = property;
+ break;
+
+ case PREV_PARENT:
+ last_dn = last_dn->parent;
+ break;
+
+ case MORE_MEMORY:
+ case ERR_CFG_USE:
+ default:
+ printk(KERN_ERR "Unexpected Error (%d) "
+ "returned from configure-connector\n", rc);
+ goto cc_error;
+ }
+ } while (rc);
+
+cc_error:
+ rtas_work_area_free(work_area);
+
+ /* rc is non-zero on every error path above, zero only after COMPLETE. */
+ if (rc) {
+ if (first_dn)
+ dlpar_free_cc_nodes(first_dn);
+
+ return NULL;
+ }
+
+ return first_dn;
+}
+
+/*
+ * Set @parent as the parent of @dn and attach @dn with of_attach_node().
+ *
+ * Returns 0 on success, otherwise the of_attach_node() error (which is
+ * also logged).
+ */
+int dlpar_attach_node(struct device_node *dn, struct device_node *parent)
+{
+	int err;
+
+	dn->parent = parent;
+
+	err = of_attach_node(dn);
+	if (err)
+		printk(KERN_ERR "Failed to add device node %pOF\n", dn);
+
+	return err;
+}
+
+/*
+ * Detach @dn and, recursively, all of its children.  Children are detached
+ * first; the results of those recursive calls are not checked.
+ *
+ * Returns 0 on success, otherwise the of_detach_node() error for @dn, in
+ * which case of_node_put() is not called on it.
+ */
+int dlpar_detach_node(struct device_node *dn)
+{
+ struct device_node *child;
+ int rc;
+
+ child = of_get_next_child(dn, NULL);
+ while (child) {
+ dlpar_detach_node(child);
+ child = of_get_next_child(dn, child);
+ }
+
+ rc = of_detach_node(dn);
+ if (rc)
+ return rc;
+
+ of_node_put(dn);
+
+ return 0;
+}
+
+#define DR_ENTITY_SENSE 9003
+#define DR_ENTITY_PRESENT 1
+#define DR_ENTITY_UNUSABLE 2
+#define ALLOCATION_STATE 9003
+#define ALLOC_UNUSABLE 0
+#define ALLOC_USABLE 1
+#define ISOLATION_STATE 9001
+#define ISOLATE 0
+#define UNISOLATE 1
+
+/*
+ * Acquire the connector @drc_index: it must currently sense as unusable,
+ * is then set usable and finally unisolated.  If unisolating fails, the
+ * allocation state is put back to unusable.
+ *
+ * Returns 0 on success, -1 if the sensor cannot be read or reports another
+ * state, otherwise the failing rtas_set_indicator() result.
+ */
+int dlpar_acquire_drc(u32 drc_index)
+{
+	int dr_status, rc;
+
+	if (rtas_get_sensor(DR_ENTITY_SENSE, drc_index, &dr_status) ||
+	    dr_status != DR_ENTITY_UNUSABLE)
+		return -1;
+
+	rc = rtas_set_indicator(ALLOCATION_STATE, drc_index, ALLOC_USABLE);
+	if (!rc) {
+		rc = rtas_set_indicator(ISOLATION_STATE, drc_index, UNISOLATE);
+		if (rc)
+			rtas_set_indicator(ALLOCATION_STATE, drc_index,
+					   ALLOC_UNUSABLE);
+	}
+
+	return rc;
+}
+
+/*
+ * Release the connector @drc_index: it must currently sense as present,
+ * is then isolated and finally set unusable.  If the latter fails, the
+ * connector is unisolated again.
+ *
+ * Returns 0 on success, -1 if the sensor cannot be read or reports another
+ * state, otherwise the failing rtas_set_indicator() result.
+ */
+int dlpar_release_drc(u32 drc_index)
+{
+	int dr_status, rc;
+
+	if (rtas_get_sensor(DR_ENTITY_SENSE, drc_index, &dr_status) ||
+	    dr_status != DR_ENTITY_PRESENT)
+		return -1;
+
+	rc = rtas_set_indicator(ISOLATION_STATE, drc_index, ISOLATE);
+	if (!rc) {
+		rc = rtas_set_indicator(ALLOCATION_STATE, drc_index,
+					ALLOC_UNUSABLE);
+		if (rc)
+			rtas_set_indicator(ISOLATION_STATE, drc_index,
+					   UNISOLATE);
+	}
+
+	return rc;
+}
+
+/*
+ * Unisolate the connector @drc_index if it currently senses as present.
+ *
+ * Returns -1 if the sensor cannot be read or reports another state, and 0
+ * otherwise.  The result of the rtas_set_indicator() call itself is not
+ * checked.
+ */
+int dlpar_unisolate_drc(u32 drc_index)
+{
+ int dr_status, rc;
+
+ rc = rtas_get_sensor(DR_ENTITY_SENSE, drc_index, &dr_status);
+ if (rc || dr_status != DR_ENTITY_PRESENT)
+ return -1;
+
+ rtas_set_indicator(ISOLATION_STATE, drc_index, UNISOLATE);
+
+ return 0;
+}
+
+/*
+ * Dispatch a hotplug error log to the memory, CPU or pmem handler.
+ *
+ * The DRC identifier fields of @hp_elog are converted from big-endian to
+ * CPU byte order IN PLACE before dispatching, so the caller's structure is
+ * modified and must hold big-endian values on entry.
+ *
+ * Returns the handler's result, or -EINVAL for an unknown resource type.
+ */
+int handle_dlpar_errorlog(struct pseries_hp_errorlog *hp_elog)
+{
+ int rc;
+
+ /* pseries error logs are in BE format, convert to cpu type */
+ switch (hp_elog->id_type) {
+ case PSERIES_HP_ELOG_ID_DRC_COUNT:
+ hp_elog->_drc_u.drc_count =
+ be32_to_cpu(hp_elog->_drc_u.drc_count);
+ break;
+ case PSERIES_HP_ELOG_ID_DRC_INDEX:
+ hp_elog->_drc_u.drc_index =
+ be32_to_cpu(hp_elog->_drc_u.drc_index);
+ break;
+ case PSERIES_HP_ELOG_ID_DRC_IC:
+ hp_elog->_drc_u.ic.count =
+ be32_to_cpu(hp_elog->_drc_u.ic.count);
+ hp_elog->_drc_u.ic.index =
+ be32_to_cpu(hp_elog->_drc_u.ic.index);
+ }
+ /* Any other id_type is left unconverted. */
+
+ switch (hp_elog->resource) {
+ case PSERIES_HP_ELOG_RESOURCE_MEM:
+ rc = dlpar_memory(hp_elog);
+ break;
+ case PSERIES_HP_ELOG_RESOURCE_CPU:
+ rc = dlpar_cpu(hp_elog);
+ break;
+ case PSERIES_HP_ELOG_RESOURCE_PMEM:
+ rc = dlpar_hp_pmem(hp_elog);
+ break;
+
+ default:
+ pr_warn_ratelimited("Invalid resource (%d) specified\n",
+ hp_elog->resource);
+ rc = -EINVAL;
+ }
+
+ return rc;
+}
+
+/*
+ * Workqueue handler for a queued hotplug event: processes the error log
+ * and then frees both the log copy and the work container allocated by
+ * queue_hotplug_event().  The result of handle_dlpar_errorlog() is not
+ * checked.
+ */
+static void pseries_hp_work_fn(struct work_struct *work)
+{
+	struct pseries_hp_work *hp_work =
+			container_of(work, struct pseries_hp_work, work);
+
+	handle_dlpar_errorlog(hp_work->errlog);
+
+	kfree(hp_work->errlog);
+	/*
+	 * Free the container that was allocated, not the embedded member;
+	 * this stays correct even if 'work' stops being the first field.
+	 */
+	kfree(hp_work);
+}
+
+/*
+ * Queue @hp_errlog for asynchronous handling on the pseries hotplug
+ * workqueue.  The log is copied, so the caller keeps ownership of
+ * @hp_errlog.  Allocations use GFP_ATOMIC; if one fails, the event is
+ * silently dropped.
+ */
+void queue_hotplug_event(struct pseries_hp_errorlog *hp_errlog)
+{
+	struct pseries_hp_work *work;
+	struct pseries_hp_errorlog *hp_errlog_copy;
+
+	hp_errlog_copy = kmemdup(hp_errlog, sizeof(*hp_errlog), GFP_ATOMIC);
+	if (!hp_errlog_copy)
+		return;
+
+	work = kmalloc(sizeof(*work), GFP_ATOMIC);
+	if (!work) {
+		kfree(hp_errlog_copy);
+		return;
+	}
+
+	/* Use the embedded member instead of casting the container. */
+	INIT_WORK(&work->work, pseries_hp_work_fn);
+	work->errlog = hp_errlog_copy;
+	queue_work(pseries_hp_wq, &work->work);
+}
+
+/*
+ * Consume the next space-separated token of *@cmd and translate it into
+ * hp_elog->resource ("memory" or "cpu").
+ *
+ * Returns 0 on success, -EINVAL for a missing or unknown token.
+ */
+static int dlpar_parse_resource(char **cmd, struct pseries_hp_errorlog *hp_elog)
+{
+	char *token = strsep(cmd, " ");
+
+	if (!token)
+		return -EINVAL;
+
+	if (sysfs_streq(token, "memory")) {
+		hp_elog->resource = PSERIES_HP_ELOG_RESOURCE_MEM;
+		return 0;
+	}
+
+	if (sysfs_streq(token, "cpu")) {
+		hp_elog->resource = PSERIES_HP_ELOG_RESOURCE_CPU;
+		return 0;
+	}
+
+	pr_err("Invalid resource specified.\n");
+	return -EINVAL;
+}
+
+/*
+ * Consume the next space-separated token of *@cmd and translate it into
+ * hp_elog->action ("add" or "remove").
+ *
+ * Returns 0 on success, -EINVAL for a missing or unknown token.
+ */
+static int dlpar_parse_action(char **cmd, struct pseries_hp_errorlog *hp_elog)
+{
+	char *token = strsep(cmd, " ");
+
+	if (!token)
+		return -EINVAL;
+
+	if (sysfs_streq(token, "add")) {
+		hp_elog->action = PSERIES_HP_ELOG_ACTION_ADD;
+		return 0;
+	}
+
+	if (sysfs_streq(token, "remove")) {
+		hp_elog->action = PSERIES_HP_ELOG_ACTION_REMOVE;
+		return 0;
+	}
+
+	pr_err("Invalid action specified.\n");
+	return -EINVAL;
+}
+
+/*
+ * Parse the identifier part of a DLPAR request from *@cmd into @hp_elog.
+ *
+ * Accepted forms (tokens separated by single spaces):
+ *   indexed-count <count> <index>
+ *   index <index>
+ *   count <count>
+ *
+ * Numbers are parsed with kstrtou32() base 0 and stored big-endian, which
+ * is the format handle_dlpar_errorlog() converts from.
+ *
+ * Returns 0 on success, -EINVAL for a missing, unknown or malformed token.
+ */
+static int dlpar_parse_id_type(char **cmd, struct pseries_hp_errorlog *hp_elog)
+{
+ char *arg;
+ u32 count, index;
+
+ arg = strsep(cmd, " ");
+ if (!arg)
+ return -EINVAL;
+
+ if (sysfs_streq(arg, "indexed-count")) {
+ hp_elog->id_type = PSERIES_HP_ELOG_ID_DRC_IC;
+ arg = strsep(cmd, " ");
+ if (!arg) {
+ pr_err("No DRC count specified.\n");
+ return -EINVAL;
+ }
+
+ if (kstrtou32(arg, 0, &count)) {
+ pr_err("Invalid DRC count specified.\n");
+ return -EINVAL;
+ }
+
+ arg = strsep(cmd, " ");
+ if (!arg) {
+ pr_err("No DRC Index specified.\n");
+ return -EINVAL;
+ }
+
+ if (kstrtou32(arg, 0, &index)) {
+ pr_err("Invalid DRC Index specified.\n");
+ return -EINVAL;
+ }
+
+ hp_elog->_drc_u.ic.count = cpu_to_be32(count);
+ hp_elog->_drc_u.ic.index = cpu_to_be32(index);
+ } else if (sysfs_streq(arg, "index")) {
+ hp_elog->id_type = PSERIES_HP_ELOG_ID_DRC_INDEX;
+ arg = strsep(cmd, " ");
+ if (!arg) {
+ pr_err("No DRC Index specified.\n");
+ return -EINVAL;
+ }
+
+ if (kstrtou32(arg, 0, &index)) {
+ pr_err("Invalid DRC Index specified.\n");
+ return -EINVAL;
+ }
+
+ hp_elog->_drc_u.drc_index = cpu_to_be32(index);
+ } else if (sysfs_streq(arg, "count")) {
+ hp_elog->id_type = PSERIES_HP_ELOG_ID_DRC_COUNT;
+ arg = strsep(cmd, " ");
+ if (!arg) {
+ pr_err("No DRC count specified.\n");
+ return -EINVAL;
+ }
+
+ if (kstrtou32(arg, 0, &count)) {
+ pr_err("Invalid DRC count specified.\n");
+ return -EINVAL;
+ }
+
+ hp_elog->_drc_u.drc_count = cpu_to_be32(count);
+ } else {
+ pr_err("Invalid id_type specified.\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/*
+ * Store callback of the "dlpar" attribute: parse a user request and run it
+ * synchronously through handle_dlpar_errorlog().
+ *
+ * Returns @count on success, -ENOMEM if the input cannot be duplicated,
+ * otherwise the error from parsing or handling the request.
+ */
+static ssize_t dlpar_store(const struct class *class, const struct class_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct pseries_hp_errorlog hp_elog;
+ char *argbuf;
+ char *args;
+ int rc;
+
+ /* strsep() modifies its input, so parse a private copy of @buf. */
+ args = argbuf = kstrdup(buf, GFP_KERNEL);
+ if (!argbuf)
+ return -ENOMEM;
+
+ /*
+ * Parse out the request from the user, this will be in the form:
+ * <resource> <action> <id_type> <id>
+ */
+ rc = dlpar_parse_resource(&args, &hp_elog);
+ if (rc)
+ goto dlpar_store_out;
+
+ rc = dlpar_parse_action(&args, &hp_elog);
+ if (rc)
+ goto dlpar_store_out;
+
+ rc = dlpar_parse_id_type(&args, &hp_elog);
+ if (rc)
+ goto dlpar_store_out;
+
+ rc = handle_dlpar_errorlog(&hp_elog);
+
+dlpar_store_out:
+ /* args may have been advanced by strsep(); free the original pointer. */
+ kfree(argbuf);
+
+ if (rc)
+ pr_err("Could not handle DLPAR request \"%s\"\n", buf);
+
+ return rc ? rc : count;
+}
+
+/*
+ * Show callback of the "dlpar" attribute: prints the comma-separated list
+ * of resource types.  Uses sysfs_emit() so the write is bounded to the
+ * sysfs buffer.
+ */
+static ssize_t dlpar_show(const struct class *class, const struct class_attribute *attr,
+			  char *buf)
+{
+	return sysfs_emit(buf, "%s\n", "memory,cpu");
+}
+
+static CLASS_ATTR_RW(dlpar);
+
+/*
+ * Create the ordered pseries hotplug workqueue if it does not exist yet.
+ * Safe to call more than once.
+ *
+ * Returns 0 when the workqueue is available, -ENOMEM otherwise.
+ */
+int __init dlpar_workqueue_init(void)
+{
+	if (!pseries_hp_wq)
+		pseries_hp_wq = alloc_ordered_workqueue("pseries hotplug workqueue", 0);
+
+	return pseries_hp_wq ? 0 : -ENOMEM;
+}
+
+/*
+ * Initcall: make sure the hotplug workqueue exists, then create the
+ * "dlpar" attribute file on kernel_kobj.
+ *
+ * Returns 0 on success, otherwise the error of the step that failed.
+ */
+static int __init dlpar_sysfs_init(void)
+{
+	int err = dlpar_workqueue_init();
+
+	if (!err)
+		err = sysfs_create_file(kernel_kobj, &class_attr_dlpar.attr);
+
+	return err;
+}
+machine_device_initcall(pseries, dlpar_sysfs_init);
+
diff --git a/arch/powerpc/platforms/pseries/dtl.c b/arch/powerpc/platforms/pseries/dtl.c
new file mode 100644
index 000000000..3f1cdcceb
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/dtl.c
@@ -0,0 +1,445 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Virtual Processor Dispatch Trace Log
+ *
+ * (C) Copyright IBM Corporation 2009
+ *
+ * Author: Jeremy Kerr <jk@ozlabs.org>
+ */
+
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <asm/smp.h>
+#include <linux/uaccess.h>
+#include <linux/debugfs.h>
+#include <asm/firmware.h>
+#include <asm/dtl.h>
+#include <asm/lppaca.h>
+#include <asm/plpar_wrappers.h>
+#include <asm/machdep.h>
+
+#ifdef CONFIG_DTL
+/* Per-cpu state for one dispatch trace log reader (one debugfs file). */
+struct dtl {
+ /* log buffer; allocated by dtl_enable(), NULL while no reader is active */
+ struct dtl_entry *buf;
+ /* logical cpu this log belongs to; set once in dtl_init() */
+ int cpu;
+ /* number of entries in buf, recorded by dtl_enable() */
+ int buf_entries;
+ /* index of the next entry dtl_file_read() will hand to userspace */
+ u64 last_idx;
+ /* taken by dtl_enable(), dtl_disable() and dtl_file_read() */
+ spinlock_t lock;
+};
+static DEFINE_PER_CPU(struct dtl, cpu_dtl);
+
+/*
+ * Event mask written to the lppaca dtl_enable_mask by dtl_start(); exposed
+ * through the "dtl_event_mask" debugfs file created in dtl_init().
+ */
+static u8 dtl_event_mask = DTL_LOG_ALL;
+
+
+/*
+ * Size of per-cpu log buffers. Firmware requires that the buffer does
+ * not cross a 4k boundary.
+ */
+static int dtl_buf_entries = N_DISPATCH_LOG;
+
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+
+/*
+ * When CONFIG_VIRT_CPU_ACCOUNTING_NATIVE = y, the cpu accounting code controls
+ * reading from the dispatch trace log. If other code wants to consume
+ * DTL entries, it can set this pointer to a function that will get
+ * called once for each DTL entry that gets processed.
+ */
+/* Set to consume_dtle() by dtl_start(); cleared when the last user stops. */
+static void (*dtl_consumer)(struct dtl_entry *entry, u64 index);
+
+/* Per-cpu ring that consume_dtle() copies entries into. */
+struct dtl_ring {
+ /* count of entries written; bumped after each copy to publish it */
+ u64 write_index;
+ /* next slot to write, or NULL when logging into the ring is disabled */
+ struct dtl_entry *write_ptr;
+ /* first entry of the ring buffer */
+ struct dtl_entry *buf;
+ /* one past the last entry; write_ptr wraps to buf when it gets here */
+ struct dtl_entry *buf_end;
+};
+
+static DEFINE_PER_CPU(struct dtl_ring, dtl_rings);
+
+/* number of started logs; dtl_stop() unhooks dtl_consumer when it hits 0 */
+static atomic_t dtl_count;
+
+/*
+ * The cpu accounting code controls the DTL ring buffer, and we get
+ * given entries as they are processed.
+ *
+ * @dtle is copied into this cpu's dtl_ring at write_ptr. A NULL write_ptr
+ * means logging into the ring is disabled and the entry is dropped.
+ * @index is the entry's position in the hypervisor log and is used to detect
+ * that the hypervisor has already overwritten the slot.
+ */
+static void consume_dtle(struct dtl_entry *dtle, u64 index)
+{
+ struct dtl_ring *dtlr = this_cpu_ptr(&dtl_rings);
+ struct dtl_entry *wp = dtlr->write_ptr;
+ struct lppaca *vpa = local_paca->lppaca_ptr;
+
+ if (!wp)
+ return;
+
+ /* copy first; the overflow check below decides whether to keep it */
+ *wp = *dtle;
+ barrier();
+
+ /* check for hypervisor ring buffer overflow, ignore this entry if so */
+ if (index + N_DISPATCH_LOG < be64_to_cpu(vpa->dtl_idx))
+ return;
+
+ /* advance the write pointer, wrapping at the end of the ring */
+ ++wp;
+ if (wp == dtlr->buf_end)
+ wp = dtlr->buf;
+ dtlr->write_ptr = wp;
+
+ /* incrementing write_index makes the new entry visible */
+ smp_wmb();
+ ++dtlr->write_index;
+}
+
+/*
+ * Point the dtl_ring of dtl->cpu at dtl->buf and hook consume_dtle() in as
+ * the dtl_consumer. Always returns 0.
+ */
+static int dtl_start(struct dtl *dtl)
+{
+ struct dtl_ring *dtlr = &per_cpu(dtl_rings, dtl->cpu);
+
+ dtlr->buf = dtl->buf;
+ dtlr->buf_end = dtl->buf + dtl->buf_entries;
+ dtlr->write_index = 0;
+
+ /* setting write_ptr enables logging into our buffer */
+ smp_wmb();
+ dtlr->write_ptr = dtl->buf;
+
+ /* enable event logging */
+ lppaca_of(dtl->cpu).dtl_enable_mask |= dtl_event_mask;
+
+ /* count this user so dtl_stop() knows when to clear dtl_consumer */
+ dtl_consumer = consume_dtle;
+ atomic_inc(&dtl_count);
+ return 0;
+}
+
+/*
+ * Detach the dtl_ring of dtl->cpu from its buffer. The last user to stop
+ * also clears dtl_consumer.
+ */
+static void dtl_stop(struct dtl *dtl)
+{
+ struct dtl_ring *dtlr = &per_cpu(dtl_rings, dtl->cpu);
+
+ /* a NULL write_ptr makes consume_dtle() drop further entries */
+ dtlr->write_ptr = NULL;
+ smp_wmb();
+
+ dtlr->buf = NULL;
+
+ /* restore dtl_enable_mask */
+ lppaca_of(dtl->cpu).dtl_enable_mask = DTL_LOG_PREEMPT;
+
+ if (atomic_dec_and_test(&dtl_count))
+ dtl_consumer = NULL;
+}
+
+/* Number of entries consume_dtle() has published for dtl->cpu so far. */
+static u64 dtl_current_index(struct dtl *dtl)
+{
+	struct dtl_ring *ring = &per_cpu(dtl_rings, dtl->cpu);
+
+	return ring->write_index;
+}
+
+#else /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
+
+/*
+ * Register dtl->buf with the hypervisor for dtl->cpu and enable logging.
+ * Returns 0 on success, -EIO if register_dtl() fails.
+ */
+static int dtl_start(struct dtl *dtl)
+{
+ unsigned long addr;
+ int ret, hwcpu;
+
+ /* Register our dtl buffer with the hypervisor. The HV expects the
+ * buffer size to be passed in the second word of the buffer */
+ ((u32 *)dtl->buf)[1] = cpu_to_be32(DISPATCH_LOG_BYTES);
+
+ /* registration takes the hardware cpu id and the buffer's physical address */
+ hwcpu = get_hard_smp_processor_id(dtl->cpu);
+ addr = __pa(dtl->buf);
+ ret = register_dtl(hwcpu, addr);
+ if (ret) {
+ printk(KERN_WARNING "%s: DTL registration for cpu %d (hw %d) "
+ "failed with %d\n", __func__, dtl->cpu, hwcpu, ret);
+ return -EIO;
+ }
+
+ /* set our initial buffer indices */
+ lppaca_of(dtl->cpu).dtl_idx = 0;
+
+ /* ensure that our updates to the lppaca fields have occurred before
+ * we actually enable the logging */
+ smp_wmb();
+
+ /* enable event logging */
+ lppaca_of(dtl->cpu).dtl_enable_mask = dtl_event_mask;
+
+ return 0;
+}
+
+/* Disable event logging for dtl->cpu, then unregister its log buffer. */
+static void dtl_stop(struct dtl *dtl)
+{
+	int hw_id;
+
+	/* Clear the enable mask before the buffer is unregistered. */
+	lppaca_of(dtl->cpu).dtl_enable_mask = 0x0;
+
+	hw_id = get_hard_smp_processor_id(dtl->cpu);
+	unregister_dtl(hw_id);
+}
+
+/* Current log index for dtl->cpu, read from its lppaca in cpu byte order. */
+static u64 dtl_current_index(struct dtl *dtl)
+{
+	struct lppaca *vpa = &lppaca_of(dtl->cpu);
+
+	return be64_to_cpu(vpa->dtl_idx);
+}
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
+
+/*
+ * dtl_enable - allocate a log buffer for @dtl and start logging into it
+ * @dtl: per-cpu log to enable
+ *
+ * On success the read side of dtl_access_lock stays held until
+ * dtl_disable() releases it.
+ *
+ * Returns 0 on success, -ENOMEM if the cache is missing or the allocation
+ * fails, -EBUSY if the log already has a reader or dtl_access_lock cannot be
+ * taken, or the error returned by dtl_start().
+ */
+static int dtl_enable(struct dtl *dtl)
+{
+	long int n_entries;
+	long int rc;
+	struct dtl_entry *buf = NULL;
+
+	if (!dtl_cache)
+		return -ENOMEM;
+
+	/* only allow one reader */
+	if (dtl->buf)
+		return -EBUSY;
+
+	/*
+	 * Allocate before taking dtl_access_lock: a GFP_KERNEL allocation may
+	 * sleep, which must not happen once read_trylock() has succeeded.
+	 * NOTE(review): the lock is still held across the return to the
+	 * caller until dtl_disable(); that needs a change to the lock itself.
+	 */
+	n_entries = dtl_buf_entries;
+	buf = kmem_cache_alloc_node(dtl_cache, GFP_KERNEL, cpu_to_node(dtl->cpu));
+	if (!buf) {
+		printk(KERN_WARNING "%s: buffer alloc failed for cpu %d\n",
+				__func__, dtl->cpu);
+		return -ENOMEM;
+	}
+
+	/* ensure there are no other conflicting dtl users */
+	if (!read_trylock(&dtl_access_lock)) {
+		kmem_cache_free(dtl_cache, buf);
+		return -EBUSY;
+	}
+
+	/* Re-check under the lock: another opener may have raced with us. */
+	spin_lock(&dtl->lock);
+	rc = -EBUSY;
+	if (!dtl->buf) {
+		/* store the original allocation size for use during read */
+		dtl->buf_entries = n_entries;
+		dtl->buf = buf;
+		dtl->last_idx = 0;
+		rc = dtl_start(dtl);
+		if (rc)
+			dtl->buf = NULL;
+	}
+	spin_unlock(&dtl->lock);
+
+	/* On any failure undo the lock and the allocation. */
+	if (rc) {
+		read_unlock(&dtl_access_lock);
+		kmem_cache_free(dtl_cache, buf);
+	}
+
+	return rc;
+}
+
+/*
+ * Stop logging for @dtl, free its buffer and drop the dtl_access_lock read
+ * lock that dtl_enable() left held.
+ */
+static void dtl_disable(struct dtl *dtl)
+{
+ spin_lock(&dtl->lock);
+ /* stop first so nothing writes into the buffer while it is freed */
+ dtl_stop(dtl);
+ kmem_cache_free(dtl_cache, dtl->buf);
+ dtl->buf = NULL;
+ dtl->buf_entries = 0;
+ spin_unlock(&dtl->lock);
+ read_unlock(&dtl_access_lock);
+}
+
+/* file interface */
+
+/*
+ * open() handler: enable the log stored in the inode and remember it in
+ * filp->private_data for dtl_file_read(). Returns dtl_enable()'s result.
+ */
+static int dtl_file_open(struct inode *inode, struct file *filp)
+{
+	struct dtl *log = inode->i_private;
+	int err = dtl_enable(log);
+
+	if (!err)
+		filp->private_data = log;
+
+	return err;
+}
+
+/* release() handler: disable the log stored in the inode. Always returns 0. */
+static int dtl_file_release(struct inode *inode, struct file *filp)
+{
+	dtl_disable(inode->i_private);
+
+	return 0;
+}
+
+/*
+ * read() handler: copy log entries recorded since the previous read to
+ * userspace.
+ *
+ * @len must be a multiple of sizeof(struct dtl_entry), otherwise -EINVAL is
+ * returned. @pos is not used. Returns the number of bytes copied, 0 when
+ * there are no new entries, or -EFAULT if copying to userspace fails.
+ *
+ * dtl->last_idx is advanced before the copy, so entries claimed by a read
+ * that then fails with -EFAULT are not offered again.
+ */
+static ssize_t dtl_file_read(struct file *filp, char __user *buf, size_t len,
+ loff_t *pos)
+{
+ long int rc, n_read, n_req, read_size;
+ struct dtl *dtl;
+ u64 cur_idx, last_idx, i;
+
+ if ((len % sizeof(struct dtl_entry)) != 0)
+ return -EINVAL;
+
+ dtl = filp->private_data;
+
+ /* requested number of entries to read */
+ n_req = len / sizeof(struct dtl_entry);
+
+ /* actual number of entries read */
+ n_read = 0;
+
+ spin_lock(&dtl->lock);
+
+ cur_idx = dtl_current_index(dtl);
+ last_idx = dtl->last_idx;
+
+ /* the writer has lapped us: skip ahead to entries still in the buffer */
+ if (last_idx + dtl->buf_entries <= cur_idx)
+ last_idx = cur_idx - dtl->buf_entries + 1;
+
+ /* never hand out more entries than have been written */
+ if (last_idx + n_req > cur_idx)
+ n_req = cur_idx - last_idx;
+
+ if (n_req > 0)
+ dtl->last_idx = last_idx + n_req;
+
+ spin_unlock(&dtl->lock);
+
+ if (n_req <= 0)
+ return 0;
+
+ /* position of the first entry to copy within the ring buffer */
+ i = last_idx % dtl->buf_entries;
+
+ /* read the tail of the buffer if we've wrapped */
+ if (i + n_req > dtl->buf_entries) {
+ read_size = dtl->buf_entries - i;
+
+ rc = copy_to_user(buf, &dtl->buf[i],
+ read_size * sizeof(struct dtl_entry));
+ if (rc)
+ return -EFAULT;
+
+ i = 0;
+ n_req -= read_size;
+ n_read += read_size;
+ buf += read_size * sizeof(struct dtl_entry);
+ }
+
+ /* .. and now the head */
+ rc = copy_to_user(buf, &dtl->buf[i], n_req * sizeof(struct dtl_entry));
+ if (rc)
+ return -EFAULT;
+
+ n_read += n_req;
+
+ return n_read * sizeof(struct dtl_entry);
+}
+
+/* File operations for the per-cpu debugfs files created by dtl_setup_file(). */
+static const struct file_operations dtl_fops = {
+ .open = dtl_file_open,
+ .release = dtl_file_release,
+ .read = dtl_file_read,
+ .llseek = no_llseek,
+};
+
+static struct dentry *dtl_dir;
+
+/*
+ * Create the read-only debugfs file "cpu-<n>" for @dtl under dtl_dir, with
+ * @dtl stored as the file's private data.
+ */
+static void dtl_setup_file(struct dtl *dtl)
+{
+	char name[10];
+
+	/* Bound the write so a large cpu number cannot overrun the buffer. */
+	snprintf(name, sizeof(name), "cpu-%d", dtl->cpu);
+
+	debugfs_create_file(name, 0400, dtl_dir, dtl, &dtl_fops);
+}
+
+/*
+ * Create the "dtl" debugfs directory, its two control files and one log
+ * file per possible cpu. Returns -ENODEV without FW_FEATURE_SPLPAR, else 0.
+ */
+static int dtl_init(void)
+{
+	int cpu;
+
+	if (!firmware_has_feature(FW_FEATURE_SPLPAR))
+		return -ENODEV;
+
+	/* Common directory and the files shared by all cpus. */
+	dtl_dir = debugfs_create_dir("dtl", arch_debugfs_dir);
+	debugfs_create_x8("dtl_event_mask", 0600, dtl_dir, &dtl_event_mask);
+	debugfs_create_u32("dtl_buf_entries", 0400, dtl_dir, &dtl_buf_entries);
+
+	/* Initialise each per-cpu log structure and expose it as a file. */
+	for_each_possible_cpu(cpu) {
+		struct dtl *log = &per_cpu(cpu_dtl, cpu);
+
+		log->cpu = cpu;
+		spin_lock_init(&log->lock);
+		dtl_setup_file(log);
+	}
+
+	return 0;
+}
+machine_arch_initcall(pseries, dtl_init);
+#endif /* CONFIG_DTL */
+
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+/*
+ * Scan the dispatch trace log and count up the stolen time.
+ * Should be called with interrupts disabled.
+ */
+/*
+ * Walks this cpu's dispatch log from the saved read position up to the
+ * hypervisor's current index, stopping early at the first entry whose
+ * timebase is later than @stop_tb. Returns the summed wait times of the
+ * entries consumed; the read position is saved back into the paca.
+ */
+static notrace u64 scan_dispatch_log(u64 stop_tb)
+{
+ u64 i = local_paca->dtl_ridx;
+ struct dtl_entry *dtl = local_paca->dtl_curr;
+ struct dtl_entry *dtl_end = local_paca->dispatch_log_end;
+ struct lppaca *vpa = local_paca->lppaca_ptr;
+ u64 tb_delta;
+ u64 stolen = 0;
+ u64 dtb;
+
+ if (!dtl)
+ return 0;
+
+ /* nothing new since the last scan */
+ if (i == be64_to_cpu(vpa->dtl_idx))
+ return 0;
+ while (i < be64_to_cpu(vpa->dtl_idx)) {
+ /* read the entry first, then check that it was not overwritten */
+ dtb = be64_to_cpu(dtl->timebase);
+ tb_delta = be32_to_cpu(dtl->enqueue_to_dispatch_time) +
+ be32_to_cpu(dtl->ready_to_enqueue_time);
+ barrier();
+ if (i + N_DISPATCH_LOG < be64_to_cpu(vpa->dtl_idx)) {
+ /* buffer has overflowed */
+ i = be64_to_cpu(vpa->dtl_idx) - N_DISPATCH_LOG;
+ dtl = local_paca->dispatch_log + (i % N_DISPATCH_LOG);
+ continue;
+ }
+ /* entries past stop_tb are left for a later scan */
+ if (dtb > stop_tb)
+ break;
+#ifdef CONFIG_DTL
+ if (dtl_consumer)
+ dtl_consumer(dtl, i);
+#endif
+ stolen += tb_delta;
+ ++i;
+ ++dtl;
+ /* wrap around at the end of the log buffer */
+ if (dtl == dtl_end)
+ dtl = local_paca->dispatch_log;
+ }
+ local_paca->dtl_ridx = i;
+ local_paca->dtl_curr = dtl;
+ return stolen;
+}
+
+/*
+ * Accumulate stolen time by scanning the dispatch trace log.
+ * Called on entry from user mode.
+ */
+void notrace pseries_accumulate_stolen_time(void)
+{
+ u64 sst, ust;
+ struct cpu_accounting_data *acct = &local_paca->accounting;
+
+ /*
+ * The two scans are order dependent: the first consumes entries up to
+ * starttime_user, the second continues from there up to starttime.
+ */
+ sst = scan_dispatch_log(acct->starttime_user);
+ ust = scan_dispatch_log(acct->starttime);
+ /* move the stolen portions out of stime/utime and into steal_time */
+ acct->stime -= sst;
+ acct->utime -= ust;
+ acct->steal_time += ust + sst;
+}
+
+/*
+ * Return the stolen time found by scanning the dispatch log up to @stop_tb,
+ * or 0 without FW_FEATURE_SPLPAR or when the log holds nothing new.
+ */
+u64 pseries_calculate_stolen_time(u64 stop_tb)
+{
+	if (!firmware_has_feature(FW_FEATURE_SPLPAR))
+		return 0;
+
+	/* Read index already matches the hypervisor's index: nothing to scan. */
+	if (get_paca()->dtl_ridx == be64_to_cpu(get_lppaca()->dtl_idx))
+		return 0;
+
+	return scan_dispatch_log(stop_tb);
+}
+
+#endif
diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c
new file mode 100644
index 000000000..def184da5
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
@@ -0,0 +1,887 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * The file intends to implement the platform dependent EEH operations on pseries.
+ * Actually, the pseries platform is built based on RTAS heavily. That means the
+ * pseries platform dependent EEH operations will be built on RTAS calls. The functions
+ * are derived from arch/powerpc/platforms/pseries/eeh.c and necessary cleanup has
+ * been done.
+ *
+ * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2011.
+ * Copyright IBM Corporation 2001, 2005, 2006
+ * Copyright Dave Engebretsen & Todd Inglett 2001
+ * Copyright Linas Vepstas 2005, 2006
+ */
+
+#include <linux/atomic.h>
+#include <linux/delay.h>
+#include <linux/export.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/of.h>
+#include <linux/pci.h>
+#include <linux/proc_fs.h>
+#include <linux/rbtree.h>
+#include <linux/sched.h>
+#include <linux/seq_file.h>
+#include <linux/spinlock.h>
+#include <linux/crash_dump.h>
+
+#include <asm/eeh.h>
+#include <asm/eeh_event.h>
+#include <asm/io.h>
+#include <asm/machdep.h>
+#include <asm/ppc-pci.h>
+#include <asm/rtas.h>
+
+/* RTAS tokens */
+static int ibm_set_eeh_option;
+static int ibm_set_slot_reset;
+static int ibm_read_slot_reset_state;
+static int ibm_read_slot_reset_state2;
+static int ibm_slot_error_detail;
+static int ibm_get_config_addr_info;
+static int ibm_get_config_addr_info2;
+static int ibm_configure_pe;
+
+static void pseries_eeh_init_edev(struct pci_dn *pdn);
+
+/*
+ * Installed as ppc_md.pcibios_bus_add_device by eeh_pseries_init().
+ * Initialises the eeh_dev for @pdev and probes it, unless EEH is force
+ * disabled. Virtual functions additionally get their ids copied into the
+ * pci_dn and are moved into the physical function's PE.
+ */
+static void pseries_pcibios_bus_add_device(struct pci_dev *pdev)
+{
+ struct pci_dn *pdn = pci_get_pdn(pdev);
+
+ if (eeh_has_flag(EEH_FORCE_DISABLED))
+ return;
+
+ dev_dbg(&pdev->dev, "EEH: Setting up device\n");
+#ifdef CONFIG_PCI_IOV
+ if (pdev->is_virtfn) {
+ /* pseries_eeh_init_edev() bails out if these ids are zero */
+ pdn->device_id = pdev->device;
+ pdn->vendor_id = pdev->vendor;
+ pdn->class_code = pdev->class;
+ /*
+ * Last allow unfreeze return code used for retrieval
+ * by user space in eeh-sysfs to show the last command
+ * completion from platform.
+ */
+ pdn->last_allow_rc = 0;
+ }
+#endif
+ pseries_eeh_init_edev(pdn);
+#ifdef CONFIG_PCI_IOV
+ if (pdev->is_virtfn) {
+ /*
+ * FIXME: This really should be handled by choosing the right
+ * parent PE in pseries_eeh_init_edev().
+ */
+ struct eeh_pe *physfn_pe = pci_dev_to_eeh_dev(pdev->physfn)->pe;
+ struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
+
+ edev->pe_config_addr = (pdn->busno << 16) | (pdn->devfn << 8);
+ eeh_pe_tree_remove(edev); /* Remove as it is adding to bus pe */
+ eeh_pe_tree_insert(edev, physfn_pe); /* Add as VF PE type */
+ }
+#endif
+ eeh_probe_device(pdev);
+}
+
+
+/**
+ * pseries_eeh_get_pe_config_addr - Find the pe_config_addr for a device
+ * @pdn: pci_dn of the input device
+ *
+ * The EEH RTAS calls use a tuple consisting of: (buid_hi, buid_lo,
+ * pe_config_addr) as a handle to a given PE. This function finds the
+ * pe_config_addr based on the device's config addr.
+ *
+ * Keep in mind that the pe_config_addr *might* be numerically identical to the
+ * device's config addr, but the two are conceptually distinct.
+ *
+ * Returns the pe_config_addr, or a negative error code.
+ */
+static int pseries_eeh_get_pe_config_addr(struct pci_dn *pdn)
+{
+ int config_addr = rtas_config_addr(pdn->busno, pdn->devfn, 0);
+ struct pci_controller *phb = pdn->phb;
+ int ret, rets[3];
+
+ /* prefer the "info2" variant of the RTAS call when firmware has it */
+ if (ibm_get_config_addr_info2 != RTAS_UNKNOWN_SERVICE) {
+ /*
+ * First of all, use function 1 to determine if this device is
+ * part of a PE or not. rets[0] being zero indicates it's not.
+ */
+ ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets,
+ config_addr, BUID_HI(phb->buid),
+ BUID_LO(phb->buid), 1);
+ if (ret || (rets[0] == 0))
+ return -ENOENT;
+
+ /* Retrieve the associated PE config address with function 0 */
+ ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets,
+ config_addr, BUID_HI(phb->buid),
+ BUID_LO(phb->buid), 0);
+ if (ret) {
+ pr_warn("%s: Failed to get address for PHB#%x-PE#%x\n",
+ __func__, phb->global_number, config_addr);
+ return -ENXIO;
+ }
+
+ return rets[0];
+ }
+
+ /* otherwise fall back to the original call, which has no PE check */
+ if (ibm_get_config_addr_info != RTAS_UNKNOWN_SERVICE) {
+ ret = rtas_call(ibm_get_config_addr_info, 4, 2, rets,
+ config_addr, BUID_HI(phb->buid),
+ BUID_LO(phb->buid), 0);
+ if (ret) {
+ pr_warn("%s: Failed to get address for PHB#%x-PE#%x\n",
+ __func__, phb->global_number, config_addr);
+ return -ENXIO;
+ }
+
+ return rets[0];
+ }
+
+ /*
+ * PAPR does describe a process for finding the pe_config_addr that was
+ * used before the ibm,get-config-addr-info calls were added. However,
+ * I haven't found *any* systems that don't have that RTAS call
+ * implemented. If you happen to find one that needs the old DT based
+ * process, patches are welcome!
+ */
+ return -ENOENT;
+}
+
+/**
+ * pseries_eeh_phb_reset - Reset the specified PHB
+ * @phb: PCI controller
+ * @config_addr: the associated config address
+ * @option: reset option
+ *
+ * Reset the specified PHB/PE
+ */
+static int pseries_eeh_phb_reset(struct pci_controller *phb, int config_addr, int option)
+{
+	int rc;
+
+	/* Ask firmware to perform the requested reset on the PE. */
+	rc = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
+		       config_addr, BUID_HI(phb->buid),
+		       BUID_LO(phb->buid), option);
+
+	/* A fundamental reset rejected with -8 is retried as a hot reset. */
+	if (rc == -8 && option == EEH_RESET_FUNDAMENTAL) {
+		option = EEH_RESET_HOT;
+		rc = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
+			       config_addr, BUID_HI(phb->buid),
+			       BUID_LO(phb->buid), option);
+	}
+
+	/* Hold time while a reset is asserted, settle time otherwise. */
+	switch (option) {
+	case EEH_RESET_FUNDAMENTAL:
+	case EEH_RESET_HOT:
+		msleep(EEH_PE_RST_HOLD_TIME);
+		break;
+	default:
+		msleep(EEH_PE_RST_SETTLE_TIME);
+		break;
+	}
+
+	return rc;
+}
+
+/**
+ * pseries_eeh_phb_configure_bridge - Configure PCI bridges in the indicated PE
+ * @phb: PCI controller
+ * @config_addr: the associated config address
+ *
+ * The function will be called to reconfigure the bridges included
+ * in the specified PE so that the malfunctioning PE can be recovered
+ * again.
+ */
+static int pseries_eeh_phb_configure_bridge(struct pci_controller *phb, int config_addr)
+{
+ int ret;
+ /* Waiting 0.2s maximum before skipping configuration */
+ int max_wait = 200;
+
+ while (max_wait > 0) {
+ ret = rtas_call(ibm_configure_pe, 3, 1, NULL,
+ config_addr, BUID_HI(phb->buid),
+ BUID_LO(phb->buid));
+
+ /* 0 is success, negative is a hard error, positive means retry */
+ if (!ret)
+ return ret;
+ if (ret < 0)
+ break;
+
+ /*
+ * If RTAS returns a delay value that's above 100ms, cut it
+ * down to 100ms in case firmware made a mistake. For more
+ * on how these delay values work see rtas_busy_delay_time
+ */
+ if (ret > RTAS_EXTENDED_DELAY_MIN+2 &&
+ ret <= RTAS_EXTENDED_DELAY_MAX)
+ ret = RTAS_EXTENDED_DELAY_MIN+2;
+
+ /* give up rather than sleep past the remaining wait budget */
+ max_wait -= rtas_busy_delay_time(ret);
+
+ if (max_wait < 0)
+ break;
+
+ rtas_busy_delay(ret);
+ }
+
+ pr_warn("%s: Unable to configure bridge PHB#%x-PE#%x (%d)\n",
+ __func__, phb->global_number, config_addr, ret);
+ /* PAPR defines -3 as "Parameter Error" for this function: */
+ if (ret == -3)
+ return -EINVAL;
+ else
+ return -EIO;
+}
+
+/*
+ * Buffer for reporting slot-error-detail rtas calls. It's here
+ * in BSS, and not dynamically allocated, so that it ends up in
+ * RMO where RTAS can access it.
+ */
+static unsigned char slot_errbuf[RTAS_ERROR_LOG_MAX];
+static DEFINE_SPINLOCK(slot_errbuf_lock);
+static int eeh_error_buf_size;
+
+/*
+ * Return PCI_CAPABILITY_LIST if the device behind @pdn advertises a
+ * capability list in its PCI_STATUS register, otherwise 0.
+ *
+ * A NULL @pdn or a failed config read is treated as "no capability list".
+ */
+static int pseries_eeh_cap_start(struct pci_dn *pdn)
+{
+	u32 status = 0;
+
+	if (!pdn)
+		return 0;
+
+	/* Do not test the status bits if the read itself did not succeed. */
+	if (rtas_read_config(pdn, PCI_STATUS, 2, &status) != PCIBIOS_SUCCESSFUL)
+		return 0;
+	if (!(status & PCI_STATUS_CAP_LIST))
+		return 0;
+
+	return PCI_CAPABILITY_LIST;
+}
+
+
+/*
+ * Walk the capability list of the device behind @pdn looking for capability
+ * id @cap. Returns the config space offset of the capability, or 0 if it is
+ * not found or the device has no capability list.
+ */
+static int pseries_eeh_find_cap(struct pci_dn *pdn, int cap)
+{
+ int pos = pseries_eeh_cap_start(pdn);
+ int cnt = 48; /* Maximal number of capabilities */
+ u32 id;
+
+ if (!pos)
+ return 0;
+
+ while (cnt--) {
+ /* pos holds the offset of a "next" pointer; load what it points at */
+ /* NOTE(review): &pos is an int * here, while other callers pass a u32 * */
+ rtas_read_config(pdn, pos, 1, &pos);
+ /* offsets below 0x40 end the walk */
+ if (pos < 0x40)
+ break;
+ /* mask off the low two bits of the pointer */
+ pos &= ~3;
+ rtas_read_config(pdn, pos + PCI_CAP_LIST_ID, 1, &id);
+ if (id == 0xff)
+ break;
+ if (id == cap)
+ return pos;
+ pos += PCI_CAP_LIST_NEXT;
+ }
+
+ return 0;
+}
+
+/*
+ * Search the extended capability chain of the device behind @pdn, starting
+ * at offset 256, for capability id @cap. Returns the capability's offset,
+ * or 0 if it is absent, the device has no PCIe capability, or a read fails.
+ */
+static int pseries_eeh_find_ecap(struct pci_dn *pdn, int cap)
+{
+	struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
+	int offset = 256;
+	int budget;
+	u32 hdr;
+
+	if (!edev || !edev->pcie_cap)
+		return 0;
+
+	/* A failed read or an all-zero first header ends the search. */
+	if (rtas_read_config(pdn, offset, 4, &hdr) != PCIBIOS_SUCCESSFUL || !hdr)
+		return 0;
+
+	/* Bound the walk so a looping chain cannot spin forever. */
+	for (budget = (4096 - 256) / 8; budget > 0; budget--) {
+		if (PCI_EXT_CAP_ID(hdr) == cap && offset)
+			return offset;
+
+		offset = PCI_EXT_CAP_NEXT(hdr);
+		if (offset < 256)
+			break;
+
+		if (rtas_read_config(pdn, offset, 4, &hdr) != PCIBIOS_SUCCESSFUL)
+			break;
+	}
+
+	return 0;
+}
+
+/**
+ * pseries_eeh_pe_get_parent - Retrieve the parent PE
+ * @edev: EEH device
+ *
+ * The whole PEs existing in the system are organized as hierarchy
+ * tree. The function is used to retrieve the parent PE according
+ * to the parent EEH device.
+ */
+static struct eeh_pe *pseries_eeh_pe_get_parent(struct eeh_dev *edev)
+{
+	struct pci_dn *node = eeh_dev_to_pdn(edev);
+
+	/*
+	 * Start from the physical function's node when there is one,
+	 * otherwise from the parent of the device's own node.
+	 */
+	if (edev->physfn)
+		node = pci_get_pdn(edev->physfn);
+	else if (node)
+		node = node->parent;
+
+	/*
+	 * Climb towards the root: an ancestor further up may already have
+	 * a PE even when the direct parent does not.
+	 */
+	for (; node; node = node->parent) {
+		struct eeh_dev *ancestor = pdn_to_eeh_dev(node);
+
+		/* We're poking out of PCI territory */
+		if (!ancestor)
+			return NULL;
+
+		if (ancestor->pe)
+			return ancestor->pe;
+	}
+
+	return NULL;
+}
+
+/**
+ * pseries_eeh_init_edev - initialise the eeh_dev and eeh_pe for a pci_dn
+ *
+ * @pdn: PCI device node
+ *
+ * When we discover a new PCI device via the device-tree we create a
+ * corresponding pci_dn and we allocate, but don't initialise, an eeh_dev.
+ * This function takes care of the initialisation and inserts the eeh_dev
+ * into the correct eeh_pe. If no eeh_pe exists we'll allocate one.
+ */
+static void pseries_eeh_init_edev(struct pci_dn *pdn)
+{
+	struct eeh_pe pe, *parent;
+	struct eeh_dev *edev;
+	u32 pcie_flags;
+	int ret;
+
+	if (WARN_ON_ONCE(!eeh_has_flag(EEH_PROBE_MODE_DEVTREE)))
+		return;
+
+	/*
+	 * Find the eeh_dev for this pdn. The storage for the eeh_dev was
+	 * allocated at the same time as the pci_dn.
+	 *
+	 * XXX: We should probably re-visit that.
+	 */
+	edev = pdn_to_eeh_dev(pdn);
+	if (!edev)
+		return;
+
+	/*
+	 * If ->pe is set then we've already probed this device. We hit
+	 * this path when a pci_dev is removed and rescanned while recovering
+	 * a PE (i.e. for devices where the driver doesn't support error
+	 * recovery).
+	 */
+	if (edev->pe)
+		return;
+
+	/* Check class/vendor/device IDs */
+	if (!pdn->vendor_id || !pdn->device_id || !pdn->class_code)
+		return;
+
+	/* Skip for PCI-ISA bridge */
+	if ((pdn->class_code >> 8) == PCI_CLASS_BRIDGE_ISA)
+		return;
+
+	eeh_edev_dbg(edev, "Probing device\n");
+
+	/*
+	 * Update the capability offsets and mode of the eeh device. The
+	 * mode must correctly reflect whether the device is a root port
+	 * or a PCIe switch downstream port.
+	 */
+	edev->pcix_cap = pseries_eeh_find_cap(pdn, PCI_CAP_ID_PCIX);
+	edev->pcie_cap = pseries_eeh_find_cap(pdn, PCI_CAP_ID_EXP);
+	edev->aer_cap = pseries_eeh_find_ecap(pdn, PCI_EXT_CAP_ID_ERR);
+	edev->mode &= 0xFFFFFF00;
+	if ((pdn->class_code >> 8) == PCI_CLASS_BRIDGE_PCI) {
+		edev->mode |= EEH_DEV_BRIDGE;
+		if (edev->pcie_cap) {
+			rtas_read_config(pdn, edev->pcie_cap + PCI_EXP_FLAGS,
+					 2, &pcie_flags);
+			pcie_flags = (pcie_flags & PCI_EXP_FLAGS_TYPE) >> 4;
+			if (pcie_flags == PCI_EXP_TYPE_ROOT_PORT)
+				edev->mode |= EEH_DEV_ROOT_PORT;
+			else if (pcie_flags == PCI_EXP_TYPE_DOWNSTREAM)
+				edev->mode |= EEH_DEV_DS_PORT;
+		}
+	}
+
+	/* first up, find the pe_config_addr for the PE containing the device */
+	ret = pseries_eeh_get_pe_config_addr(pdn);
+	if (ret < 0) {
+		eeh_edev_dbg(edev, "Unable to find pe_config_addr\n");
+		goto err;
+	}
+
+	/*
+	 * Try to enable EEH on a temporary on-stack PE: set_option() only
+	 * needs the PHB and the PE address.
+	 */
+	memset(&pe, 0, sizeof(struct eeh_pe));
+	pe.phb = pdn->phb;
+	pe.addr = ret;
+
+	eeh_edev_dbg(edev, "Enabling EEH on device\n");
+	ret = eeh_ops->set_option(&pe, EEH_OPT_ENABLE);
+	if (ret) {
+		eeh_edev_dbg(edev, "EEH failed to enable on device (code %d)\n", ret);
+		goto err;
+	}
+
+	edev->pe_config_addr = pe.addr;
+
+	eeh_add_flag(EEH_ENABLED);
+
+	/* Insert the device into the PE tree below its nearest parent PE. */
+	parent = pseries_eeh_pe_get_parent(edev);
+	eeh_pe_tree_insert(edev, parent);
+	eeh_save_bars(edev);
+	/* Terminate the message with a newline like every other call here. */
+	eeh_edev_dbg(edev, "EEH enabled for device\n");
+
+	return;
+
+err:
+	eeh_edev_dbg(edev, "EEH is unsupported on device (code = %d)\n", ret);
+}
+
+/*
+ * eeh_ops->probe hook: return the eeh_dev for @pdev if it has been placed
+ * in a PE, NULL otherwise.
+ */
+static struct eeh_dev *pseries_eeh_probe(struct pci_dev *pdev)
+{
+	struct pci_dn *pdn = pci_get_pdn_by_devfn(pdev->bus, pdev->devfn);
+	struct eeh_dev *edev;
+
+	if (!pdn)
+		return NULL;
+
+	/*
+	 * A device that supports EEH was already configured and inserted
+	 * into a PE by pseries_eeh_init_edev(); anything else is rejected.
+	 */
+	edev = pdn_to_eeh_dev(pdn);
+
+	return (edev && edev->pe) ? edev : NULL;
+}
+
+/**
+ * pseries_eeh_init_edev_recursive - Enable EEH for the indicated device
+ * @pdn: PCI device node
+ *
+ * This routine must be used to perform EEH initialization for the
+ * indicated PCI device that was added after system boot (e.g.
+ * hotplug, dlpar).
+ */
+void pseries_eeh_init_edev_recursive(struct pci_dn *pdn)
+{
+	struct pci_dn *child;
+
+	if (pdn) {
+		/* Children are initialised before the node itself. */
+		list_for_each_entry(child, &pdn->child_list, list)
+			pseries_eeh_init_edev_recursive(child);
+
+		pseries_eeh_init_edev(pdn);
+	}
+}
+EXPORT_SYMBOL_GPL(pseries_eeh_init_edev_recursive);
+
+/**
+ * pseries_eeh_set_option - Initialize EEH or MMIO/DMA reenable
+ * @pe: EEH PE
+ * @option: operation to be issued
+ *
+ * The function is used to control the EEH functionality globally.
+ * Currently, following options are support according to PAPR:
+ * Enable EEH, Disable EEH, Enable MMIO and Enable DMA
+ */
+static int pseries_eeh_set_option(struct eeh_pe *pe, int option)
+{
+	switch (option) {
+	case EEH_OPT_FREEZE_PE:
+		/* Not support */
+		return 0;
+	case EEH_OPT_DISABLE:
+	case EEH_OPT_ENABLE:
+	case EEH_OPT_THAW_MMIO:
+	case EEH_OPT_THAW_DMA:
+		/* These four are passed straight through to firmware. */
+		return rtas_call(ibm_set_eeh_option, 4, 1, NULL,
+				 pe->addr, BUID_HI(pe->phb->buid),
+				 BUID_LO(pe->phb->buid), option);
+	default:
+		pr_err("%s: Invalid option %d\n", __func__, option);
+		return -EINVAL;
+	}
+}
+
+/**
+ * pseries_eeh_get_state - Retrieve PE state
+ * @pe: EEH PE
+ * @delay: suggested time to wait if state is unavailable
+ *
+ * Retrieve the state of the specified PE. On RTAS compliant
+ * pseries platform, there already has one dedicated RTAS function
+ * for the purpose. It's notable that the associated PE config address
+ * might be ready when calling the function. Therefore, endeavour to
+ * use the PE config address if possible. Further more, there're 2
+ * RTAS calls for the purpose, we need to try the new one and back
+ * to the old one if the new one couldn't work properly.
+ */
+static int pseries_eeh_get_state(struct eeh_pe *pe, int *delay)
+{
+	int rets[4];
+	int rc;
+
+	/* Prefer the newer RTAS call and fall back to the old one. */
+	if (ibm_read_slot_reset_state2 != RTAS_UNKNOWN_SERVICE) {
+		rc = rtas_call(ibm_read_slot_reset_state2, 3, 4, rets,
+			       pe->addr, BUID_HI(pe->phb->buid),
+			       BUID_LO(pe->phb->buid));
+	} else if (ibm_read_slot_reset_state != RTAS_UNKNOWN_SERVICE) {
+		/* Fake PE unavailable info */
+		rets[2] = 0;
+		rc = rtas_call(ibm_read_slot_reset_state, 3, 3, rets,
+			       pe->addr, BUID_HI(pe->phb->buid),
+			       BUID_LO(pe->phb->buid));
+	} else {
+		return EEH_STATE_NOT_SUPPORT;
+	}
+
+	if (rc)
+		return rc;
+
+	/* A zero second output is reported as "not supported". */
+	if (!rets[1])
+		return EEH_STATE_NOT_SUPPORT;
+
+	/* Translate the firmware state code into EEH_STATE_* flags. */
+	switch (rets[0]) {
+	case 0:
+		return EEH_STATE_MMIO_ACTIVE |
+		       EEH_STATE_DMA_ACTIVE;
+	case 1:
+		return EEH_STATE_RESET_ACTIVE |
+		       EEH_STATE_MMIO_ACTIVE |
+		       EEH_STATE_DMA_ACTIVE;
+	case 2:
+		return 0;
+	case 4:
+		return EEH_STATE_MMIO_ENABLED;
+	case 5:
+		/* Unavailable only when firmware also supplies a wait time. */
+		if (!rets[2])
+			return EEH_STATE_NOT_SUPPORT;
+		if (delay)
+			*delay = rets[2];
+		return EEH_STATE_UNAVAILABLE;
+	default:
+		return EEH_STATE_NOT_SUPPORT;
+	}
+}
+
+/**
+ * pseries_eeh_reset - Reset the specified PE
+ * @pe: EEH PE
+ * @option: reset option
+ *
+ * Reset the specified PE
+ */
+static int pseries_eeh_reset(struct eeh_pe *pe, int option)
+{
+	struct pci_controller *phb = pe->phb;
+
+	/* The PE address doubles as the config address for the reset call. */
+	return pseries_eeh_phb_reset(phb, pe->addr, option);
+}
+
+/**
+ * pseries_eeh_get_log - Retrieve error log
+ * @pe: EEH PE
+ * @severity: temporary or permanent error log
+ * @drv_log: driver log to be combined with retrieved error log
+ * @len: length of driver log
+ *
+ * Retrieve the temporary or permanent error from the PE.
+ * Actually, the error will be retrieved through the dedicated
+ * RTAS call.
+ */
+static int pseries_eeh_get_log(struct eeh_pe *pe, int severity, char *drv_log, unsigned long len)
+{
+ unsigned long flags;
+ int ret;
+
+ /* slot_errbuf is a single shared buffer, so serialise its users */
+ spin_lock_irqsave(&slot_errbuf_lock, flags);
+ memset(slot_errbuf, 0, eeh_error_buf_size);
+
+ /* firmware is given physical addresses for both buffers */
+ ret = rtas_call(ibm_slot_error_detail, 8, 1, NULL, pe->addr,
+ BUID_HI(pe->phb->buid), BUID_LO(pe->phb->buid),
+ virt_to_phys(drv_log), len,
+ virt_to_phys(slot_errbuf), eeh_error_buf_size,
+ severity);
+ /* only log the buffer when the call succeeded */
+ if (!ret)
+ log_error(slot_errbuf, ERR_TYPE_RTAS_LOG, 0);
+ spin_unlock_irqrestore(&slot_errbuf_lock, flags);
+
+ return ret;
+}
+
+/**
+ * pseries_eeh_configure_bridge - Configure PCI bridges in the indicated PE
+ * @pe: EEH PE
+ *
+ */
+static int pseries_eeh_configure_bridge(struct eeh_pe *pe)
+{
+	struct pci_controller *phb = pe->phb;
+
+	/* Thin wrapper: unpack the PE and defer to the PHB-level helper. */
+	return pseries_eeh_phb_configure_bridge(phb, pe->addr);
+}
+
+/**
+ * pseries_eeh_read_config - Read PCI config space
+ * @edev: EEH device handle
+ * @where: PCI config space offset
+ * @size: size to read
+ * @val: return value
+ *
+ * Read config space from the specified device
+ */
+static int pseries_eeh_read_config(struct eeh_dev *edev, int where, int size, u32 *val)
+{
+	/* Config accesses go through RTAS, keyed by the device's pci_dn. */
+	return rtas_read_config(eeh_dev_to_pdn(edev), where, size, val);
+}
+
+/**
+ * pseries_eeh_write_config - Write PCI config space
+ * @edev: EEH device handle
+ * @where: PCI config space offset
+ * @size: size to write
+ * @val: value to be written
+ *
+ * Write config space to the specified device
+ */
+static int pseries_eeh_write_config(struct eeh_dev *edev, int where, int size, u32 val)
+{
+	/* Config accesses go through RTAS, keyed by the device's pci_dn. */
+	return rtas_write_config(eeh_dev_to_pdn(edev), where, size, val);
+}
+
+#ifdef CONFIG_PCI_IOV
+/*
+ * Issue the "allow unfreeze" RTAS call for the device behind @pdn.
+ * @vf_pe_array: big-endian PE numbers; RTAS_DATA_BUF_SIZE bytes are copied
+ * from it, so the buffer must be at least that large
+ * @cur_vfs: number of valid entries in @vf_pe_array
+ *
+ * Returns the RTAS call's return value (0 on success).
+ */
+static int pseries_send_allow_unfreeze(struct pci_dn *pdn, u16 *vf_pe_array, int cur_vfs)
+{
+ int rc;
+ int ibm_allow_unfreeze = rtas_function_token(RTAS_FN_IBM_OPEN_SRIOV_ALLOW_UNFREEZE);
+ unsigned long buid, addr;
+
+ addr = rtas_config_addr(pdn->busno, pdn->devfn, 0);
+ buid = pdn->phb->buid;
+ /* rtas_data_buf is shared, so hold its lock across the copy and the call */
+ spin_lock(&rtas_data_buf_lock);
+ memcpy(rtas_data_buf, vf_pe_array, RTAS_DATA_BUF_SIZE);
+ rc = rtas_call(ibm_allow_unfreeze, 5, 1, NULL,
+ addr,
+ BUID_HI(buid),
+ BUID_LO(buid),
+ rtas_data_buf, cur_vfs * sizeof(u16));
+ spin_unlock(&rtas_data_buf_lock);
+ if (rc)
+ pr_warn("%s: Failed to allow unfreeze for PHB#%x-PE#%lx, rc=%x\n",
+ __func__,
+ pdn->phb->global_number, addr, rc);
+ return rc;
+}
+
+/*
+ * Build the list of VF PE numbers for @edev and send it to firmware with
+ * pseries_send_allow_unfreeze(). For a physical function all of its current
+ * VFs are listed; for a virtual function only its own PE is listed. The
+ * result is recorded in last_allow_rc of the pci_dn(s) involved.
+ *
+ * Returns 0 if there are no VFs, -ENOMEM on allocation failure, otherwise
+ * the result of the RTAS call.
+ */
+static int pseries_call_allow_unfreeze(struct eeh_dev *edev)
+{
+ int cur_vfs = 0, rc = 0, vf_index, bus, devfn, vf_pe_num;
+ struct pci_dn *pdn, *tmp, *parent, *physfn_pdn;
+ u16 *vf_pe_array;
+
+ /* sized to match the RTAS_DATA_BUF_SIZE copy done by the send helper */
+ vf_pe_array = kzalloc(RTAS_DATA_BUF_SIZE, GFP_KERNEL);
+ if (!vf_pe_array)
+ return -ENOMEM;
+ if (pci_num_vf(edev->physfn ? edev->physfn : edev->pdev)) {
+ if (edev->pdev->is_physfn) {
+ cur_vfs = pci_num_vf(edev->pdev);
+ pdn = eeh_dev_to_pdn(edev);
+ parent = pdn->parent;
+ for (vf_index = 0; vf_index < cur_vfs; vf_index++)
+ vf_pe_array[vf_index] =
+ cpu_to_be16(pdn->pe_num_map[vf_index]);
+ rc = pseries_send_allow_unfreeze(pdn, vf_pe_array,
+ cur_vfs);
+ pdn->last_allow_rc = rc;
+ /* record the result on each VF's pci_dn, matched by bus/devfn */
+ for (vf_index = 0; vf_index < cur_vfs; vf_index++) {
+ list_for_each_entry_safe(pdn, tmp,
+ &parent->child_list,
+ list) {
+ bus = pci_iov_virtfn_bus(edev->pdev,
+ vf_index);
+ devfn = pci_iov_virtfn_devfn(edev->pdev,
+ vf_index);
+ if (pdn->busno != bus ||
+ pdn->devfn != devfn)
+ continue;
+ pdn->last_allow_rc = rc;
+ }
+ }
+ } else {
+ /* virtual function: send only this VF's PE via its physfn */
+ pdn = pci_get_pdn(edev->pdev);
+ physfn_pdn = pci_get_pdn(edev->physfn);
+
+ vf_pe_num = physfn_pdn->pe_num_map[edev->vf_index];
+ vf_pe_array[0] = cpu_to_be16(vf_pe_num);
+ rc = pseries_send_allow_unfreeze(physfn_pdn,
+ vf_pe_array, 1);
+ pdn->last_allow_rc = rc;
+ }
+ }
+
+ kfree(vf_pe_array);
+ return rc;
+}
+
+/*
+ * eeh_ops->notify_resume hook. Returns -EEXIST for a NULL @edev, -EINVAL if
+ * firmware lacks the allow-unfreeze call, 0 for devices that are neither a
+ * physical nor a virtual function, else pseries_call_allow_unfreeze()'s result.
+ */
+static int pseries_notify_resume(struct eeh_dev *edev)
+{
+	int token;
+
+	if (!edev)
+		return -EEXIST;
+
+	token = rtas_function_token(RTAS_FN_IBM_OPEN_SRIOV_ALLOW_UNFREEZE);
+	if (token == RTAS_UNKNOWN_SERVICE)
+		return -EINVAL;
+
+	/* Only SR-IOV physical and virtual functions need the call. */
+	if (!edev->pdev->is_physfn && !edev->pdev->is_virtfn)
+		return 0;
+
+	return pseries_call_allow_unfreeze(edev);
+}
+#endif
+
+/*
+ * EEH operations table for pseries, registered by eeh_pseries_init() via
+ * eeh_init(). Hooks left NULL have no pseries implementation in this file.
+ */
+static struct eeh_ops pseries_eeh_ops = {
+ .name = "pseries",
+ .probe = pseries_eeh_probe,
+ .set_option = pseries_eeh_set_option,
+ .get_state = pseries_eeh_get_state,
+ .reset = pseries_eeh_reset,
+ .get_log = pseries_eeh_get_log,
+ .configure_bridge = pseries_eeh_configure_bridge,
+ .err_inject = NULL,
+ .read_config = pseries_eeh_read_config,
+ .write_config = pseries_eeh_write_config,
+ .next_error = NULL,
+ .restore_config = NULL, /* NB: configure_bridge() does this */
+#ifdef CONFIG_PCI_IOV
+ .notify_resume = pseries_notify_resume
+#endif
+};
+
+/**
+ * eeh_pseries_init - Register platform dependent EEH operations
+ *
+ * EEH initialization on pseries platform. This function should be
+ * called before any EEH related functions.
+ */
+static int __init eeh_pseries_init(void)
+{
+ struct pci_controller *phb;
+ struct pci_dn *pdn;
+ int ret, config_addr;
+
+ /* figure out EEH RTAS function call tokens */
+ ibm_set_eeh_option = rtas_function_token(RTAS_FN_IBM_SET_EEH_OPTION);
+ ibm_set_slot_reset = rtas_function_token(RTAS_FN_IBM_SET_SLOT_RESET);
+ ibm_read_slot_reset_state2 = rtas_function_token(RTAS_FN_IBM_READ_SLOT_RESET_STATE2);
+ ibm_read_slot_reset_state = rtas_function_token(RTAS_FN_IBM_READ_SLOT_RESET_STATE);
+ ibm_slot_error_detail = rtas_function_token(RTAS_FN_IBM_SLOT_ERROR_DETAIL);
+ ibm_get_config_addr_info2 = rtas_function_token(RTAS_FN_IBM_GET_CONFIG_ADDR_INFO2);
+ ibm_get_config_addr_info = rtas_function_token(RTAS_FN_IBM_GET_CONFIG_ADDR_INFO);
+ ibm_configure_pe = rtas_function_token(RTAS_FN_IBM_CONFIGURE_PE);
+
+ /*
+ * ibm,configure-pe and ibm,configure-bridge have the same semantics,
+ * however ibm,configure-pe can be faster. If we can't find
+ * ibm,configure-pe then fall back to using ibm,configure-bridge.
+ */
+ if (ibm_configure_pe == RTAS_UNKNOWN_SERVICE)
+ ibm_configure_pe = rtas_function_token(RTAS_FN_IBM_CONFIGURE_BRIDGE);
+
+ /*
+ * Necessary sanity check. We needn't check "get-config-addr-info"
+ * and its variant since the old firmware probably support address
+ * of domain/bus/slot/function for EEH RTAS operations.
+ */
+ if (ibm_set_eeh_option == RTAS_UNKNOWN_SERVICE ||
+ ibm_set_slot_reset == RTAS_UNKNOWN_SERVICE ||
+ (ibm_read_slot_reset_state2 == RTAS_UNKNOWN_SERVICE &&
+ ibm_read_slot_reset_state == RTAS_UNKNOWN_SERVICE) ||
+ ibm_slot_error_detail == RTAS_UNKNOWN_SERVICE ||
+ ibm_configure_pe == RTAS_UNKNOWN_SERVICE) {
+ pr_info("EEH functionality not supported\n");
+ return -EINVAL;
+ }
+
+ /* Initialize error log size */
+ eeh_error_buf_size = rtas_get_error_log_max();
+
+ /* Set EEH probe mode */
+ eeh_add_flag(EEH_PROBE_MODE_DEVTREE | EEH_ENABLE_IO_FOR_LOG);
+
+ /* Set EEH machine dependent code */
+ ppc_md.pcibios_bus_add_device = pseries_pcibios_bus_add_device;
+
+ /* in a kdump kernel, or when asked to, reset every populated PHB first */
+ if (is_kdump_kernel() || reset_devices) {
+ pr_info("Issue PHB reset ...\n");
+ list_for_each_entry(phb, &hose_list, list_node) {
+ // Skip if the slot is empty
+ if (list_empty(&PCI_DN(phb->dn)->child_list))
+ continue;
+
+ /* the PHB's first child supplies the PE config address */
+ pdn = list_first_entry(&PCI_DN(phb->dn)->child_list, struct pci_dn, list);
+ config_addr = pseries_eeh_get_pe_config_addr(pdn);
+
+ /* invalid PE config addr */
+ if (config_addr < 0)
+ continue;
+
+ /* assert reset, deassert it, then reconfigure the bridges */
+ pseries_eeh_phb_reset(phb, config_addr, EEH_RESET_FUNDAMENTAL);
+ pseries_eeh_phb_reset(phb, config_addr, EEH_RESET_DEACTIVATE);
+ pseries_eeh_phb_configure_bridge(phb, config_addr);
+ }
+ }
+
+ ret = eeh_init(&pseries_eeh_ops);
+ if (!ret)
+ pr_info("EEH: pSeries platform initialized\n");
+ else
+ pr_info("EEH: pSeries platform initialization failure (%d)\n",
+ ret);
+ return ret;
+}
+machine_arch_initcall(pseries, eeh_pseries_init);
diff --git a/arch/powerpc/platforms/pseries/event_sources.c b/arch/powerpc/platforms/pseries/event_sources.c
new file mode 100644
index 000000000..623dfe0d8
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/event_sources.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2001 Dave Engebretsen IBM Corporation
+ */
+
+#include <linux/interrupt.h>
+#include <linux/of_irq.h>
+
+#include "pseries.h"
+
+/*
+ * Request the interrupts described by @np, installing @handler under @name
+ * for each of them.
+ *
+ * Interrupt indices 0..15 are looked up with of_irq_get().  A negative lookup
+ * result ends the scan silently, a zero result triggers a WARN and moves on to
+ * the next index, and a request_irq() failure triggers a WARN and ends the
+ * scan.
+ */
+void __init request_event_sources_irqs(struct device_node *np,
+				       irq_handler_t handler,
+				       const char *name)
+{
+	int idx;
+
+	for (idx = 0; idx < 16; idx++) {
+		int virq = of_irq_get(np, idx);
+		int err;
+
+		if (virq < 0)
+			break;
+
+		if (WARN(virq == 0,
+			 "event-sources: Unable to allocate interrupt number for %pOF\n",
+			 np))
+			continue;
+
+		err = request_irq(virq, handler, 0, name, NULL);
+		if (WARN(err,
+			 "event-sources: Unable to request interrupt %d for %pOF\n",
+			 virq, np))
+			break;
+	}
+}
diff --git a/arch/powerpc/platforms/pseries/firmware.c b/arch/powerpc/platforms/pseries/firmware.c
new file mode 100644
index 000000000..18447e5fa
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/firmware.c
@@ -0,0 +1,191 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * pSeries firmware setup code.
+ *
+ * Portions from arch/powerpc/platforms/pseries/setup.c:
+ * Copyright (C) 1995 Linus Torvalds
+ * Adapted from 'alpha' version by Gary Thomas
+ * Modified by Cort Dougan (cort@cs.nmt.edu)
+ * Modified by PPC64 Team, IBM Corp
+ *
+ * Portions from arch/powerpc/kernel/firmware.c
+ * Copyright (C) 2001 Ben. Herrenschmidt (benh@kernel.crashing.org)
+ * Modifications for ppc64:
+ * Copyright (C) 2003 Dave Engebretsen <engebret@us.ibm.com>
+ * Copyright (C) 2005 Stephen Rothwell, IBM Corporation
+ *
+ * Copyright 2006 IBM Corporation.
+ */
+
+
+#include <linux/of_fdt.h>
+#include <asm/firmware.h>
+#include <asm/prom.h>
+#include <asm/udbg.h>
+#include <asm/svm.h>
+
+#include "pseries.h"
+
+/*
+ * One row of the hypertas lookup table: @name is compared against the strings
+ * found in ibm,hypertas-functions and @val is OR'ed into
+ * powerpc_firmware_features when it matches.  @name only ever points at
+ * string literals, hence the const qualifier.
+ */
+struct hypertas_fw_feature {
+	unsigned long val;
+	const char *name;
+};
+
+/*
+ * The names in this table match names in rtas/ibm,hypertas-functions. If the
+ * entry ends in a '*', only the characters up to the '*' are matched (prefix
+ * match). Otherwise the entire string must match.
+ *
+ * A single name may enable several feature bits at once, as the
+ * "hcall-multi-tce" entry does.
+ */
+static __initdata struct hypertas_fw_feature
+hypertas_fw_features_table[] = {
+ {FW_FEATURE_PFT, "hcall-pft"},
+ {FW_FEATURE_TCE, "hcall-tce"},
+ {FW_FEATURE_SPRG0, "hcall-sprg0"},
+ {FW_FEATURE_DABR, "hcall-dabr"},
+ {FW_FEATURE_COPY, "hcall-copy"},
+ {FW_FEATURE_ASR, "hcall-asr"},
+ {FW_FEATURE_DEBUG, "hcall-debug"},
+ {FW_FEATURE_PERF, "hcall-perf"},
+ {FW_FEATURE_DUMP, "hcall-dump"},
+ {FW_FEATURE_INTERRUPT, "hcall-interrupt"},
+ {FW_FEATURE_MIGRATE, "hcall-migrate"},
+ {FW_FEATURE_PERFMON, "hcall-perfmon"},
+ {FW_FEATURE_CRQ, "hcall-crq"},
+ {FW_FEATURE_VIO, "hcall-vio"},
+ {FW_FEATURE_RDMA, "hcall-rdma"},
+ {FW_FEATURE_LLAN, "hcall-lLAN"},
+ {FW_FEATURE_BULK_REMOVE, "hcall-bulk"},
+ {FW_FEATURE_XDABR, "hcall-xdabr"},
+ {FW_FEATURE_PUT_TCE_IND | FW_FEATURE_STUFF_TCE,
+ "hcall-multi-tce"},
+ {FW_FEATURE_SPLPAR, "hcall-splpar"},
+ {FW_FEATURE_VPHN, "hcall-vphn"},
+ {FW_FEATURE_SET_MODE, "hcall-set-mode"},
+ {FW_FEATURE_BEST_ENERGY, "hcall-best-energy-1*"},
+ {FW_FEATURE_HPT_RESIZE, "hcall-hpt-resize"},
+ {FW_FEATURE_BLOCK_REMOVE, "hcall-block-remove"},
+ {FW_FEATURE_PAPR_SCM, "hcall-scm"},
+ {FW_FEATURE_RPT_INVALIDATE, "hcall-rpt-invalidate"},
+ {FW_FEATURE_ENERGY_SCALE_INFO, "hcall-energy-scale-info"},
+ {FW_FEATURE_WATCHDOG, "hcall-watchdog"},
+ {FW_FEATURE_PLPKS, "hcall-pks"},
+};
+
+/*
+ * Return true when the hypertas function name @s matches the table entry
+ * @pattern.  A trailing '*' in @pattern makes it a prefix match on everything
+ * before the '*'; any other pattern has to match @s exactly.
+ */
+static bool __init hypertas_name_matches(const char *pattern, const char *s)
+{
+	size_t plen = strlen(pattern);
+
+	if (plen && pattern[plen - 1] == '*')
+		return !strncmp(pattern, s, plen - 1);
+
+	return !strcmp(pattern, s);
+}
+
+/*
+ * Build up the firmware features bitmask using the contents of
+ * device-tree/ibm,hypertas-functions.  @hypertas is walked as @len bytes of
+ * back-to-back NUL-terminated names; for each name the first matching table
+ * entry contributes its bits to powerpc_firmware_features.  Ultimately this
+ * functionality may be moved into prom.c prom_init().
+ */
+static void __init fw_hypertas_feature_init(const char *hypertas,
+					    unsigned long len)
+{
+	const char *end = hypertas + len;
+	const char *s;
+	int i;
+
+	pr_debug(" -> fw_hypertas_feature_init()\n");
+
+	for (s = hypertas; s < end; s += strlen(s) + 1) {
+		for (i = 0; i < ARRAY_SIZE(hypertas_fw_features_table); i++) {
+			if (!hypertas_name_matches(hypertas_fw_features_table[i].name, s))
+				continue;
+
+			/* we have a match; stop at the first one */
+			powerpc_firmware_features |=
+				hypertas_fw_features_table[i].val;
+			break;
+		}
+	}
+
+	/* The PUT_TCE_IND bit is dropped again when running as a secure guest. */
+	if (is_secure_guest() &&
+	    (powerpc_firmware_features & FW_FEATURE_PUT_TCE_IND)) {
+		powerpc_firmware_features &= ~FW_FEATURE_PUT_TCE_IND;
+		pr_debug("SVM: disabling PUT_TCE_IND firmware feature\n");
+	}
+
+	pr_debug(" <- fw_hypertas_feature_init()\n");
+}
+
+struct vec5_fw_feature {
+ unsigned long val; /* bits OR'ed into powerpc_firmware_features */
+ unsigned int feature; /* OV5_* value, decoded with OV5_INDX()/OV5_FEAT() */
+};
+
+/*
+ * ibm,architecture-vec-5 options and the firmware feature flag each of them
+ * switches on.
+ */
+static __initdata struct vec5_fw_feature
+vec5_fw_features_table[] = {
+	{ .val = FW_FEATURE_FORM1_AFFINITY,	.feature = OV5_FORM1_AFFINITY },
+	{ .val = FW_FEATURE_PRRN,		.feature = OV5_PRRN },
+	{ .val = FW_FEATURE_DRMEM_V2,		.feature = OV5_DRMEM_V2 },
+	{ .val = FW_FEATURE_DRC_INFO,		.feature = OV5_DRC_INFO },
+	{ .val = FW_FEATURE_FORM2_AFFINITY,	.feature = OV5_FORM2_AFFINITY },
+};
+
+/*
+ * Translate the option bits found in the @len bytes at @vec5 into firmware
+ * feature flags.  Each table entry names a byte index and a bit mask; an
+ * entry whose byte index lies beyond @len is ignored.
+ */
+static void __init fw_vec5_feature_init(const char *vec5, unsigned long len)
+{
+	const struct vec5_fw_feature *entry;
+	unsigned int byte, mask;
+	int n;
+
+	pr_debug(" -> fw_vec5_feature_init()\n");
+
+	for (n = 0; n < ARRAY_SIZE(vec5_fw_features_table); n++) {
+		entry = &vec5_fw_features_table[n];
+		byte = OV5_INDX(entry->feature);
+		mask = OV5_FEAT(entry->feature);
+
+		if (byte >= len)
+			continue;
+		if (!(vec5[byte] & mask))
+			continue;
+
+		powerpc_firmware_features |= entry->val;
+	}
+
+	pr_debug(" <- fw_vec5_feature_init()\n");
+}
+
+/*
+ * Flat device-tree scan callback.
+ * Called very early, MMU is off, device-tree isn't unflattened.
+ *
+ * Only depth-1 nodes are examined.  The "rtas"/"rtas@0" node supplies
+ * ibm,hypertas-functions (its presence also sets FW_FEATURE_LPAR) and the
+ * "chosen" node supplies ibm,architecture-vec-5.  The return value becomes
+ * non-zero once both nodes have been visited.
+ */
+static int __init probe_fw_features(unsigned long node, const char *uname, int
+				    depth, void *data)
+{
+	static int hypertas_found;
+	static int vec5_found;
+	const char *prop;
+	int len;
+
+	if (depth != 1)
+		return 0;
+
+	if (strcmp(uname, "rtas") == 0 || strcmp(uname, "rtas@0") == 0) {
+		prop = of_get_flat_dt_prop(node, "ibm,hypertas-functions",
+					   &len);
+		if (prop) {
+			powerpc_firmware_features |= FW_FEATURE_LPAR;
+			fw_hypertas_feature_init(prop, len);
+		}
+
+		/* The node counts as seen even when the property is absent. */
+		hypertas_found = 1;
+	}
+
+	if (strcmp(uname, "chosen") == 0) {
+		prop = of_get_flat_dt_prop(node, "ibm,architecture-vec-5",
+					   &len);
+		if (prop)
+			fw_vec5_feature_init(prop, len);
+
+		vec5_found = 1;
+	}
+
+	if (!hypertas_found)
+		return 0;
+
+	return vec5_found;
+}
+
+/*
+ * Walk the flattened device tree with probe_fw_features() to populate
+ * powerpc_firmware_features.  The scan result itself is not used.
+ */
+void __init pseries_probe_fw_features(void)
+{
+	(void)of_scan_flat_dt(probe_fw_features, NULL);
+}
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
new file mode 100644
index 000000000..e62835a12
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -0,0 +1,901 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * pseries CPU Hotplug infrastructure.
+ *
+ * Split out from arch/powerpc/platforms/pseries/setup.c
+ * arch/powerpc/kernel/rtas.c, and arch/powerpc/platforms/pseries/smp.c
+ *
+ * Peter Bergner, IBM March 2001.
+ * Copyright (C) 2001 IBM.
+ * Dave Engebretsen, Peter Bergner, and
+ * Mike Corrigan {engebret|bergner|mikec}@us.ibm.com
+ * Plus various changes from other IBM teams...
+ *
+ * Copyright (C) 2006 Michael Ellerman, IBM Corporation
+ */
+
+#define pr_fmt(fmt) "pseries-hotplug-cpu: " fmt
+
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/sched.h> /* for idle_task_exit */
+#include <linux/sched/hotplug.h>
+#include <linux/cpu.h>
+#include <linux/of.h>
+#include <linux/slab.h>
+#include <asm/prom.h>
+#include <asm/rtas.h>
+#include <asm/firmware.h>
+#include <asm/machdep.h>
+#include <asm/vdso_datapage.h>
+#include <asm/xics.h>
+#include <asm/xive.h>
+#include <asm/plpar_wrappers.h>
+#include <asm/topology.h>
+
+#include "pseries.h"
+
+/* This version can't take the spinlock, because it never returns */
+static int rtas_stop_self_token = RTAS_UNKNOWN_SERVICE;
+
+/*
+ * Record the CPU ids used on each node.
+ * Protected by cpu_add_remove_lock.
+ */
+static cpumask_var_t node_recorded_ids_map[MAX_NUMNODES];
+
+/*
+ * Stop the calling CPU through the RTAS stop-self token.  Interrupts are
+ * disabled first and the call is issued with rtas_call_unlocked(); getting
+ * past that call means it came back, which is treated as fatal.
+ */
+static void rtas_stop_self(void)
+{
+	static struct rtas_args stop_args;
+
+	local_irq_disable();
+
+	/* The token must have been resolved before any CPU is stopped. */
+	BUG_ON(rtas_stop_self_token == RTAS_UNKNOWN_SERVICE);
+
+	rtas_call_unlocked(&stop_args, rtas_stop_self_token, 0, 1, NULL);
+
+	panic("Alas, I survived.\n");
+}
+
+/*
+ * Run by a CPU that is taking itself offline: tear down its interrupt
+ * controller state, unregister its SLB shadow and VPA, then stop it via
+ * rtas_stop_self().
+ */
+static void pseries_cpu_offline_self(void)
+{
+	const unsigned int hw_id = hard_smp_processor_id();
+
+	local_irq_disable();
+	idle_task_exit();
+
+	if (!xive_enabled())
+		xics_teardown_cpu();
+	else
+		xive_teardown_cpu();
+
+	unregister_slb_shadow(hw_id);
+	unregister_vpa(hw_id);
+	rtas_stop_self();
+
+	/* Should never get here... */
+	BUG();
+	while (true)
+		;
+}
+
+/*
+ * Mark the calling CPU offline, hand boot_cpuid to another online CPU if
+ * this one held it, and disable/migrate its interrupts.  Always returns 0.
+ */
+static int pseries_cpu_disable(void)
+{
+	const int this_cpu = smp_processor_id();
+
+	set_cpu_online(this_cpu, false);
+	vdso_data->processorCount--;
+
+	/* fix boot_cpuid here */
+	if (boot_cpuid == this_cpu)
+		boot_cpuid = cpumask_any(cpu_online_mask);
+
+	/* FIXME: abstract this to not be platform specific later on */
+	if (!xive_enabled())
+		xics_migrate_irqs_away();
+	else
+		xive_smp_disable_cpu();
+
+	cleanup_cpu_mmu_context();
+
+	return 0;
+}
+
+/*
+ * pseries_cpu_die: Wait for the cpu to die.
+ * @cpu: logical processor id of the CPU whose death we're awaiting.
+ *
+ * Called from the context of the thread performing the cpu-offline.  The
+ * stopped state of @cpu is polled until it reads QCSS_STOPPED or
+ * QCSS_HARDWARE_ERROR; a warning is logged each time another 120 seconds
+ * pass without that happening, and polling continues.
+ *
+ * OTOH, pseries_cpu_offline_self() is called by the @cpu when it wants to
+ * self-destruct.
+ */
+static void pseries_cpu_die(unsigned int cpu)
+{
+	const unsigned int hwid = get_hard_smp_processor_id(cpu);
+	unsigned long deadline = jiffies + msecs_to_jiffies(120000);
+	int state;
+
+	for (;;) {
+		state = smp_query_cpu_stopped(hwid);
+		if (state == QCSS_STOPPED || state == QCSS_HARDWARE_ERROR)
+			break;
+
+		if (time_after(jiffies, deadline)) {
+			pr_warn("CPU %i (hwid %i) didn't die after 120 seconds\n",
+				cpu, hwid);
+			/* Re-arm so the warning repeats rather than floods. */
+			deadline = jiffies + msecs_to_jiffies(120000);
+		}
+
+		cond_resched();
+	}
+
+	if (state == QCSS_HARDWARE_ERROR)
+		pr_warn("CPU %i (hwid %i) reported error while dying\n",
+			cpu, hwid);
+
+	paca_ptrs[cpu]->cpu_start = 0;
+}
+
+/**
+ * find_cpu_id_range - find a linear range of @nthreads free CPU ids.
+ * @nthreads : the number of threads (cpu ids)
+ * @assigned_node : the node it belongs to or NUMA_NO_NODE if free ids from any
+ *                  node can be picked.
+ * @cpu_mask: the returned CPU mask.
+ *
+ * Candidate ranges start at id 0 and advance @nthreads ids at a time.
+ *
+ * Returns 0 on success, -ENOMEM if the scratch mask cannot be allocated and
+ * -ENOSPC if no free range exists.
+ */
+static int find_cpu_id_range(unsigned int nthreads, int assigned_node,
+			     cpumask_var_t *cpu_mask)
+{
+	cpumask_var_t free_ids;
+	unsigned int id, nid;
+	int rc = -ENOSPC;
+
+	if (!zalloc_cpumask_var(&free_ids, GFP_KERNEL))
+		return -ENOMEM;
+
+	/* Start with the window [0, nthreads). */
+	cpumask_clear(*cpu_mask);
+	for (id = 0; id < nthreads; id++)
+		cpumask_set_cpu(id, *cpu_mask);
+
+	BUG_ON(!cpumask_subset(cpu_present_mask, cpu_possible_mask));
+
+	/* Get a bitmap of unoccupied slots. */
+	cpumask_xor(free_ids, cpu_possible_mask, cpu_present_mask);
+
+	if (assigned_node != NUMA_NO_NODE) {
+		/*
+		 * Remove free ids previously assigned on the other nodes. We
+		 * can walk only online nodes because once a node became online
+		 * it is not turned offlined back.
+		 */
+		for_each_online_node(nid) {
+			if (nid != assigned_node)
+				cpumask_andnot(free_ids, free_ids,
+					       node_recorded_ids_map[nid]);
+		}
+	}
+
+	if (cpumask_empty(free_ids))
+		goto out;
+
+	for (;;) {
+		/* The window was shifted out of the mask: nothing fits. */
+		if (cpumask_empty(*cpu_mask))
+			break;
+
+		if (cpumask_subset(*cpu_mask, free_ids)) {
+			/* Found a range where we can insert the new cpu(s) */
+			rc = 0;
+			break;
+		}
+
+		cpumask_shift_left(*cpu_mask, *cpu_mask, nthreads);
+	}
+
+out:
+	free_cpumask_var(free_ids);
+	return rc;
+}
+
+/*
+ * Update cpu_present_mask and paca(s) for a new cpu node. The wrinkle
+ * here is that a cpu device node may represent multiple logical cpus
+ * in the SMT case. We must honor the assumption in other code that
+ * the logical ids for sibling SMT threads x and y are adjacent, such
+ * that x^1 == y and y^1 == x.
+ *
+ * Returns 0 on success or when @np has no "ibm,ppc-interrupt-server#s"
+ * property, -ENOMEM on allocation failure, or the error from
+ * find_cpu_id_range() when no ids are available.
+ */
+static int pseries_add_processor(struct device_node *np)
+{
+	int prop_len, nr_threads, nid, other, cpu;
+	bool borrowed = false;
+	cpumask_var_t new_ids;
+	const __be32 *hwids;
+	int rc = 0;
+
+	hwids = of_get_property(np, "ibm,ppc-interrupt-server#s", &prop_len);
+	if (!hwids)
+		return 0;
+
+	nr_threads = prop_len / sizeof(u32);
+
+	if (!alloc_cpumask_var(&new_ids, GFP_KERNEL))
+		return -ENOMEM;
+
+	/*
+	 * Fetch from the DT nodes read by dlpar_configure_connector() the NUMA
+	 * node id the added CPU belongs to.
+	 */
+	nid = of_node_to_nid(np);
+	if (nid < 0 || !node_possible(nid))
+		nid = first_online_node;
+
+	BUG_ON(nid == NUMA_NO_NODE);
+
+	cpu_maps_update_begin();
+
+	rc = find_cpu_id_range(nr_threads, nid, &new_ids);
+	if (rc && nr_node_ids > 1) {
+		/*
+		 * Try again, considering the free CPU ids from the other node.
+		 */
+		borrowed = true;
+		rc = find_cpu_id_range(nr_threads, NUMA_NO_NODE, &new_ids);
+	}
+
+	if (rc) {
+		pr_err("Cannot add cpu %pOF; this system configuration supports %d logical cpus.\n",
+		       np, num_possible_cpus());
+		goto out;
+	}
+
+	for_each_cpu(cpu, new_ids) {
+		BUG_ON(cpu_present(cpu));
+		set_cpu_present(cpu, true);
+		set_hard_smp_processor_id(cpu, be32_to_cpu(*hwids++));
+	}
+
+	/* Record the newly used CPU ids for the associated node. */
+	cpumask_or(node_recorded_ids_map[nid],
+		   node_recorded_ids_map[nid], new_ids);
+
+	/*
+	 * If the ids have been reused from another node, remove them from
+	 * that node's mask.
+	 */
+	if (borrowed) {
+		cpu = cpumask_first(new_ids);
+		pr_warn("Reusing free CPU ids %d-%d from another node\n",
+			cpu, cpu + nr_threads - 1);
+		for_each_online_node(other) {
+			if (other == nid)
+				continue;
+			cpumask_andnot(node_recorded_ids_map[other],
+				       node_recorded_ids_map[other],
+				       new_ids);
+		}
+	}
+
+out:
+	cpu_maps_update_done();
+	free_cpumask_var(new_ids);
+	return rc;
+}
+
+/*
+ * Update the present map for a cpu node which is going away, and set
+ * the hard id in the paca(s) to -1 to be consistent with boot time
+ * convention for non-present cpus.
+ *
+ * A warning is printed for every hardware thread id of @np that has no
+ * matching present cpu.
+ */
+static void pseries_remove_processor(struct device_node *np)
+{
+	const __be32 *hwids;
+	int prop_len, nr_threads, t;
+	unsigned int cpu;
+	u32 hwid;
+
+	hwids = of_get_property(np, "ibm,ppc-interrupt-server#s", &prop_len);
+	if (!hwids)
+		return;
+
+	nr_threads = prop_len / sizeof(u32);
+
+	cpu_maps_update_begin();
+	for (t = 0; t < nr_threads; t++) {
+		hwid = be32_to_cpu(hwids[t]);
+
+		for_each_present_cpu(cpu) {
+			if (get_hard_smp_processor_id(cpu) == hwid) {
+				BUG_ON(cpu_online(cpu));
+				set_cpu_present(cpu, false);
+				set_hard_smp_processor_id(cpu, -1);
+				update_numa_cpu_lookup_table(cpu, -1);
+				break;
+			}
+		}
+
+		/* The walk ran to completion without finding a match. */
+		if (cpu >= nr_cpu_ids)
+			printk(KERN_WARNING "Could not find cpu to remove with physical id 0x%x\n",
+			       hwid);
+	}
+	cpu_maps_update_done();
+}
+
+/*
+ * Offline every online logical CPU backing the hardware threads listed in
+ * @dn's "ibm,ppc-interrupt-server#s" property.
+ *
+ * Returns 0 on success, -EINVAL if the property is missing, -EBUSY if a
+ * thread is the last online CPU, or the error returned by device_offline().
+ */
+static int dlpar_offline_cpu(struct device_node *dn)
+{
+	int rc = 0;
+	unsigned int cpu;
+	int len, nthreads, i;
+	const __be32 *intserv;
+	u32 thread;
+
+	intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s", &len);
+	if (!intserv)
+		return -EINVAL;
+
+	nthreads = len / sizeof(u32);
+
+	cpu_maps_update_begin();
+	for (i = 0; i < nthreads; i++) {
+		thread = be32_to_cpu(intserv[i]);
+		for_each_present_cpu(cpu) {
+			if (get_hard_smp_processor_id(cpu) != thread)
+				continue;
+
+			/* Already offline: nothing to do for this thread. */
+			if (!cpu_online(cpu))
+				break;
+
+			/*
+			 * device_offline() will return -EBUSY (via cpu_down()) if there
+			 * is only one CPU left. Check it here to fail earlier and with a
+			 * more informative error message, while also retaining the
+			 * cpu_add_remove_lock to be sure that no CPUs are being
+			 * online/offlined during this check.
+			 */
+			if (num_online_cpus() == 1) {
+				pr_warn("Unable to remove last online CPU %pOFn\n", dn);
+				rc = -EBUSY;
+				goto out_unlock;
+			}
+
+			/* device_offline() is called with the maps lock dropped. */
+			cpu_maps_update_done();
+			rc = device_offline(get_cpu_device(cpu));
+			if (rc)
+				goto out;
+			cpu_maps_update_begin();
+			break;
+		}
+		/*
+		 * A walk that ran off the end of the present mask leaves
+		 * cpu >= nr_cpu_ids (the same test pseries_remove_processor()
+		 * uses); num_possible_cpus() is a count, not that end value.
+		 */
+		if (cpu >= nr_cpu_ids) {
+			pr_warn("Could not find cpu to offline with physical id 0x%x\n",
+				thread);
+		}
+	}
+out_unlock:
+	cpu_maps_update_done();
+
+out:
+	return rc;
+}
+
+/*
+ * Online the logical CPUs backing the hardware threads listed in @dn's
+ * "ibm,ppc-interrupt-server#s" property.  Non-primary threads are left
+ * offline when SMT is not enabled or the thread is not allowed.
+ *
+ * If device_online() fails, the threads of @dn are offlined again through
+ * dlpar_offline_cpu() and the error is returned.  Returns 0 on success and
+ * -EINVAL if the property is missing.
+ */
+static int dlpar_online_cpu(struct device_node *dn)
+{
+	int rc = 0;
+	unsigned int cpu;
+	int len, nthreads, i;
+	const __be32 *intserv;
+	u32 thread;
+
+	intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s", &len);
+	if (!intserv)
+		return -EINVAL;
+
+	nthreads = len / sizeof(u32);
+
+	cpu_maps_update_begin();
+	for (i = 0; i < nthreads; i++) {
+		thread = be32_to_cpu(intserv[i]);
+		for_each_present_cpu(cpu) {
+			if (get_hard_smp_processor_id(cpu) != thread)
+				continue;
+
+			if (!topology_is_primary_thread(cpu)) {
+				if (cpu_smt_control != CPU_SMT_ENABLED)
+					break;
+				if (!topology_smt_thread_allowed(cpu))
+					break;
+			}
+
+			/* device_online() is called with the maps lock dropped. */
+			cpu_maps_update_done();
+			find_and_update_cpu_nid(cpu);
+			rc = device_online(get_cpu_device(cpu));
+			if (rc) {
+				dlpar_offline_cpu(dn);
+				goto out;
+			}
+			cpu_maps_update_begin();
+
+			break;
+		}
+		/*
+		 * A walk that ran off the end of the present mask leaves
+		 * cpu >= nr_cpu_ids (the same test pseries_remove_processor()
+		 * uses); num_possible_cpus() is a count, not that end value.
+		 */
+		if (cpu >= nr_cpu_ids)
+			printk(KERN_WARNING "Could not find cpu to online "
+			       "with physical id 0x%x\n", thread);
+	}
+	cpu_maps_update_done();
+
+out:
+	return rc;
+
+}
+
+/*
+ * Return true if a child of @parent carries an "ibm,my-drc-index" property
+ * equal to @drc_index.  Children without the property are skipped.
+ */
+static bool dlpar_cpu_exists(struct device_node *parent, u32 drc_index)
+{
+	struct device_node *np = NULL;
+	u32 idx;
+
+	for_each_child_of_node(parent, np) {
+		if (of_property_read_u32(np, "ibm,my-drc-index", &idx))
+			continue;
+
+		if (idx != drc_index)
+			continue;
+
+		/* Leaving the iterator early: drop the reference it holds. */
+		of_node_put(np);
+		return true;
+	}
+
+	/* No child matched: the cpu doesn't exist */
+	return false;
+}
+
+/*
+ * Return true if @drc_index is one of the indexes described by the "CPU"
+ * records of @parent's "ibm,drc-info" property.  Scanning stops at the first
+ * record whose type does not start with "CPU" or that fails to parse.
+ */
+static bool drc_info_valid_index(struct device_node *parent, u32 drc_index)
+{
+	struct of_drc_info drc;
+	struct property *prop;
+	const __be32 *cell;
+	int nr_records, rec, elem;
+	u32 candidate;
+
+	prop = of_find_property(parent, "ibm,drc-info", NULL);
+	if (!prop)
+		return false;
+
+	/* First value of ibm,drc-info is number of drc-info records */
+	cell = of_prop_next_u32(prop, NULL, &nr_records);
+	if (!cell)
+		return false;
+	cell++;
+
+	for (rec = 0; rec < nr_records; rec++) {
+		if (of_read_drc_info_cell(&prop, &cell, &drc))
+			return false;
+
+		if (strncmp(drc.drc_type, "CPU", 3))
+			return false;
+
+		/* This record ends below the index we want; try the next. */
+		if (drc_index > drc.last_drc_index)
+			continue;
+
+		candidate = drc.drc_index_start;
+		for (elem = 0; elem < drc.num_sequential_elems; elem++) {
+			if (candidate == drc_index)
+				return true;
+
+			candidate += drc.sequential_inc;
+		}
+	}
+
+	return false;
+}
+
+/*
+ * Return true if @drc_index is a DRC index known under @parent.  When the
+ * "ibm,drc-info" property is present it is consulted; otherwise the
+ * "ibm,drc-indexes" array is searched.
+ */
+static bool valid_cpu_drc_index(struct device_node *parent, u32 drc_index)
+{
+	int pos;
+
+	if (of_property_present(parent, "ibm,drc-info"))
+		return drc_info_valid_index(parent, drc_index);
+
+	/* Note that the format of the ibm,drc-indexes array is
+	 * the number of entries in the array followed by the array
+	 * of drc values so we start looking at index = 1.
+	 */
+	for (pos = 1; ; pos++) {
+		u32 drc;
+
+		/* A failed read ends the search. */
+		if (of_property_read_u32_index(parent, "ibm,drc-indexes",
+					       pos, &drc))
+			return false;
+
+		if (drc == drc_index)
+			return true;
+	}
+}
+
+/*
+ * Attach @dn and all nodes reachable through its ->sibling links in a single
+ * changeset.  Returns 0 on success or the first error from building or
+ * applying the changeset.
+ */
+static int pseries_cpuhp_attach_nodes(struct device_node *dn)
+{
+	struct device_node *np = dn;
+	struct of_changeset cs;
+	int ret;
+
+	of_changeset_init(&cs);
+
+	/*
+	 * This device node is unattached but may have siblings; open-code the
+	 * traversal.
+	 */
+	while (np != NULL) {
+		ret = of_changeset_attach_node(&cs, np);
+		if (ret)
+			goto out;
+		np = np->sibling;
+	}
+
+	ret = of_changeset_apply(&cs);
+out:
+	of_changeset_destroy(&cs);
+	return ret;
+}
+
+/*
+ * Add the CPU identified by @drc_index: validate the index under /cpus,
+ * acquire the DRC, configure the connector, attach the resulting nodes and
+ * online the CPU.
+ *
+ * Returns 0 on success, -ENODEV if /cpus is missing, -EINVAL for any failure
+ * before the nodes are attached, or the error from attaching/onlining.
+ */
+static ssize_t dlpar_cpu_add(u32 drc_index)
+{
+	struct device_node *dn, *parent;
+	int rc, err;
+
+	pr_debug("Attempting to add CPU, drc index: %x\n", drc_index);
+
+	parent = of_find_node_by_path("/cpus");
+	if (!parent) {
+		pr_warn("Failed to find CPU root node \"/cpus\"\n");
+		return -ENODEV;
+	}
+
+	/* Every failure before the nodes are attached reports -EINVAL. */
+	rc = -EINVAL;
+
+	if (dlpar_cpu_exists(parent, drc_index)) {
+		pr_warn("CPU with drc index %x already exists\n", drc_index);
+		goto put_parent;
+	}
+
+	if (!valid_cpu_drc_index(parent, drc_index)) {
+		pr_warn("Cannot find CPU (drc index %x) to add.\n", drc_index);
+		goto put_parent;
+	}
+
+	err = dlpar_acquire_drc(drc_index);
+	if (err) {
+		pr_warn("Failed to acquire DRC, rc: %d, drc index: %x\n",
+			err, drc_index);
+		goto put_parent;
+	}
+
+	dn = dlpar_configure_connector(cpu_to_be32(drc_index), parent);
+	if (!dn) {
+		pr_warn("Failed call to configure-connector, drc index: %x\n",
+			drc_index);
+		dlpar_release_drc(drc_index);
+		goto put_parent;
+	}
+
+	rc = pseries_cpuhp_attach_nodes(dn);
+
+	/* Regardless we are done with parent now */
+	of_node_put(parent);
+
+	if (rc) {
+		pr_warn("Failed to attach node %pOFn, rc: %d, drc index: %x\n",
+			dn, rc, drc_index);
+
+		/* The configured nodes are freed only if the DRC was released. */
+		if (!dlpar_release_drc(drc_index))
+			dlpar_free_cc_nodes(dn);
+
+		return rc;
+	}
+
+	update_numa_distance(dn);
+
+	rc = dlpar_online_cpu(dn);
+	if (rc) {
+		pr_warn("Failed to online cpu %pOFn, rc: %d, drc index: %x\n",
+			dn, rc, drc_index);
+
+		/* The DRC is released only if the node could be detached. */
+		if (!dlpar_detach_node(dn))
+			dlpar_release_drc(drc_index);
+
+		return rc;
+	}
+
+	pr_debug("Successfully added CPU %pOFn, drc index: %x\n", dn,
+		 drc_index);
+	return rc;
+
+put_parent:
+	of_node_put(parent);
+	return rc;
+}
+
+/*
+ * Count how many cpu nodes and cache nodes have @cachedn as their next-level
+ * cache, as reported by of_find_next_cache_node().
+ */
+static unsigned int pseries_cpuhp_cache_use_count(const struct device_node *cachedn)
+{
+	struct device_node *np, *next;
+	unsigned int users = 0;
+
+	WARN_ON(!of_node_is_type(cachedn, "cache"));
+
+	/* Users among the cpu nodes. */
+	for_each_of_cpu_node(np) {
+		next = of_find_next_cache_node(np);
+		of_node_put(next);
+		users += (next == cachedn);
+	}
+
+	/* Users among the other cache nodes. */
+	for_each_node_by_type(np, "cache") {
+		next = of_find_next_cache_node(np);
+		of_node_put(next);
+		users += (next == cachedn);
+	}
+
+	return users;
+}
+
+/*
+ * Detach @cpudn together with the chain of cache nodes below it, in a single
+ * changeset.  The walk down the cache chain stops at the first cache node
+ * whose use count is greater than one.
+ *
+ * Returns 0 on success or the first error from building or applying the
+ * changeset.
+ */
+static int pseries_cpuhp_detach_nodes(struct device_node *cpudn)
+{
+	struct of_changeset cs;
+	struct device_node *cache;
+	int ret;
+
+	of_changeset_init(&cs);
+
+	ret = of_changeset_detach_node(&cs, cpudn);
+	if (ret)
+		goto out;
+
+	for (cache = of_find_next_cache_node(cpudn); cache;
+	     cache = of_find_next_cache_node(cache)) {
+		if (pseries_cpuhp_cache_use_count(cache) > 1) {
+			of_node_put(cache);
+			break;
+		}
+
+		ret = of_changeset_detach_node(&cs, cache);
+		of_node_put(cache);
+		if (ret)
+			goto out;
+	}
+
+	ret = of_changeset_apply(&cs);
+out:
+	of_changeset_destroy(&cs);
+	return ret;
+}
+
+/*
+ * Remove the CPU described by @dn / @drc_index: offline its threads, release
+ * the DRC and detach the device nodes.  When a later step fails, the earlier
+ * ones are rolled back (the CPU is onlined again, re-acquiring the DRC first
+ * if it had already been released).
+ *
+ * Returns 0 on success, -EINVAL if the CPU cannot be offlined, or the error
+ * from releasing the DRC / detaching the nodes.
+ */
+static ssize_t dlpar_cpu_remove(struct device_node *dn, u32 drc_index)
+{
+	int rc;
+
+	pr_debug("Attempting to remove CPU %pOFn, drc index: %x\n",
+		 dn, drc_index);
+
+	rc = dlpar_offline_cpu(dn);
+	if (rc) {
+		pr_warn("Failed to offline CPU %pOFn, rc: %d\n", dn, rc);
+		return -EINVAL;
+	}
+
+	rc = dlpar_release_drc(drc_index);
+	if (rc) {
+		pr_warn("Failed to release drc (%x) for CPU %pOFn, rc: %d\n",
+			drc_index, dn, rc);
+		dlpar_online_cpu(dn);
+		return rc;
+	}
+
+	rc = pseries_cpuhp_detach_nodes(dn);
+	if (rc) {
+		int saved_rc = rc;
+
+		/* Terminate the message so it is not merged with the next one. */
+		pr_warn("Failed to detach CPU %pOFn, rc: %d\n", dn, rc);
+
+		rc = dlpar_acquire_drc(drc_index);
+		if (!rc)
+			dlpar_online_cpu(dn);
+
+		return saved_rc;
+	}
+
+	pr_debug("Successfully removed CPU, drc index: %x\n", drc_index);
+	return 0;
+}
+
+/*
+ * Find the "cpu" node whose "ibm,my-drc-index" equals @drc_index.  Returns
+ * the node (the caller in this file drops it with of_node_put()) or NULL if
+ * no cpu node matches.
+ */
+static struct device_node *cpu_drc_index_to_dn(u32 drc_index)
+{
+	struct device_node *np;
+	u32 idx;
+
+	for_each_node_by_type(np, "cpu") {
+		if (of_property_read_u32(np, "ibm,my-drc-index", &idx))
+			continue;
+
+		if (idx == drc_index)
+			return np;
+	}
+
+	return NULL;
+}
+
+/*
+ * Look up the cpu node for @drc_index and remove that CPU.  Returns -ENODEV
+ * if no such node exists, otherwise the result of dlpar_cpu_remove().
+ */
+static int dlpar_cpu_remove_by_index(u32 drc_index)
+{
+	struct device_node *np = cpu_drc_index_to_dn(drc_index);
+	int rc;
+
+	if (np == NULL) {
+		pr_warn("Cannot find CPU (drc index %x) to remove\n",
+			drc_index);
+		return -ENODEV;
+	}
+
+	rc = dlpar_cpu_remove(np, drc_index);
+	of_node_put(np);
+
+	return rc;
+}
+
+/*
+ * Handle a CPU hotplug request described by @hp_elog, under the device
+ * hotplug lock.  Only requests identified by DRC index are accepted; any
+ * other id type, or an unknown action, yields -EINVAL.
+ */
+int dlpar_cpu(struct pseries_hp_errorlog *hp_elog)
+{
+	const u32 drc_index = hp_elog->_drc_u.drc_index;
+	int rc;
+
+	lock_device_hotplug();
+
+	switch (hp_elog->action) {
+	case PSERIES_HP_ELOG_ACTION_REMOVE:
+		if (hp_elog->id_type != PSERIES_HP_ELOG_ID_DRC_INDEX) {
+			rc = -EINVAL;
+			break;
+		}
+
+		rc = dlpar_cpu_remove_by_index(drc_index);
+		/*
+		 * Setting the isolation state of an UNISOLATED/CONFIGURED
+		 * device to UNISOLATE is a no-op, but the hypervisor can
+		 * use it as a hint that the CPU removal failed.
+		 */
+		if (rc)
+			dlpar_unisolate_drc(drc_index);
+		break;
+	case PSERIES_HP_ELOG_ACTION_ADD:
+		if (hp_elog->id_type != PSERIES_HP_ELOG_ID_DRC_INDEX) {
+			rc = -EINVAL;
+			break;
+		}
+
+		rc = dlpar_cpu_add(drc_index);
+		break;
+	default:
+		pr_err("Invalid action (%d) specified\n", hp_elog->action);
+		rc = -EINVAL;
+		break;
+	}
+
+	unlock_device_hotplug();
+	return rc;
+}
+
+#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
+
+/*
+ * Parse a DRC index from @buf and add that CPU.  Returns @count on success,
+ * -EINVAL if @buf is not a valid u32, or the error from dlpar_cpu_add().
+ */
+static ssize_t dlpar_cpu_probe(const char *buf, size_t count)
+{
+	u32 drc_index;
+	int rc;
+
+	if (kstrtou32(buf, 0, &drc_index))
+		return -EINVAL;
+
+	rc = dlpar_cpu_add(drc_index);
+	if (rc)
+		return rc;
+
+	return count;
+}
+
+/*
+ * Remove the CPU whose device-tree path is given in @buf.  Returns @count on
+ * success, -EINVAL if the path does not resolve or the node has no
+ * "ibm,my-drc-index", or the error from dlpar_cpu_remove().
+ */
+static ssize_t dlpar_cpu_release(const char *buf, size_t count)
+{
+	struct device_node *np;
+	u32 drc_index;
+	int rc = -EINVAL;
+
+	np = of_find_node_by_path(buf);
+	if (!np)
+		return -EINVAL;
+
+	if (!of_property_read_u32(np, "ibm,my-drc-index", &drc_index))
+		rc = dlpar_cpu_remove(np, drc_index);
+
+	of_node_put(np);
+
+	if (rc)
+		return rc;
+
+	return count;
+}
+
+#endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */
+
+/*
+ * OF reconfiguration notifier: keep the cpu present map in step with cpu
+ * nodes being attached to or detached from the device tree.  Only a failed
+ * attach reports an error; every other action reports success.
+ */
+static int pseries_smp_notifier(struct notifier_block *nb,
+				unsigned long action, void *data)
+{
+	struct of_reconfig_data *rd = data;
+	int err = 0;
+
+	if (action == OF_RECONFIG_ATTACH_NODE)
+		err = pseries_add_processor(rd->dn);
+	else if (action == OF_RECONFIG_DETACH_NODE)
+		pseries_remove_processor(rd->dn);
+
+	return notifier_from_errno(err);
+}
+
+/* Registered from pseries_dlpar_init() when running on an LPAR. */
+static struct notifier_block pseries_smp_nb = {
+	.notifier_call = pseries_smp_notifier,
+};
+
+/*
+ * Install the pseries CPU hotplug callbacks in smp_ops, provided firmware
+ * exposes both the stop-self and query-cpu-stopped-state RTAS functions.
+ * Otherwise a message is printed and smp_ops is left untouched.
+ */
+void __init pseries_cpu_hotplug_init(void)
+{
+	int qcss_tok;
+
+	rtas_stop_self_token = rtas_function_token(RTAS_FN_STOP_SELF);
+	qcss_tok = rtas_function_token(RTAS_FN_QUERY_CPU_STOPPED_STATE);
+
+	if (rtas_stop_self_token != RTAS_UNKNOWN_SERVICE &&
+	    qcss_tok != RTAS_UNKNOWN_SERVICE) {
+		smp_ops->cpu_offline_self = pseries_cpu_offline_self;
+		smp_ops->cpu_disable = pseries_cpu_disable;
+		smp_ops->cpu_die = pseries_cpu_die;
+		return;
+	}
+
+	printk(KERN_INFO "CPU Hotplug not supported by firmware - disabling.\n");
+}
+
+/*
+ * Arch initcall: hook up the cpu probe/release callbacks (when configured)
+ * and, on LPAR only, seed the per-node CPU id records from the boot-time
+ * topology and register the OF reconfiguration notifier.
+ *
+ * Returns 0, or -ENOMEM if a per-node mask cannot be allocated.
+ */
+static int __init pseries_dlpar_init(void)
+{
+	unsigned int nid;
+
+#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
+	ppc_md.cpu_probe = dlpar_cpu_probe;
+	ppc_md.cpu_release = dlpar_cpu_release;
+#endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */
+
+	/* Processors can be added/removed only on LPAR */
+	if (!firmware_has_feature(FW_FEATURE_LPAR))
+		return 0;
+
+	for_each_node(nid) {
+		cpumask_var_t *ids = &node_recorded_ids_map[nid];
+
+		if (!alloc_cpumask_var_node(ids, GFP_KERNEL, nid))
+			return -ENOMEM;
+
+		/* Record ids of CPU added at boot time */
+		cpumask_copy(*ids, cpumask_of_node(nid));
+	}
+
+	of_reconfig_notifier_register(&pseries_smp_nb);
+
+	return 0;
+}
+machine_arch_initcall(pseries, pseries_dlpar_init);
diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
new file mode 100644
index 000000000..4adca5b61
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -0,0 +1,923 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * pseries Memory Hotplug infrastructure.
+ *
+ * Copyright (C) 2008 Badari Pulavarty, IBM Corporation
+ */
+
+#define pr_fmt(fmt) "pseries-hotplug-mem: " fmt
+
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/memblock.h>
+#include <linux/memory.h>
+#include <linux/memory_hotplug.h>
+#include <linux/slab.h>
+
+#include <asm/firmware.h>
+#include <asm/machdep.h>
+#include <asm/sparsemem.h>
+#include <asm/fadump.h>
+#include <asm/drmem.h>
+#include "pseries.h"
+
+/*
+ * Free a struct property together with its name and value buffers; all
+ * three are released with kfree().
+ */
+static void dlpar_free_property(struct property *prop)
+{
+ kfree(prop->name);
+ kfree(prop->value);
+ kfree(prop);
+}
+
+/*
+ * Duplicate @prop into a freshly allocated property whose value buffer is
+ * @prop_size bytes long.
+ *
+ * The original value (prop->length bytes) is copied to the start of the
+ * zeroed buffer, the new length is set to @prop_size and the clone is
+ * flagged OF_DYNAMIC. The copy is not bounded by @prop_size, so
+ * @prop_size must not be smaller than prop->length.
+ *
+ * Returns the clone, or NULL if any allocation fails.
+ */
+static struct property *dlpar_clone_property(struct property *prop,
+					     u32 prop_size)
+{
+	struct property *clone = kzalloc(sizeof(*clone), GFP_KERNEL);
+
+	if (!clone)
+		return NULL;
+
+	clone->name = kstrdup(prop->name, GFP_KERNEL);
+	clone->value = kzalloc(prop_size, GFP_KERNEL);
+
+	if (clone->name && clone->value) {
+		memcpy(clone->value, prop->value, prop->length);
+		clone->length = prop_size;
+		of_property_set_flag(clone, OF_DYNAMIC);
+		return clone;
+	}
+
+	/* Partial allocation: release whatever was obtained */
+	dlpar_free_property(clone);
+	return NULL;
+}
+
+/*
+ * Find the index of the associativity array matching @lmb_assoc in the
+ * ibm,associativity-lookup-arrays property, appending a new array to the
+ * property when no existing one matches.
+ *
+ * @dr_node:   node owning the property; updated when an array is appended
+ * @ala_prop:  current ibm,associativity-lookup-arrays property
+ * @lmb_assoc: associativity of the LMB; cell 0 is skipped and the cells
+ *             after it are compared against each lookup array
+ * @aa_index:  set to the matching (or newly appended) array index
+ *
+ * Returns true with *@aa_index set, or false if the enlarged property
+ * could not be allocated or could not be installed in the device tree.
+ */
+static bool find_aa_index(struct device_node *dr_node,
+			 struct property *ala_prop,
+			 const u32 *lmb_assoc, u32 *aa_index)
+{
+	u32 *assoc_arrays, new_prop_size;
+	struct property *new_prop;
+	int aa_arrays, aa_array_entries, aa_array_sz;
+	int i, index;
+
+	/*
+	 * The ibm,associativity-lookup-arrays property is defined to be
+	 * a 32-bit value specifying the number of associativity arrays
+	 * followed by a 32-bit value specifying the number of entries per
+	 * array, followed by the associativity arrays.
+	 */
+	assoc_arrays = ala_prop->value;
+
+	aa_arrays = be32_to_cpu(assoc_arrays[0]);
+	aa_array_entries = be32_to_cpu(assoc_arrays[1]);
+	aa_array_sz = aa_array_entries * sizeof(u32);
+
+	for (i = 0; i < aa_arrays; i++) {
+		/* +2 skips the two header cells described above */
+		index = (i * aa_array_entries) + 2;
+
+		if (memcmp(&assoc_arrays[index], &lmb_assoc[1], aa_array_sz))
+			continue;
+
+		*aa_index = i;
+		return true;
+	}
+
+	/* No match: build a copy of the property with room for one more array */
+	new_prop_size = ala_prop->length + aa_array_sz;
+	new_prop = dlpar_clone_property(ala_prop, new_prop_size);
+	if (!new_prop)
+		return false;
+
+	assoc_arrays = new_prop->value;
+
+	/* increment the number of entries in the lookup array */
+	assoc_arrays[0] = cpu_to_be32(aa_arrays + 1);
+
+	/* copy the new associativity into the lookup array */
+	index = aa_arrays * aa_array_entries + 2;
+	memcpy(&assoc_arrays[index], &lmb_assoc[1], aa_array_sz);
+
+	/*
+	 * Do not report an index that never reached the device tree.
+	 * NOTE(review): assumes a failed of_update_property() leaves
+	 * new_prop unlinked, so that freeing it here is safe - confirm.
+	 */
+	if (of_update_property(dr_node, new_prop)) {
+		dlpar_free_property(new_prop);
+		return false;
+	}
+
+	/*
+	 * The associativity lookup array index for this lmb is
+	 * number of entries - 1 since we added its associativity
+	 * to the end of the lookup array.
+	 */
+	*aa_index = be32_to_cpu(assoc_arrays[0]) - 1;
+	return true;
+}
+
+/*
+ * Refresh lmb->aa_index from the device tree.
+ *
+ * Runs dlpar_configure_connector() for the LMB's DRC index to obtain a
+ * temporary node carrying "ibm,associativity", then resolves that
+ * associativity to an index in the "ibm,associativity-lookup-arrays"
+ * property of /ibm,dynamic-reconfiguration-memory via find_aa_index().
+ *
+ * Returns 0 on success, -ENODEV if a required node or property is
+ * missing, -EINVAL if dlpar_configure_connector() returns NULL, and -1 if
+ * find_aa_index() fails.
+ */
+static int update_lmb_associativity_index(struct drmem_lmb *lmb)
+{
+ struct device_node *parent, *lmb_node, *dr_node;
+ struct property *ala_prop;
+ const u32 *lmb_assoc;
+ u32 aa_index;
+ bool found;
+
+ parent = of_find_node_by_path("/");
+ if (!parent)
+ return -ENODEV;
+
+ lmb_node = dlpar_configure_connector(cpu_to_be32(lmb->drc_index),
+ parent);
+ /* The root node reference is only needed for the call above */
+ of_node_put(parent);
+ if (!lmb_node)
+ return -EINVAL;
+
+ lmb_assoc = of_get_property(lmb_node, "ibm,associativity", NULL);
+ if (!lmb_assoc) {
+ dlpar_free_cc_nodes(lmb_node);
+ return -ENODEV;
+ }
+
+ update_numa_distance(lmb_node);
+
+ dr_node = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
+ if (!dr_node) {
+ dlpar_free_cc_nodes(lmb_node);
+ return -ENODEV;
+ }
+
+ ala_prop = of_find_property(dr_node, "ibm,associativity-lookup-arrays",
+ NULL);
+ if (!ala_prop) {
+ of_node_put(dr_node);
+ dlpar_free_cc_nodes(lmb_node);
+ return -ENODEV;
+ }
+
+ found = find_aa_index(dr_node, ala_prop, lmb_assoc, &aa_index);
+
+ /* lmb_assoc points into lmb_node, so the nodes are released only now */
+ of_node_put(dr_node);
+ dlpar_free_cc_nodes(lmb_node);
+
+ if (!found) {
+ pr_err("Could not find LMB associativity\n");
+ return -1;
+ }
+
+ lmb->aa_index = aa_index;
+ return 0;
+}
+
+/*
+ * Return the memory block that find_memory_block() reports for the
+ * section containing the LMB's base address (may be NULL). The callers in
+ * this file drop the result with put_device() when done.
+ */
+static struct memory_block *lmb_to_memblock(struct drmem_lmb *lmb)
+{
+	unsigned long base_pfn = PFN_DOWN(lmb->base_addr);
+
+	return find_memory_block(pfn_to_section_nr(base_pfn));
+}
+
+/*
+ * Resolve a (drc_index, count) pair to a range of the drmem LMB array.
+ *
+ * On success *@start_lmb points at the LMB whose drc_index matches and
+ * *@end_lmb points @n_lmbs entries past it (exclusive end).
+ *
+ * Returns 0 on success, -EINVAL if no LMB has @drc_index or if the range
+ * would run past the end of the LMB array.
+ */
+static int get_lmb_range(u32 drc_index, int n_lmbs,
+			 struct drmem_lmb **start_lmb,
+			 struct drmem_lmb **end_lmb)
+{
+	struct drmem_lmb *lmb, *first = NULL;
+	struct drmem_lmb *range_end, *array_end;
+
+	for_each_drmem_lmb(lmb) {
+		if (lmb->drc_index != drc_index)
+			continue;
+
+		first = lmb;
+		break;
+	}
+
+	if (!first)
+		return -EINVAL;
+
+	range_end = first + n_lmbs;
+	array_end = drmem_info->lmbs + drmem_info->n_lmbs;
+
+	/* range_end may equal array_end: both are one-past-the-end markers */
+	if (range_end > array_end)
+		return -EINVAL;
+
+	*start_lmb = first;
+	*end_lmb = range_end;
+	return 0;
+}
+
+/*
+ * Bring the memory block backing @lmb online or offline.
+ *
+ * Nothing is done if the block is already in the requested state.
+ * Returns -EINVAL if no memory block is found, otherwise the result of
+ * device_online()/device_offline(), or 0 when no transition was needed.
+ */
+static int dlpar_change_lmb_state(struct drmem_lmb *lmb, bool online)
+{
+	struct memory_block *mem_block = lmb_to_memblock(lmb);
+	int rc = 0;
+
+	if (!mem_block)
+		return -EINVAL;
+
+	if (online) {
+		if (mem_block->dev.offline)
+			rc = device_online(&mem_block->dev);
+	} else if (!mem_block->dev.offline) {
+		rc = device_offline(&mem_block->dev);
+	}
+
+	/* Drop the reference obtained through lmb_to_memblock() */
+	put_device(&mem_block->dev);
+
+	return rc;
+}
+
+/* Online the memory block backing @lmb; see dlpar_change_lmb_state(). */
+static int dlpar_online_lmb(struct drmem_lmb *lmb)
+{
+ return dlpar_change_lmb_state(lmb, true);
+}
+
+#ifdef CONFIG_MEMORY_HOTREMOVE
+/* Offline the memory block backing @lmb; see dlpar_change_lmb_state(). */
+static int dlpar_offline_lmb(struct drmem_lmb *lmb)
+{
+ return dlpar_change_lmb_state(lmb, false);
+}
+
+/*
+ * Remove the memory starting at @base from the kernel and from memblock.
+ *
+ * If the first pfn of the range is valid, the memory is removed in
+ * MIN_MEMORY_BLOCK_SIZE steps; the number of steps is derived from
+ * memory_block_size, which is not defined in this function (presumably
+ * the file/arch-level memory block size - verify). In all cases the
+ * range [@base, @base + @memblock_size) is then dropped from memblock.
+ *
+ * Runs under lock_device_hotplug(). Always returns 0.
+ */
+static int pseries_remove_memblock(unsigned long base, unsigned long memblock_size)
+{
+	unsigned long start_pfn;
+	unsigned long section_base;
+	int sections_per_block;
+	int i;
+
+	start_pfn = base >> PAGE_SHIFT;
+
+	lock_device_hotplug();
+
+	if (!pfn_valid(start_pfn))
+		goto out;
+
+	sections_per_block = memory_block_size / MIN_MEMORY_BLOCK_SIZE;
+
+	/*
+	 * Walk the sections with a separate cursor so that @base still names
+	 * the start of the range when memblock is updated below.
+	 */
+	section_base = base;
+	for (i = 0; i < sections_per_block; i++) {
+		__remove_memory(section_base, MIN_MEMORY_BLOCK_SIZE);
+		section_base += MIN_MEMORY_BLOCK_SIZE;
+	}
+
+out:
+	/* Update memory regions for memory remove */
+	memblock_remove(base, memblock_size);
+	unlock_device_hotplug();
+	return 0;
+}
+
+/*
+ * Handle detachment of a device tree node: if it is a "memory" node,
+ * remove the memory described by its first address resource.
+ *
+ * Returns 0 for non-memory nodes and after a removal attempt, or the
+ * error from of_address_to_resource().
+ */
+static int pseries_remove_mem_node(struct device_node *np)
+{
+	struct resource res;
+	int ret;
+
+	/* Nothing to do unless we are actually removing memory */
+	if (!of_node_is_type(np, "memory"))
+		return 0;
+
+	/* Find the base address and size of the memblock */
+	ret = of_address_to_resource(np, 0, &res);
+	if (!ret)
+		pseries_remove_memblock(res.start, resource_size(&res));
+
+	return ret;
+}
+
+/*
+ * Cheap pre-check for hot-remove: an LMB qualifies only if it is assigned,
+ * not reserved and (with CONFIG_FA_DUMP) not part of a fadump memory area.
+ */
+static bool lmb_is_removable(struct drmem_lmb *lmb)
+{
+	if (lmb->flags & DRCONF_MEM_RESERVED)
+		return false;
+
+	if (!(lmb->flags & DRCONF_MEM_ASSIGNED))
+		return false;
+
+#ifdef CONFIG_FA_DUMP
+	/*
+	 * Don't hot-remove memory that falls in fadump boot memory area
+	 * and memory that is reserved for capturing old kernel memory.
+	 */
+	if (is_fadump_memory_area(lmb->base_addr, memory_block_size_bytes()))
+		return false;
+#endif
+	/* device_offline() will determine if we can actually remove this lmb */
+	return true;
+}
+
+static int dlpar_add_lmb(struct drmem_lmb *);
+
+/*
+ * Offline and remove the memory backing @lmb.
+ *
+ * Returns -EINVAL if the LMB is not removable or has no memory block, the
+ * error from dlpar_offline_lmb() if offlining fails, and 0 once the memory
+ * has been removed, dropped from memblock, its associativity index
+ * invalidated and DRCONF_MEM_ASSIGNED cleared.
+ */
+static int dlpar_remove_lmb(struct drmem_lmb *lmb)
+{
+ struct memory_block *mem_block;
+ int rc;
+
+ if (!lmb_is_removable(lmb))
+ return -EINVAL;
+
+ /* Reference held until put_device() below, on both exit paths */
+ mem_block = lmb_to_memblock(lmb);
+ if (mem_block == NULL)
+ return -EINVAL;
+
+ rc = dlpar_offline_lmb(lmb);
+ if (rc) {
+ put_device(&mem_block->dev);
+ return rc;
+ }
+
+ __remove_memory(lmb->base_addr, memory_block_size);
+ put_device(&mem_block->dev);
+
+ /* Update memory regions for memory remove */
+ memblock_remove(lmb->base_addr, memory_block_size);
+
+ invalidate_lmb_associativity_index(lmb);
+ lmb->flags &= ~DRCONF_MEM_ASSIGNED;
+
+ return 0;
+}
+
+/*
+ * Hot-remove @lmbs_to_remove LMBs, chosen in drmem array order.
+ *
+ * The request is all-or-nothing: every LMB removed here is marked
+ * reserved, and if fewer than @lmbs_to_remove could be removed the marked
+ * LMBs are added back. On full success the DRC of each removed LMB is
+ * released.
+ *
+ * Returns 0 on success, -EINVAL if the count is zero, if too few LMBs pass
+ * lmb_is_removable(), or if the removal had to be rolled back.
+ */
+static int dlpar_memory_remove_by_count(u32 lmbs_to_remove)
+{
+ struct drmem_lmb *lmb;
+ int lmbs_reserved = 0;
+ int lmbs_available = 0;
+ int rc;
+
+ /* NOTE(review): lmbs_to_remove is a u32 but is printed with %d */
+ pr_info("Attempting to hot-remove %d LMB(s)\n", lmbs_to_remove);
+
+ if (lmbs_to_remove == 0)
+ return -EINVAL;
+
+ /* Validate that there are enough LMBs to satisfy the request */
+ for_each_drmem_lmb(lmb) {
+ if (lmb_is_removable(lmb))
+ lmbs_available++;
+
+ if (lmbs_available == lmbs_to_remove)
+ break;
+ }
+
+ if (lmbs_available < lmbs_to_remove) {
+ pr_info("Not enough LMBs available (%d of %d) to satisfy request\n",
+ lmbs_available, lmbs_to_remove);
+ return -EINVAL;
+ }
+
+ /* Removal pass: LMBs that fail to remove are simply skipped */
+ for_each_drmem_lmb(lmb) {
+ rc = dlpar_remove_lmb(lmb);
+ if (rc)
+ continue;
+
+ /* Mark this lmb so we can add it later if all of the
+ * requested LMBs cannot be removed.
+ */
+ drmem_mark_lmb_reserved(lmb);
+
+ lmbs_reserved++;
+ if (lmbs_reserved == lmbs_to_remove)
+ break;
+ }
+
+ if (lmbs_reserved != lmbs_to_remove) {
+ pr_err("Memory hot-remove failed, adding LMB's back\n");
+
+ /* Roll back: re-add every LMB removed by the pass above */
+ for_each_drmem_lmb(lmb) {
+ if (!drmem_lmb_reserved(lmb))
+ continue;
+
+ rc = dlpar_add_lmb(lmb);
+ if (rc)
+ pr_err("Failed to add LMB back, drc index %x\n",
+ lmb->drc_index);
+
+ drmem_remove_lmb_reservation(lmb);
+
+ lmbs_reserved--;
+ if (lmbs_reserved == 0)
+ break;
+ }
+
+ rc = -EINVAL;
+ } else {
+ /* Commit: release the DRC of every removed LMB */
+ for_each_drmem_lmb(lmb) {
+ if (!drmem_lmb_reserved(lmb))
+ continue;
+
+ dlpar_release_drc(lmb->drc_index);
+ pr_info("Memory at %llx was hot-removed\n",
+ lmb->base_addr);
+
+ drmem_remove_lmb_reservation(lmb);
+
+ lmbs_reserved--;
+ if (lmbs_reserved == 0)
+ break;
+ }
+ rc = 0;
+ }
+
+ return rc;
+}
+
+/*
+ * Hot-remove the single LMB whose DRC index is @drc_index and release its
+ * DRC on success.
+ *
+ * Returns 0 on success, -EINVAL if no LMB has that index, or the error
+ * from dlpar_remove_lmb().
+ */
+static int dlpar_memory_remove_by_index(u32 drc_index)
+{
+	struct drmem_lmb *lmb, *target = NULL;
+	int rc;
+
+	pr_debug("Attempting to hot-remove LMB, drc index %x\n", drc_index);
+
+	for_each_drmem_lmb(lmb) {
+		if (lmb->drc_index == drc_index) {
+			target = lmb;
+			break;
+		}
+	}
+
+	if (!target) {
+		pr_debug("Failed to look up LMB for drc index %x\n", drc_index);
+		return -EINVAL;
+	}
+
+	rc = dlpar_remove_lmb(target);
+	if (rc) {
+		pr_debug("Failed to hot-remove memory at %llx\n",
+			 target->base_addr);
+		return rc;
+	}
+
+	dlpar_release_drc(target->drc_index);
+	pr_debug("Memory at %llx was hot-removed\n", target->base_addr);
+
+	return 0;
+}
+
+/*
+ * Hot-remove the @lmbs_to_remove consecutive LMBs starting at the LMB
+ * whose DRC index is @drc_index.
+ *
+ * LMBs in the range that are already unassigned are skipped. If any
+ * removal fails, the LMBs removed so far are unisolated and added back.
+ *
+ * Returns 0 on success, -EINVAL if the count is zero, the range is
+ * invalid, an LMB in the range is reserved, or the removal was rolled
+ * back.
+ */
+static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index)
+{
+ struct drmem_lmb *lmb, *start_lmb, *end_lmb;
+ int rc;
+
+ pr_info("Attempting to hot-remove %u LMB(s) at %x\n",
+ lmbs_to_remove, drc_index);
+
+ if (lmbs_to_remove == 0)
+ return -EINVAL;
+
+ rc = get_lmb_range(drc_index, lmbs_to_remove, &start_lmb, &end_lmb);
+ if (rc)
+ return -EINVAL;
+
+ /*
+ * Validate that all LMBs in range are not reserved. Note that it
+ * is ok if they are !ASSIGNED since our goal here is to remove the
+ * LMB range, regardless of whether some LMBs were already removed
+ * by any other reason.
+ *
+ * This is a contrast to what is done in remove_by_count() where we
+ * check for both RESERVED and !ASSIGNED (via lmb_is_removable()),
+ * because we want to remove a fixed amount of LMBs in that function.
+ */
+ for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) {
+ if (lmb->flags & DRCONF_MEM_RESERVED) {
+ pr_err("Memory at %llx (drc index %x) is reserved\n",
+ lmb->base_addr, lmb->drc_index);
+ return -EINVAL;
+ }
+ }
+
+ /* rc is 0 here, so a range with nothing left to remove is a success */
+ for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) {
+ /*
+ * dlpar_remove_lmb() will error out if the LMB is already
+ * !ASSIGNED, but this case is a no-op for us.
+ */
+ if (!(lmb->flags & DRCONF_MEM_ASSIGNED))
+ continue;
+
+ rc = dlpar_remove_lmb(lmb);
+ if (rc)
+ break;
+
+ drmem_mark_lmb_reserved(lmb);
+ }
+
+ if (rc) {
+ pr_err("Memory indexed-count-remove failed, adding any removed LMBs\n");
+
+
+ for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) {
+ if (!drmem_lmb_reserved(lmb))
+ continue;
+
+ /*
+ * Setting the isolation state of an UNISOLATED/CONFIGURED
+ * device to UNISOLATE is a no-op, but the hypervisor can
+ * use it as a hint that the LMB removal failed.
+ */
+ dlpar_unisolate_drc(lmb->drc_index);
+
+ rc = dlpar_add_lmb(lmb);
+ if (rc)
+ pr_err("Failed to add LMB, drc index %x\n",
+ lmb->drc_index);
+
+ drmem_remove_lmb_reservation(lmb);
+ }
+ /* The request failed regardless of how the rollback went */
+ rc = -EINVAL;
+ } else {
+ for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) {
+ if (!drmem_lmb_reserved(lmb))
+ continue;
+
+ dlpar_release_drc(lmb->drc_index);
+ pr_info("Memory at %llx (drc index %x) was hot-removed\n",
+ lmb->base_addr, lmb->drc_index);
+
+ drmem_remove_lmb_reservation(lmb);
+ }
+ }
+
+ return rc;
+}
+
+#else
+/*
+ * Stubs for kernels built without CONFIG_MEMORY_HOTREMOVE: every removal
+ * entry point reports -EOPNOTSUPP, except pseries_remove_mem_node(), which
+ * reports success without doing anything.
+ */
+static inline int pseries_remove_memblock(unsigned long base,
+ unsigned long memblock_size)
+{
+ return -EOPNOTSUPP;
+}
+static inline int pseries_remove_mem_node(struct device_node *np)
+{
+ return 0;
+}
+static int dlpar_remove_lmb(struct drmem_lmb *lmb)
+{
+ return -EOPNOTSUPP;
+}
+static int dlpar_memory_remove_by_count(u32 lmbs_to_remove)
+{
+ return -EOPNOTSUPP;
+}
+static int dlpar_memory_remove_by_index(u32 drc_index)
+{
+ return -EOPNOTSUPP;
+}
+
+static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index)
+{
+ return -EOPNOTSUPP;
+}
+#endif /* CONFIG_MEMORY_HOTREMOVE */
+
+/*
+ * Add and online the memory backing @lmb.
+ *
+ * Updates the LMB's associativity index, adds one memory block at
+ * lmb->base_addr on the node derived from that associativity (falling
+ * back to first_online_node), onlines it and sets DRCONF_MEM_ASSIGNED.
+ *
+ * Returns 0 on success, -EINVAL if the LMB is already assigned, or the
+ * error from the step that failed; on failure the associativity index is
+ * invalidated again where it had been set.
+ */
+static int dlpar_add_lmb(struct drmem_lmb *lmb)
+{
+ unsigned long block_sz;
+ int nid, rc;
+
+ if (lmb->flags & DRCONF_MEM_ASSIGNED)
+ return -EINVAL;
+
+ rc = update_lmb_associativity_index(lmb);
+ if (rc) {
+ /*
+ * NOTE(review): the hot-add callers in this file also call
+ * dlpar_release_drc() when this function fails, so this path
+ * appears to release the DRC twice - confirm.
+ */
+ dlpar_release_drc(lmb->drc_index);
+ return rc;
+ }
+
+ block_sz = memory_block_size_bytes();
+
+ /* Find the node id for this LMB. Fake one if necessary. */
+ nid = of_drconf_to_nid_single(lmb);
+ if (nid < 0 || !node_possible(nid))
+ nid = first_online_node;
+
+ /* Add the memory */
+ rc = __add_memory(nid, lmb->base_addr, block_sz, MHP_MEMMAP_ON_MEMORY);
+ if (rc) {
+ invalidate_lmb_associativity_index(lmb);
+ return rc;
+ }
+
+ rc = dlpar_online_lmb(lmb);
+ if (rc) {
+ /* Onlining failed: undo the add so the LMB stays unassigned */
+ __remove_memory(lmb->base_addr, block_sz);
+ invalidate_lmb_associativity_index(lmb);
+ } else {
+ lmb->flags |= DRCONF_MEM_ASSIGNED;
+ }
+
+ return rc;
+}
+
+/*
+ * Hot-add @lmbs_to_add LMBs, chosen in drmem array order from those that
+ * are not yet assigned.
+ *
+ * The request is all-or-nothing: every LMB added here is marked reserved,
+ * and if fewer than @lmbs_to_add could be added the marked LMBs are
+ * removed again.
+ *
+ * Returns 0 on success, -EINVAL if the count is zero, if too few
+ * unassigned, unreserved LMBs exist, or if the add had to be rolled back.
+ */
+static int dlpar_memory_add_by_count(u32 lmbs_to_add)
+{
+ struct drmem_lmb *lmb;
+ int lmbs_available = 0;
+ int lmbs_reserved = 0;
+ int rc;
+
+ /* NOTE(review): lmbs_to_add is a u32 but is printed with %d */
+ pr_info("Attempting to hot-add %d LMB(s)\n", lmbs_to_add);
+
+ if (lmbs_to_add == 0)
+ return -EINVAL;
+
+ /* Validate that there are enough LMBs to satisfy the request */
+ for_each_drmem_lmb(lmb) {
+ if (lmb->flags & DRCONF_MEM_RESERVED)
+ continue;
+
+ if (!(lmb->flags & DRCONF_MEM_ASSIGNED))
+ lmbs_available++;
+
+ if (lmbs_available == lmbs_to_add)
+ break;
+ }
+
+ if (lmbs_available < lmbs_to_add)
+ return -EINVAL;
+
+ /* Add pass: LMBs that cannot be acquired or added are skipped */
+ for_each_drmem_lmb(lmb) {
+ if (lmb->flags & DRCONF_MEM_ASSIGNED)
+ continue;
+
+ rc = dlpar_acquire_drc(lmb->drc_index);
+ if (rc)
+ continue;
+
+ rc = dlpar_add_lmb(lmb);
+ if (rc) {
+ dlpar_release_drc(lmb->drc_index);
+ continue;
+ }
+
+ /* Mark this lmb so we can remove it later if all of the
+ * requested LMBs cannot be added.
+ */
+ drmem_mark_lmb_reserved(lmb);
+ lmbs_reserved++;
+ if (lmbs_reserved == lmbs_to_add)
+ break;
+ }
+
+ if (lmbs_reserved != lmbs_to_add) {
+ pr_err("Memory hot-add failed, removing any added LMBs\n");
+
+ /* Roll back: remove every LMB added by the pass above */
+ for_each_drmem_lmb(lmb) {
+ if (!drmem_lmb_reserved(lmb))
+ continue;
+
+ rc = dlpar_remove_lmb(lmb);
+ if (rc)
+ pr_err("Failed to remove LMB, drc index %x\n",
+ lmb->drc_index);
+ else
+ dlpar_release_drc(lmb->drc_index);
+
+ drmem_remove_lmb_reservation(lmb);
+ lmbs_reserved--;
+
+ if (lmbs_reserved == 0)
+ break;
+ }
+ rc = -EINVAL;
+ } else {
+ /* Commit: just clear the temporary reservation marks */
+ for_each_drmem_lmb(lmb) {
+ if (!drmem_lmb_reserved(lmb))
+ continue;
+
+ pr_debug("Memory at %llx (drc index %x) was hot-added\n",
+ lmb->base_addr, lmb->drc_index);
+ drmem_remove_lmb_reservation(lmb);
+ lmbs_reserved--;
+
+ if (lmbs_reserved == 0)
+ break;
+ }
+ rc = 0;
+ }
+
+ return rc;
+}
+
+/*
+ * Hot-add the single LMB whose DRC index is @drc_index: acquire its DRC,
+ * then add the memory, releasing the DRC again if the add fails.
+ *
+ * Returns 0 on success, -EINVAL if no LMB has that index, or the error
+ * from dlpar_acquire_drc()/dlpar_add_lmb().
+ */
+static int dlpar_memory_add_by_index(u32 drc_index)
+{
+	struct drmem_lmb *lmb, *target = NULL;
+	int rc;
+
+	pr_info("Attempting to hot-add LMB, drc index %x\n", drc_index);
+
+	for_each_drmem_lmb(lmb) {
+		if (lmb->drc_index == drc_index) {
+			target = lmb;
+			break;
+		}
+	}
+
+	if (!target) {
+		pr_info("Failed to hot-add memory, drc index %x\n", drc_index);
+		return -EINVAL;
+	}
+
+	rc = dlpar_acquire_drc(target->drc_index);
+	if (!rc) {
+		rc = dlpar_add_lmb(target);
+		if (rc)
+			dlpar_release_drc(target->drc_index);
+	}
+
+	if (rc)
+		pr_info("Failed to hot-add memory, drc index %x\n", drc_index);
+	else
+		pr_info("Memory at %llx (drc index %x) was hot-added\n",
+			target->base_addr, drc_index);
+
+	return rc;
+}
+
+/*
+ * Hot-add the @lmbs_to_add consecutive LMBs starting at the LMB whose DRC
+ * index is @drc_index.
+ *
+ * LMBs in the range that are already assigned are skipped. If acquiring
+ * or adding any LMB fails, the LMBs added so far are removed again.
+ *
+ * Returns 0 on success, -EINVAL if the count is zero, the range is
+ * invalid, an LMB in the range is reserved, or the add was rolled back.
+ */
+static int dlpar_memory_add_by_ic(u32 lmbs_to_add, u32 drc_index)
+{
+ struct drmem_lmb *lmb, *start_lmb, *end_lmb;
+ int rc;
+
+ pr_info("Attempting to hot-add %u LMB(s) at index %x\n",
+ lmbs_to_add, drc_index);
+
+ if (lmbs_to_add == 0)
+ return -EINVAL;
+
+ rc = get_lmb_range(drc_index, lmbs_to_add, &start_lmb, &end_lmb);
+ if (rc)
+ return -EINVAL;
+
+ /* Validate that the LMBs in this range are not reserved */
+ for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) {
+ /* Fail immediately if the whole range can't be hot-added */
+ if (lmb->flags & DRCONF_MEM_RESERVED) {
+ pr_err("Memory at %llx (drc index %x) is reserved\n",
+ lmb->base_addr, lmb->drc_index);
+ return -EINVAL;
+ }
+ }
+
+ /* rc is 0 here, so a range that is already fully assigned succeeds */
+ for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) {
+ if (lmb->flags & DRCONF_MEM_ASSIGNED)
+ continue;
+
+ rc = dlpar_acquire_drc(lmb->drc_index);
+ if (rc)
+ break;
+
+ rc = dlpar_add_lmb(lmb);
+ if (rc) {
+ dlpar_release_drc(lmb->drc_index);
+ break;
+ }
+
+ /* Remember that this call added the LMB, for commit/rollback */
+ drmem_mark_lmb_reserved(lmb);
+ }
+
+ if (rc) {
+ pr_err("Memory indexed-count-add failed, removing any added LMBs\n");
+
+ for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) {
+ if (!drmem_lmb_reserved(lmb))
+ continue;
+
+ rc = dlpar_remove_lmb(lmb);
+ if (rc)
+ pr_err("Failed to remove LMB, drc index %x\n",
+ lmb->drc_index);
+ else
+ dlpar_release_drc(lmb->drc_index);
+
+ drmem_remove_lmb_reservation(lmb);
+ }
+ /* The request failed regardless of how the rollback went */
+ rc = -EINVAL;
+ } else {
+ for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) {
+ if (!drmem_lmb_reserved(lmb))
+ continue;
+
+ pr_info("Memory at %llx (drc index %x) was hot-added\n",
+ lmb->base_addr, lmb->drc_index);
+ drmem_remove_lmb_reservation(lmb);
+ }
+ }
+
+ return rc;
+}
+
+/*
+ * Entry point for memory hotplug requests described by a hotplug error
+ * log.
+ *
+ * Dispatches on hp_elog->action (add/remove) and hp_elog->id_type (count,
+ * DRC index, or index+count) to the matching dlpar_memory_* helper, all
+ * under lock_device_hotplug(). When the helper succeeds the device tree
+ * is refreshed through drmem_update_dt().
+ *
+ * Returns 0 on success, -EINVAL for an unknown action or id type, or the
+ * error from the helper or from drmem_update_dt().
+ */
+int dlpar_memory(struct pseries_hp_errorlog *hp_elog)
+{
+ u32 count, drc_index;
+ int rc;
+
+ lock_device_hotplug();
+
+ switch (hp_elog->action) {
+ case PSERIES_HP_ELOG_ACTION_ADD:
+ switch (hp_elog->id_type) {
+ case PSERIES_HP_ELOG_ID_DRC_COUNT:
+ count = hp_elog->_drc_u.drc_count;
+ rc = dlpar_memory_add_by_count(count);
+ break;
+ case PSERIES_HP_ELOG_ID_DRC_INDEX:
+ drc_index = hp_elog->_drc_u.drc_index;
+ rc = dlpar_memory_add_by_index(drc_index);
+ break;
+ case PSERIES_HP_ELOG_ID_DRC_IC:
+ count = hp_elog->_drc_u.ic.count;
+ drc_index = hp_elog->_drc_u.ic.index;
+ rc = dlpar_memory_add_by_ic(count, drc_index);
+ break;
+ default:
+ rc = -EINVAL;
+ break;
+ }
+
+ break;
+ case PSERIES_HP_ELOG_ACTION_REMOVE:
+ switch (hp_elog->id_type) {
+ case PSERIES_HP_ELOG_ID_DRC_COUNT:
+ count = hp_elog->_drc_u.drc_count;
+ rc = dlpar_memory_remove_by_count(count);
+ break;
+ case PSERIES_HP_ELOG_ID_DRC_INDEX:
+ drc_index = hp_elog->_drc_u.drc_index;
+ rc = dlpar_memory_remove_by_index(drc_index);
+ break;
+ case PSERIES_HP_ELOG_ID_DRC_IC:
+ count = hp_elog->_drc_u.ic.count;
+ drc_index = hp_elog->_drc_u.ic.index;
+ rc = dlpar_memory_remove_by_ic(count, drc_index);
+ break;
+ default:
+ rc = -EINVAL;
+ break;
+ }
+
+ break;
+ default:
+ pr_err("Invalid action (%d) specified\n", hp_elog->action);
+ rc = -EINVAL;
+ break;
+ }
+
+ /* Only push the new LMB state to the device tree on success */
+ if (!rc)
+ rc = drmem_update_dt();
+
+ unlock_device_hotplug();
+ return rc;
+}
+
+/*
+ * Handle attachment of a device tree node: if it is a "memory" node,
+ * register the range described by its first address resource with
+ * memblock.
+ *
+ * Returns 0 for non-memory nodes and on success, the error from
+ * of_address_to_resource(), or -EINVAL if memblock_add() fails.
+ */
+static int pseries_add_mem_node(struct device_node *np)
+{
+	struct resource res;
+	int ret;
+
+	/* Nothing to do unless we are actually adding memory */
+	if (!of_node_is_type(np, "memory"))
+		return 0;
+
+	/* Find the base and size of the memblock */
+	ret = of_address_to_resource(np, 0, &res);
+	if (ret)
+		return ret;
+
+	/* Update memory region to represent the memory add */
+	if (memblock_add(res.start, resource_size(&res)) < 0)
+		return -EINVAL;
+
+	return 0;
+}
+
+/*
+ * OF reconfiguration notifier for memory.
+ *
+ * Node attach/detach events are forwarded to pseries_add_mem_node() and
+ * pseries_remove_mem_node(). A property update on the node named
+ * "ibm,dynamic-reconfiguration-memory" refreshes the LMB array through
+ * drmem_update_lmbs(). Only the attach/detach results are reported; every
+ * other action reports success.
+ */
+static int pseries_memory_notifier(struct notifier_block *nb,
+				   unsigned long action, void *data)
+{
+	struct of_reconfig_data *reconfig = data;
+	int err = 0;
+
+	if (action == OF_RECONFIG_ATTACH_NODE)
+		err = pseries_add_mem_node(reconfig->dn);
+	else if (action == OF_RECONFIG_DETACH_NODE)
+		err = pseries_remove_mem_node(reconfig->dn);
+	else if (action == OF_RECONFIG_UPDATE_PROPERTY &&
+		 !strcmp(reconfig->dn->name,
+			 "ibm,dynamic-reconfiguration-memory"))
+		drmem_update_lmbs(reconfig->prop);
+
+	return notifier_from_errno(err);
+}
+
+/* Notifier block that routes OF reconfiguration events to pseries_memory_notifier(). */
+static struct notifier_block pseries_mem_nb = {
+ .notifier_call = pseries_memory_notifier,
+};
+
+/*
+ * Initcall: register the memory reconfig notifier when running with the
+ * LPAR firmware feature. Always returns 0.
+ */
+static int __init pseries_memory_hotplug_init(void)
+{
+ if (firmware_has_feature(FW_FEATURE_LPAR))
+ of_reconfig_notifier_register(&pseries_mem_nb);
+
+ return 0;
+}
+machine_device_initcall(pseries, pseries_memory_hotplug_init);
diff --git a/arch/powerpc/platforms/pseries/hvCall.S b/arch/powerpc/platforms/pseries/hvCall.S
new file mode 100644
index 000000000..2b0cac6fb
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/hvCall.S
@@ -0,0 +1,370 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * This file contains the generic code to perform a call to the
+ * pSeries LPAR hypervisor.
+ */
+#include <linux/jump_label.h>
+#include <asm/hvcall.h>
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/ptrace.h>
+#include <asm/feature-fixups.h>
+
+ .section ".text"
+
+#ifdef CONFIG_TRACEPOINTS
+
+#ifndef CONFIG_JUMP_LABEL
+ .data
+
+ .globl hcall_tracepoint_refcount
+hcall_tracepoint_refcount:
+ .8byte 0
+
+ .section ".text"
+#endif
+
+/*
+ * precall must preserve all registers. use unused STK_PARAM()
+ * areas to save snapshots and opcode. STK_PARAM() in the caller's
+ * frame will be available even on ELFv2 because these are all
+ * variadic functions.
+ *
+ * r3-r10 are saved to their STK_PARAM() slots and LR to 16(r1); r4 is set
+ * to the address of the FIRST_REG slot and a STACK_FRAME_MIN_SIZE frame is
+ * pushed before __trace_hcall_entry is called. r3-r10 are reloaded
+ * afterwards, but the frame is left in place: it is popped by
+ * __HCALL_INST_POSTCALL.
+ */
+#define HCALL_INST_PRECALL(FIRST_REG) \
+ mflr r0; \
+ std r3,STK_PARAM(R3)(r1); \
+ std r4,STK_PARAM(R4)(r1); \
+ std r5,STK_PARAM(R5)(r1); \
+ std r6,STK_PARAM(R6)(r1); \
+ std r7,STK_PARAM(R7)(r1); \
+ std r8,STK_PARAM(R8)(r1); \
+ std r9,STK_PARAM(R9)(r1); \
+ std r10,STK_PARAM(R10)(r1); \
+ std r0,16(r1); \
+ addi r4,r1,STK_PARAM(FIRST_REG); \
+ stdu r1,-STACK_FRAME_MIN_SIZE(r1); \
+ bl CFUNC(__trace_hcall_entry); \
+ ld r3,STACK_FRAME_MIN_SIZE+STK_PARAM(R3)(r1); \
+ ld r4,STACK_FRAME_MIN_SIZE+STK_PARAM(R4)(r1); \
+ ld r5,STACK_FRAME_MIN_SIZE+STK_PARAM(R5)(r1); \
+ ld r6,STACK_FRAME_MIN_SIZE+STK_PARAM(R6)(r1); \
+ ld r7,STACK_FRAME_MIN_SIZE+STK_PARAM(R7)(r1); \
+ ld r8,STACK_FRAME_MIN_SIZE+STK_PARAM(R8)(r1); \
+ ld r9,STACK_FRAME_MIN_SIZE+STK_PARAM(R9)(r1); \
+ ld r10,STACK_FRAME_MIN_SIZE+STK_PARAM(R10)(r1)
+
+/*
+ * postcall is performed immediately before function return which
+ * allows liberal use of volatile registers.
+ *
+ * The r3 value saved by precall is passed to __trace_hcall_exit in r3 and
+ * the hcall's r3 result in r4; r5 is set up by the wrapper macros below.
+ * The result is parked in the R3 STK_PARAM() slot across the call, then
+ * LR is reloaded, the precall frame is popped and the result is returned
+ * to r3.
+ */
+#define __HCALL_INST_POSTCALL \
+ ld r0,STACK_FRAME_MIN_SIZE+STK_PARAM(R3)(r1); \
+ std r3,STACK_FRAME_MIN_SIZE+STK_PARAM(R3)(r1); \
+ mr r4,r3; \
+ mr r3,r0; \
+ bl CFUNC(__trace_hcall_exit); \
+ ld r0,STACK_FRAME_MIN_SIZE+16(r1); \
+ addi r1,r1,STACK_FRAME_MIN_SIZE; \
+ ld r3,STK_PARAM(R3)(r1); \
+ mtlr r0
+
+#define HCALL_INST_POSTCALL_NORETS \
+ li r5,0; \
+ __HCALL_INST_POSTCALL
+
+#define HCALL_INST_POSTCALL(BUFREG) \
+ mr r5,BUFREG; \
+ __HCALL_INST_POSTCALL
+
+#ifdef CONFIG_JUMP_LABEL
+#define HCALL_BRANCH(LABEL) \
+ ARCH_STATIC_BRANCH(LABEL, hcall_tracepoint_key)
+#else
+
+/*
+ * We branch around this in early init (eg when populating the MMU
+ * hashtable) by using an unconditional cpu feature.
+ */
+#define HCALL_BRANCH(LABEL) \
+BEGIN_FTR_SECTION; \
+ b 1f; \
+END_FTR_SECTION(0, 1); \
+ LOAD_REG_ADDR(r12, hcall_tracepoint_refcount) ; \
+ ld r12,0(r12); \
+ cmpdi r12,0; \
+ bne- LABEL; \
+1:
+#endif
+
+#else
+#define HCALL_INST_PRECALL(FIRST_ARG)
+#define HCALL_INST_POSTCALL_NORETS
+#define HCALL_INST_POSTCALL(BUFREG)
+#define HCALL_BRANCH(LABEL)
+#endif
+
+/*
+ * plpar_hcall_norets_notrace: hypervisor call returning only a status in
+ * r3. Unlike plpar_hcall_norets there is no HCALL_BRANCH tracing hook.
+ * CR is saved at 8(r1) across the call and PACASRR_VALID(r13) is cleared
+ * afterwards.
+ */
+_GLOBAL_TOC(plpar_hcall_norets_notrace)
+ HMT_MEDIUM
+
+ mfcr r0
+ stw r0,8(r1)
+ HVSC /* invoke the hypervisor */
+
+ li r4,0
+ stb r4,PACASRR_VALID(r13)
+
+ lwz r0,8(r1)
+ mtcrf 0xff,r0
+ blr /* return r3 = status */
+
+/*
+ * plpar_hcall_norets: hypervisor call returning only a status in r3.
+ * HCALL_BRANCH diverts to plpar_hcall_norets_trace when hcall tracing is
+ * active. CR is saved at 8(r1) across the call and PACASRR_VALID(r13) is
+ * cleared afterwards.
+ */
+_GLOBAL_TOC(plpar_hcall_norets)
+ HMT_MEDIUM
+
+ mfcr r0
+ stw r0,8(r1)
+ HCALL_BRANCH(plpar_hcall_norets_trace)
+ HVSC /* invoke the hypervisor */
+
+ li r4,0
+ stb r4,PACASRR_VALID(r13)
+
+ lwz r0,8(r1)
+ mtcrf 0xff,r0
+ blr /* return r3 = status */
+
+#ifdef CONFIG_TRACEPOINTS
+/*
+ * Traced tail of plpar_hcall_norets, entered with CR already saved at
+ * 8(r1). The hcall arguments start at r4, hence PRECALL(R4); there is no
+ * return buffer, so the exit hook is given 0 in r5.
+ */
+plpar_hcall_norets_trace:
+ HCALL_INST_PRECALL(R4)
+ HVSC
+ HCALL_INST_POSTCALL_NORETS
+
+ li r4,0
+ stb r4,PACASRR_VALID(r13)
+
+ lwz r0,8(r1)
+ mtcrf 0xff,r0
+ blr
+#endif
+
+/*
+ * plpar_hcall: hypervisor call with a return buffer.
+ *
+ * On entry r3 is the opcode, r4 the return buffer and r5-r10 the hcall
+ * arguments. The buffer pointer is saved in the R4 STK_PARAM() slot and
+ * the arguments are shifted down to r4-r9 before the call. Afterwards
+ * r4-r7 are stored to the first four doublewords of the buffer.
+ * HCALL_BRANCH diverts to plpar_hcall_trace when hcall tracing is active.
+ */
+_GLOBAL_TOC(plpar_hcall)
+ HMT_MEDIUM
+
+ mfcr r0
+ stw r0,8(r1)
+
+ HCALL_BRANCH(plpar_hcall_trace)
+
+ std r4,STK_PARAM(R4)(r1) /* Save ret buffer */
+
+ mr r4,r5
+ mr r5,r6
+ mr r6,r7
+ mr r7,r8
+ mr r8,r9
+ mr r9,r10
+
+ HVSC /* invoke the hypervisor */
+
+ ld r12,STK_PARAM(R4)(r1)
+ std r4, 0(r12)
+ std r5, 8(r12)
+ std r6, 16(r12)
+ std r7, 24(r12)
+
+ li r4,0
+ stb r4,PACASRR_VALID(r13)
+
+ lwz r0,8(r1)
+ mtcrf 0xff,r0
+
+ blr /* return r3 = status */
+
+#ifdef CONFIG_TRACEPOINTS
+/*
+ * Traced tail of plpar_hcall, entered with CR already saved at 8(r1).
+ * PRECALL saves the return buffer pointer (r4) in its STK_PARAM() slot
+ * and pushes a frame, so the pointer is reloaded at a
+ * STACK_FRAME_MIN_SIZE offset after the call and passed on to the exit
+ * hook.
+ */
+plpar_hcall_trace:
+ HCALL_INST_PRECALL(R5)
+
+ mr r4,r5
+ mr r5,r6
+ mr r6,r7
+ mr r7,r8
+ mr r8,r9
+ mr r9,r10
+
+ HVSC
+
+ ld r12,STACK_FRAME_MIN_SIZE+STK_PARAM(R4)(r1)
+ std r4,0(r12)
+ std r5,8(r12)
+ std r6,16(r12)
+ std r7,24(r12)
+
+ HCALL_INST_POSTCALL(r12)
+
+ li r4,0
+ stb r4,PACASRR_VALID(r13)
+
+ lwz r0,8(r1)
+ mtcrf 0xff,r0
+
+ blr
+#endif
+
+/*
+ * plpar_hcall_raw can be called in real mode. kexec/kdump need some
+ * hypervisor calls to be executed in real mode. So plpar_hcall_raw
+ * does not access the per cpu hypervisor call statistics variables,
+ * since these variables may not be present in the RMO region.
+ *
+ * Apart from having no HCALL_BRANCH tracing hook, the register shuffling
+ * and the four-doubleword return buffer handling match plpar_hcall.
+ */
+_GLOBAL(plpar_hcall_raw)
+ HMT_MEDIUM
+
+ mfcr r0
+ stw r0,8(r1)
+
+ std r4,STK_PARAM(R4)(r1) /* Save ret buffer */
+
+ mr r4,r5
+ mr r5,r6
+ mr r6,r7
+ mr r7,r8
+ mr r8,r9
+ mr r9,r10
+
+ HVSC /* invoke the hypervisor */
+
+ ld r12,STK_PARAM(R4)(r1)
+ std r4, 0(r12)
+ std r5, 8(r12)
+ std r6, 16(r12)
+ std r7, 24(r12)
+
+ li r4,0
+ stb r4,PACASRR_VALID(r13)
+
+ lwz r0,8(r1)
+ mtcrf 0xff,r0
+
+ blr /* return r3 = status */
+
+/*
+ * plpar_hcall9: hypervisor call with up to nine arguments and nine return
+ * values.
+ *
+ * On entry r3 is the opcode, r4 the return buffer and r5-r10 the first
+ * six arguments; arguments seven to nine are loaded from the caller's
+ * STK_PARAM() area. After the call r4-r12 are stored to the buffer as
+ * nine doublewords; r12 is first copied to r0 because r12 is reused as
+ * the buffer pointer. HCALL_BRANCH diverts to plpar_hcall9_trace when
+ * hcall tracing is active.
+ */
+_GLOBAL_TOC(plpar_hcall9)
+ HMT_MEDIUM
+
+ mfcr r0
+ stw r0,8(r1)
+
+ HCALL_BRANCH(plpar_hcall9_trace)
+
+ std r4,STK_PARAM(R4)(r1) /* Save ret buffer */
+
+ mr r4,r5
+ mr r5,r6
+ mr r6,r7
+ mr r7,r8
+ mr r8,r9
+ mr r9,r10
+ ld r10,STK_PARAM(R11)(r1) /* put arg7 in R10 */
+ ld r11,STK_PARAM(R12)(r1) /* put arg8 in R11 */
+ ld r12,STK_PARAM(R13)(r1) /* put arg9 in R12 */
+
+ HVSC /* invoke the hypervisor */
+
+ mr r0,r12
+ ld r12,STK_PARAM(R4)(r1)
+ std r4, 0(r12)
+ std r5, 8(r12)
+ std r6, 16(r12)
+ std r7, 24(r12)
+ std r8, 32(r12)
+ std r9, 40(r12)
+ std r10,48(r12)
+ std r11,56(r12)
+ std r0, 64(r12)
+
+ li r4,0
+ stb r4,PACASRR_VALID(r13)
+
+ lwz r0,8(r1)
+ mtcrf 0xff,r0
+
+ blr /* return r3 = status */
+
+#ifdef CONFIG_TRACEPOINTS
+/*
+ * Traced tail of plpar_hcall9, entered with CR already saved at 8(r1).
+ * PRECALL has pushed a frame, so arguments seven to nine and the saved
+ * return buffer pointer are all loaded at a STACK_FRAME_MIN_SIZE offset.
+ */
+plpar_hcall9_trace:
+ HCALL_INST_PRECALL(R5)
+
+ mr r4,r5
+ mr r5,r6
+ mr r6,r7
+ mr r7,r8
+ mr r8,r9
+ mr r9,r10
+ ld r10,STACK_FRAME_MIN_SIZE+STK_PARAM(R11)(r1)
+ ld r11,STACK_FRAME_MIN_SIZE+STK_PARAM(R12)(r1)
+ ld r12,STACK_FRAME_MIN_SIZE+STK_PARAM(R13)(r1)
+
+ HVSC
+
+ mr r0,r12
+ ld r12,STACK_FRAME_MIN_SIZE+STK_PARAM(R4)(r1)
+ std r4,0(r12)
+ std r5,8(r12)
+ std r6,16(r12)
+ std r7,24(r12)
+ std r8,32(r12)
+ std r9,40(r12)
+ std r10,48(r12)
+ std r11,56(r12)
+ std r0,64(r12)
+
+ HCALL_INST_POSTCALL(r12)
+
+ li r4,0
+ stb r4,PACASRR_VALID(r13)
+
+ lwz r0,8(r1)
+ mtcrf 0xff,r0
+
+ blr
+#endif
+
+/*
+ * See plpar_hcall_raw to see why this is needed.
+ *
+ * Apart from having no HCALL_BRANCH tracing hook, the argument loading
+ * and the nine-doubleword return buffer handling match plpar_hcall9.
+ */
+_GLOBAL(plpar_hcall9_raw)
+ HMT_MEDIUM
+
+ mfcr r0
+ stw r0,8(r1)
+
+ std r4,STK_PARAM(R4)(r1) /* Save ret buffer */
+
+ mr r4,r5
+ mr r5,r6
+ mr r6,r7
+ mr r7,r8
+ mr r8,r9
+ mr r9,r10
+ ld r10,STK_PARAM(R11)(r1) /* put arg7 in R10 */
+ ld r11,STK_PARAM(R12)(r1) /* put arg8 in R11 */
+ ld r12,STK_PARAM(R13)(r1) /* put arg9 in R12 */
+
+ HVSC /* invoke the hypervisor */
+
+ mr r0,r12
+ ld r12,STK_PARAM(R4)(r1)
+ std r4, 0(r12)
+ std r5, 8(r12)
+ std r6, 16(r12)
+ std r7, 24(r12)
+ std r8, 32(r12)
+ std r9, 40(r12)
+ std r10,48(r12)
+ std r11,56(r12)
+ std r0, 64(r12)
+
+ li r4,0
+ stb r4,PACASRR_VALID(r13)
+
+ lwz r0,8(r1)
+ mtcrf 0xff,r0
+
+ blr /* return r3 = status */
diff --git a/arch/powerpc/platforms/pseries/hvCall_inst.c b/arch/powerpc/platforms/pseries/hvCall_inst.c
new file mode 100644
index 000000000..3a50612a7
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/hvCall_inst.c
@@ -0,0 +1,140 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2006 Mike Kravetz IBM Corporation
+ *
+ * Hypervisor Call Instrumentation
+ */
+
+#include <linux/kernel.h>
+#include <linux/percpu.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include <linux/cpumask.h>
+#include <asm/hvcall.h>
+#include <asm/firmware.h>
+#include <asm/cputable.h>
+#include <asm/trace.h>
+#include <asm/machdep.h>
+
+/* For hcall instrumentation. One structure per-hcall, per-CPU */
+struct hcall_stats {
+ unsigned long num_calls; /* number of calls (on this CPU) */
+ unsigned long tb_total; /* total wall time (mftb) of calls. */
+ unsigned long purr_total; /* total cpu time (PURR) of calls. */
+ unsigned long tb_start; /* mftb() sampled by the entry probe */
+ unsigned long purr_start; /* PURR sampled by the entry probe */
+};
+#define HCALL_STAT_ARRAY_SIZE ((MAX_HCALL_OPCODE >> 2) + 1)
+
+static DEFINE_PER_CPU(struct hcall_stats[HCALL_STAT_ARRAY_SIZE], hcall_stats);
+
+/*
+ * Routines for displaying the statistics in debugfs
+ */
+/*
+ * seq_file .start callback: map position *pos to the iterator token
+ * *pos + 1, which hc_show() uses as the index into the stats array.
+ * Returns NULL once *pos reaches HCALL_STAT_ARRAY_SIZE - 1.
+ */
+static void *hc_start(struct seq_file *m, loff_t *pos)
+{
+	if ((int)*pos >= (HCALL_STAT_ARRAY_SIZE-1))
+		return NULL;
+
+	return (void *)(unsigned long)(*pos + 1);
+}
+
+/* seq_file .next callback: advance the position and reuse hc_start(). */
+static void *hc_next(struct seq_file *m, void *p, loff_t * pos)
+{
+	*pos += 1;
+
+	return hc_start(m, pos);
+}
+
+/* seq_file .stop callback: the iterator holds nothing that needs releasing. */
+static void hc_stop(struct seq_file *m, void *p)
+{
+}
+
+/*
+ * seq_file .show callback: print one line for the stats entry selected by
+ * the iterator token @p, skipping entries that were never called.
+ *
+ * The line holds the index shifted left by two, the call count and the
+ * accumulated timebase total, followed by the PURR total when the CPU has
+ * CPU_FTR_PURR.
+ */
+static int hc_show(struct seq_file *m, void *p)
+{
+	unsigned long idx = (unsigned long)p;
+	struct hcall_stats *table = m->private;
+	struct hcall_stats *entry = &table[idx];
+
+	if (!entry->num_calls)
+		return 0;
+
+	if (cpu_has_feature(CPU_FTR_PURR))
+		seq_printf(m, "%lu %lu %lu %lu\n", idx<<2,
+			   entry->num_calls,
+			   entry->tb_total,
+			   entry->purr_total);
+	else
+		seq_printf(m, "%lu %lu %lu\n", idx<<2,
+			   entry->num_calls,
+			   entry->tb_total);
+
+	return 0;
+}
+
+/* seq_file iterator over one CPU's hcall_stats array. */
+static const struct seq_operations hcall_inst_sops = {
+ .start = hc_start,
+ .next = hc_next,
+ .stop = hc_stop,
+ .show = hc_show
+};
+
+DEFINE_SEQ_ATTRIBUTE(hcall_inst);
+
+#define HCALL_ROOT_DIR "hcall_inst"
+#define CPU_NAME_BUF_SIZE 32
+
+
+/*
+ * hcall entry probe: record the current timebase and PURR values in this
+ * CPU's stats slot for @opcode. Opcodes above MAX_HCALL_OPCODE are
+ * ignored.
+ */
+static void probe_hcall_entry(void *ignored, unsigned long opcode, unsigned long *args)
+{
+	struct hcall_stats *slot;
+
+	if (opcode <= MAX_HCALL_OPCODE) {
+		slot = this_cpu_ptr(&hcall_stats[opcode / 4]);
+		slot->tb_start = mftb();
+		slot->purr_start = mfspr(SPRN_PURR);
+	}
+}
+
+/*
+ * hcall exit probe: count the call and add the timebase and PURR deltas
+ * since the matching entry probe to this CPU's stats slot for @opcode.
+ * Opcodes above MAX_HCALL_OPCODE are ignored.
+ */
+static void probe_hcall_exit(void *ignored, unsigned long opcode, long retval,
+			     unsigned long *retbuf)
+{
+	struct hcall_stats *slot;
+
+	if (opcode <= MAX_HCALL_OPCODE) {
+		slot = this_cpu_ptr(&hcall_stats[opcode / 4]);
+		slot->num_calls++;
+		slot->tb_total += mftb() - slot->tb_start;
+		slot->purr_total += mfspr(SPRN_PURR) - slot->purr_start;
+	}
+}
+
+/*
+ * Initcall: on LPAR firmware, attach the hcall entry/exit probes and
+ * create one debugfs file per possible CPU ("cpu<N>" under
+ * HCALL_ROOT_DIR) exposing that CPU's hcall statistics.
+ *
+ * Returns 0 on success or when the LPAR feature is absent, -EINVAL if a
+ * probe cannot be registered.
+ */
+static int __init hcall_inst_init(void)
+{
+	char name[CPU_NAME_BUF_SIZE];
+	struct dentry *root;
+	int cpu;
+
+	if (!firmware_has_feature(FW_FEATURE_LPAR))
+		return 0;
+
+	if (register_trace_hcall_entry(probe_hcall_entry, NULL))
+		return -EINVAL;
+
+	if (register_trace_hcall_exit(probe_hcall_exit, NULL))
+		goto err_unregister_entry;
+
+	root = debugfs_create_dir(HCALL_ROOT_DIR, NULL);
+
+	for_each_possible_cpu(cpu) {
+		snprintf(name, sizeof(name), "cpu%d", cpu);
+		debugfs_create_file(name, 0444, root,
+				    per_cpu(hcall_stats, cpu),
+				    &hcall_inst_fops);
+	}
+
+	return 0;
+
+err_unregister_entry:
+	/* Do not leave the entry probe attached without its exit twin */
+	unregister_trace_hcall_entry(probe_hcall_entry, NULL);
+	return -EINVAL;
+}
+machine_device_initcall(pseries, hcall_inst_init);
diff --git a/arch/powerpc/platforms/pseries/hvconsole.c b/arch/powerpc/platforms/pseries/hvconsole.c
new file mode 100644
index 000000000..1ac52963e
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/hvconsole.c
@@ -0,0 +1,75 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * hvconsole.c
+ * Copyright (C) 2004 Hollis Blanchard, IBM Corporation
+ * Copyright (C) 2004 IBM Corporation
+ *
+ * Additional Author(s):
+ * Ryan S. Arnold <rsa@us.ibm.com>
+ *
+ * LPAR console support.
+ */
+
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/errno.h>
+#include <asm/hvcall.h>
+#include <asm/hvconsole.h>
+#include <asm/plpar_wrappers.h>
+
+/**
+ * hvc_get_chars - retrieve characters from firmware for denoted vterm adapter
+ * @vtermno: The vtermno or unit_address of the adapter from which to fetch the
+ *	data.
+ * @buf: The character buffer into which to put the character data fetched from
+ *	firmware. Two unsigned longs are always stored through it, whatever
+ *	the hcall returned, so it must have room for them.
+ * @count: Not read by this implementation.
+ *
+ * Return: the first hcall return word (retbuf[0]) when H_GET_TERM_CHAR
+ * reports H_SUCCESS, otherwise 0.
+ */
+int hvc_get_chars(uint32_t vtermno, char *buf, int count)
+{
+	unsigned long hret[PLPAR_HCALL_BUFSIZE];
+	unsigned long *words = (unsigned long *)buf;
+	long rc = plpar_hcall(H_GET_TERM_CHAR, hret, vtermno);
+
+	words[0] = be64_to_cpu(hret[1]);
+	words[1] = be64_to_cpu(hret[2]);
+
+	if (rc != H_SUCCESS)
+		return 0;
+
+	return hret[0];
+}
+
+EXPORT_SYMBOL(hvc_get_chars);
+
+
+/**
+ * hvc_put_chars - send characters to firmware for denoted vterm adapter
+ * @vtermno: The vtermno or unit_address of the adapter from which the data
+ *	originated.
+ * @buf: The character buffer that contains the character data to send to
+ *	firmware. Must be at least 16 bytes, even if count is less than 16.
+ * @count: Send this number of characters; values above MAX_VIO_PUT_CHARS are
+ *	clamped to MAX_VIO_PUT_CHARS.
+ *
+ * Return: the (possibly clamped) count on H_SUCCESS, -EAGAIN on H_BUSY and
+ * -EIO for any other hcall result.
+ */
+int hvc_put_chars(uint32_t vtermno, const char *buf, int count)
+{
+	unsigned long *words = (unsigned long *) buf;
+	long rc;
+
+	/* hcall will ret H_PARAMETER if 'count' exceeds firmware max.*/
+	if (count > MAX_VIO_PUT_CHARS)
+		count = MAX_VIO_PUT_CHARS;
+
+	rc = plpar_hcall_norets(H_PUT_TERM_CHAR, vtermno, count,
+				cpu_to_be64(words[0]),
+				cpu_to_be64(words[1]));
+
+	switch (rc) {
+	case H_SUCCESS:
+		return count;
+	case H_BUSY:
+		return -EAGAIN;
+	default:
+		return -EIO;
+	}
+}
+
+EXPORT_SYMBOL(hvc_put_chars);
diff --git a/arch/powerpc/platforms/pseries/hvcserver.c b/arch/powerpc/platforms/pseries/hvcserver.c
new file mode 100644
index 000000000..d48c9c7ce
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/hvcserver.c
@@ -0,0 +1,239 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * hvcserver.c
+ * Copyright (C) 2004 Ryan S Arnold, IBM Corporation
+ *
+ * PPC64 virtual I/O console server support.
+ */
+
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+
+#include <asm/hvcall.h>
+#include <asm/hvcserver.h>
+#include <asm/io.h>
+
+#define HVCS_ARCH_VERSION "1.0.0"
+
+MODULE_AUTHOR("Ryan S. Arnold <rsa@us.ibm.com>");
+MODULE_DESCRIPTION("IBM hvcs ppc64 API");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(HVCS_ARCH_VERSION);
+
+/*
+ * Map an hcall return code onto an errno: H_SUCCESS gives 0, H_PARAMETER
+ * gives -EINVAL, H_HARDWARE gives -EIO, H_BUSY and every H_LONG_BUSY_ORDER_*
+ * code give -EBUSY, and everything else (H_FUNCTION included) gives -EPERM.
+ * The hvcs functions aren't performance sensitive, so the extra conversion
+ * step is not a concern.
+ */
+static int hvcs_convert(long to_convert)
+{
+	if (to_convert == H_SUCCESS)
+		return 0;
+
+	if (to_convert == H_PARAMETER)
+		return -EINVAL;
+
+	if (to_convert == H_HARDWARE)
+		return -EIO;
+
+	if (to_convert == H_BUSY ||
+	    to_convert == H_LONG_BUSY_ORDER_1_MSEC ||
+	    to_convert == H_LONG_BUSY_ORDER_10_MSEC ||
+	    to_convert == H_LONG_BUSY_ORDER_100_MSEC ||
+	    to_convert == H_LONG_BUSY_ORDER_1_SEC ||
+	    to_convert == H_LONG_BUSY_ORDER_10_SEC ||
+	    to_convert == H_LONG_BUSY_ORDER_100_SEC)
+		return -EBUSY;
+
+	/* H_FUNCTION and any code not listed above. */
+	return -EPERM;
+}
+
+/**
+ * hvcs_free_partner_info - free pi allocated by hvcs_get_partner_info
+ * @head: list_head pointer for an allocated list of partner info structs to
+ *	free.
+ *
+ * Unlinks and kfree()s every entry of the partner info list that was returned
+ * by calling hvcs_get_partner_info(), leaving @head empty.
+ *
+ * Return: 0 on success, -EINVAL if @head is NULL.
+ */
+int hvcs_free_partner_info(struct list_head *head)
+{
+	struct hvcs_partner_info *entry, *tmp;
+
+	if (!head)
+		return -EINVAL;
+
+	list_for_each_entry_safe(entry, tmp, head, node) {
+		list_del(&entry->node);
+		kfree(entry);
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(hvcs_free_partner_info);
+
+/*
+ * Helper function for hvcs_get_partner_info: issue one H_VTERM_PARTNER_INFO
+ * hcall, passing the physical address of pi_buff, and return the result
+ * converted to an errno by hvcs_convert().
+ */
+static int hvcs_next_partner(uint32_t unit_address,
+		unsigned long last_p_partition_ID,
+		unsigned long last_p_unit_address, unsigned long *pi_buff)
+{
+	return hvcs_convert(plpar_hcall_norets(H_VTERM_PARTNER_INFO,
+					       unit_address,
+					       last_p_partition_ID,
+					       last_p_unit_address,
+					       virt_to_phys(pi_buff)));
+}
+
+/**
+ * hvcs_get_partner_info - Get all of the partner info for a vty-server adapter
+ * @unit_address: The unit_address of the vty-server adapter for which this
+ *	function is fetching partner info.
+ * @head: A list_head pointer used to return the list of partner info fetched
+ *	from the hypervisor to the caller. It is (re)initialized here.
+ * @pi_buff: A page sized buffer pre-allocated prior to calling this function
+ *	that is to be used by firmware as an iterator to keep track of the
+ *	partner info retrieval. PAGE_SIZE bytes of it are zeroed here.
+ *
+ * Return: 0 on success, which includes the case where there is no partner
+ * info (the list is then empty) and the case where an hcall fails after at
+ * least one partner was already collected. -EINVAL if @head or @pi_buff is
+ * NULL, -ENOMEM if a list element could not be allocated (the partial list
+ * is freed first), or the errno produced by hvcs_convert() if the very first
+ * hcall fails.
+ *
+ * The pi_buff is pre-allocated prior to calling this function because this
+ * function may be called with a spin_lock held and kmalloc of a page is not
+ * recommended as GFP_ATOMIC.
+ *
+ * After each hcall the code reads pi_buff[0] as the partner partition ID and
+ * pi_buff[1] as the partner unit address (both big-endian), and starting at
+ * pi_buff[2] is the Converged Location Code string (diff size than the
+ * unsigned longs, hence the cast in the copy below).
+ *
+ * Invocation of this function should always be followed by an invocation of
+ * hvcs_free_partner_info() using a pointer to the SAME list head instance
+ * that was passed as a parameter to this function.
+ */
+int hvcs_get_partner_info(uint32_t unit_address, struct list_head *head,
+		unsigned long *pi_buff)
+{
+	/*
+	 * Dealt with as longs because of the hcall interface even though the
+	 * values are uint32_t.
+	 */
+	unsigned long last_p_partition_ID;
+	unsigned long last_p_unit_address;
+	struct hvcs_partner_info *next_partner_info;
+	int retval;
+
+	/* invalid parameters */
+	if (!head || !pi_buff)
+		return -EINVAL;
+
+	memset(pi_buff, 0x00, PAGE_SIZE);
+	/* ~0UL in both values asks firmware for the first partner. */
+	last_p_partition_ID = last_p_unit_address = ~0UL;
+	INIT_LIST_HEAD(head);
+
+	/* Leaves via return on error, or via break at the end marker. */
+	for (;;) {
+		retval = hvcs_next_partner(unit_address, last_p_partition_ID,
+				last_p_unit_address, pi_buff);
+		if (retval) {
+			/*
+			 * Don't indicate that we've failed if we have
+			 * any list elements.
+			 */
+			if (!list_empty(head))
+				return 0;
+			return retval;
+		}
+
+		last_p_partition_ID = be64_to_cpu(pi_buff[0]);
+		last_p_unit_address = be64_to_cpu(pi_buff[1]);
+
+		/* This indicates that there are no further partners */
+		if (last_p_partition_ID == ~0UL
+				&& last_p_unit_address == ~0UL)
+			break;
+
+		/* This is a very small struct and will be freed soon in
+		 * hvcs_free_partner_info(). */
+		next_partner_info = kmalloc(sizeof(*next_partner_info),
+				GFP_ATOMIC);
+
+		if (!next_partner_info) {
+			printk(KERN_WARNING "HVCONSOLE: kmalloc() failed to"
+				" allocate partner info struct.\n");
+			hvcs_free_partner_info(head);
+			return -ENOMEM;
+		}
+
+		next_partner_info->unit_address
+			= (unsigned int)last_p_unit_address;
+		next_partner_info->partition_ID
+			= (unsigned int)last_p_partition_ID;
+
+		/* strscpy() bounds the copy and always NUL-terminates */
+		strscpy(&next_partner_info->location_code[0],
+			(char *)&pi_buff[2],
+			sizeof(next_partner_info->location_code));
+
+		list_add_tail(&(next_partner_info->node), head);
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(hvcs_get_partner_info);
+
+/**
+ * hvcs_register_connection - establish a connection between this vty-server and
+ *	a vty.
+ * @unit_address: The unit address of the vty-server adapter that is to
+ *	establish a connection.
+ * @p_partition_ID: The partition ID of the vty adapter that is to be connected.
+ * @p_unit_address: The unit address of the vty adapter to which the vty-server
+ *	is to be connected.
+ *
+ * Issues H_REGISTER_VTERM and returns its result converted by hvcs_convert().
+ *
+ * A first -EINVAL may indicate that the partner info needs to be refreshed
+ * for the target unit address; the caller must then invoke
+ * hvcs_get_partner_info() and call this function again. A second -EINVAL
+ * indicates that there is probably already a partner connection registered
+ * to a different vty-server adapter, or possibly that one of the parms is
+ * not valid, for instance if the link was removed between the vty-server
+ * adapter and the vty adapter that you are trying to open. Don't shoot the
+ * messenger. Firmware implemented it this way.
+ */
+int hvcs_register_connection( uint32_t unit_address,
+		uint32_t p_partition_ID, uint32_t p_unit_address)
+{
+	return hvcs_convert(plpar_hcall_norets(H_REGISTER_VTERM, unit_address,
+					       p_partition_ID,
+					       p_unit_address));
+}
+EXPORT_SYMBOL(hvcs_register_connection);
+
+/**
+ * hvcs_free_connection - free the connection between a vty-server and vty
+ * @unit_address: The unit address of the vty-server that is to have its
+ *	connection severed.
+ *
+ * Issues H_FREE_VTERM to free the partner connection between a vty-server
+ * adapter and a vty adapter, and returns the result converted by
+ * hvcs_convert().
+ *
+ * If -EBUSY is returned continue to call this function until 0 is returned.
+ */
+int hvcs_free_connection(uint32_t unit_address)
+{
+	return hvcs_convert(plpar_hcall_norets(H_FREE_VTERM, unit_address));
+}
+EXPORT_SYMBOL(hvcs_free_connection);
diff --git a/arch/powerpc/platforms/pseries/ibmebus.c b/arch/powerpc/platforms/pseries/ibmebus.c
new file mode 100644
index 000000000..998e3aff2
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/ibmebus.c
@@ -0,0 +1,479 @@
+/*
+ * IBM PowerPC IBM eBus Infrastructure Support.
+ *
+ * Copyright (c) 2005 IBM Corporation
+ * Joachim Fenkes <fenkes@de.ibm.com>
+ * Heiko J Schick <schickhj@de.ibm.com>
+ *
+ * All rights reserved.
+ *
+ * This source code is distributed under a dual license of GPL v2.0 and OpenIB
+ * BSD.
+ *
+ * OpenIB BSD License
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/init.h>
+#include <linux/export.h>
+#include <linux/console.h>
+#include <linux/kobject.h>
+#include <linux/dma-map-ops.h>
+#include <linux/interrupt.h>
+#include <linux/irqdomain.h>
+#include <linux/of.h>
+#include <linux/slab.h>
+#include <linux/stat.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <asm/ibmebus.h>
+#include <asm/machdep.h>
+
+static struct device ibmebus_bus_device = { /* fake "parent" device */
+ .init_name = "ibmebus",
+};
+
+struct bus_type ibmebus_bus_type; /* tentative definition; the initialised definition follows the bus callbacks further down */
+
+/* These devices will automatically be added to the bus during init */
+static const struct of_device_id ibmebus_matches[] __initconst = {
+ { .compatible = "IBM,lhca" },
+ { .compatible = "IBM,lhea" },
+ {}, /* empty terminating entry */
+};
+
+/*
+ * dma_map_ops .alloc: kmalloc() @size bytes with @flag and report the kernel
+ * virtual address itself, cast to dma_addr_t, as the DMA handle. @dev and
+ * @attrs are not used. Returns the kmalloc() result.
+ */
+static void *ibmebus_alloc_coherent(struct device *dev,
+				    size_t size,
+				    dma_addr_t *dma_handle,
+				    gfp_t flag,
+				    unsigned long attrs)
+{
+	void *buf = kmalloc(size, flag);
+
+	*dma_handle = (dma_addr_t)buf;
+	return buf;
+}
+
+/* dma_map_ops .free: kfree() @vaddr; every other argument is ignored. */
+static void ibmebus_free_coherent(struct device *dev, size_t size,
+				  void *vaddr, dma_addr_t dma_handle,
+				  unsigned long attrs)
+{
+	kfree(vaddr);
+}
+
+/*
+ * dma_map_ops .map_page: the DMA address is the kernel virtual address of
+ * @page plus @offset. @dev, @size, @direction and @attrs are not used.
+ */
+static dma_addr_t ibmebus_map_page(struct device *dev, struct page *page,
+				   unsigned long offset, size_t size,
+				   enum dma_data_direction direction,
+				   unsigned long attrs)
+{
+	void *vaddr = page_address(page) + offset;
+
+	return (dma_addr_t)vaddr;
+}
+
+/* dma_map_ops .unmap_page: intentionally does nothing. */
+static void ibmebus_unmap_page(struct device *dev, dma_addr_t dma_addr,
+			       size_t size,
+			       enum dma_data_direction direction,
+			       unsigned long attrs)
+{
+}
+
+/*
+ * dma_map_ops .map_sg: for each of the @nents entries of @sgl, set the DMA
+ * address to the entry's kernel virtual address and the DMA length to its
+ * length. Returns @nents. @dev, @direction and @attrs are not used.
+ */
+static int ibmebus_map_sg(struct device *dev, struct scatterlist *sgl,
+			  int nents, enum dma_data_direction direction,
+			  unsigned long attrs)
+{
+	struct scatterlist *entry;
+	int idx;
+
+	for_each_sg(sgl, entry, nents, idx) {
+		entry->dma_length = entry->length;
+		entry->dma_address = (dma_addr_t) sg_virt(entry);
+	}
+
+	return nents;
+}
+
+/* dma_map_ops .unmap_sg: intentionally does nothing. */
+static void ibmebus_unmap_sg(struct device *dev, struct scatterlist *sg,
+			     int nents, enum dma_data_direction direction,
+			     unsigned long attrs)
+{
+}
+
+/* dma_map_ops .dma_supported: only a full 64-bit @mask is accepted. */
+static int ibmebus_dma_supported(struct device *dev, u64 mask)
+{
+	const u64 full_mask = DMA_BIT_MASK(64);
+
+	return mask == full_mask;
+}
+
+/* dma_map_ops .get_required_mask: always the full 64-bit mask. */
+static u64 ibmebus_dma_get_required_mask(struct device *dev)
+{
+	const u64 full_mask = DMA_BIT_MASK(64);
+
+	return full_mask;
+}
+
+static const struct dma_map_ops ibmebus_dma_ops = { /* installed on every device made by ibmebus_create_device() */
+ .alloc = ibmebus_alloc_coherent, /* kmalloc(); handle is the kernel virtual address */
+ .free = ibmebus_free_coherent, /* kfree() */
+ .map_sg = ibmebus_map_sg, /* DMA address = sg_virt() of each entry */
+ .unmap_sg = ibmebus_unmap_sg, /* no-op */
+ .dma_supported = ibmebus_dma_supported, /* accepts only DMA_BIT_MASK(64) */
+ .get_required_mask = ibmebus_dma_get_required_mask, /* always DMA_BIT_MASK(64) */
+ .map_page = ibmebus_map_page, /* DMA address = page_address() + offset */
+ .unmap_page = ibmebus_unmap_page, /* no-op */
+};
+
+/*
+ * bus_find_device() match callback: @data is a device tree path. Returns
+ * non-zero when the node found for that path is @dev's of_node.
+ */
+static int ibmebus_match_path(struct device *dev, const void *data)
+{
+	struct device_node *target = of_find_node_by_path(data);
+	int same = (target == to_platform_device(dev)->dev.of_node);
+
+	/* Only the pointer value was needed; drop the lookup reference. */
+	of_node_put(target);
+
+	return same;
+}
+
+/*
+ * bus_find_device() match callback: @data is a device_node pointer. Returns
+ * non-zero when it is @dev's of_node.
+ */
+static int ibmebus_match_node(struct device *dev, const void *data)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+
+	return pdev->dev.of_node == data;
+}
+
+/*
+ * Allocate a platform device for @dn under the fake ibmebus parent, attach it
+ * to the ibmebus bus with the ibmebus DMA ops, and add it.
+ *
+ * Returns 0 on success, -ENOMEM if the device could not be allocated, or the
+ * of_device_add() error (the device reference is dropped in that case).
+ */
+static int ibmebus_create_device(struct device_node *dn)
+{
+	struct platform_device *pdev;
+	int err;
+
+	pdev = of_device_alloc(dn, NULL, &ibmebus_bus_device);
+	if (!pdev)
+		return -ENOMEM;
+
+	pdev->dev.bus = &ibmebus_bus_type;
+	pdev->dev.dma_ops = &ibmebus_dma_ops;
+
+	err = of_device_add(pdev);
+	if (!err)
+		return 0;
+
+	platform_device_put(pdev);
+	return err;
+}
+
+/*
+ * Create an ibmebus device for every child of the device tree root that
+ * matches @matches and is not already present on the bus.
+ *
+ * Returns 0 on success or the first ibmebus_create_device() error, at which
+ * point the scan stops.
+ */
+static int ibmebus_create_devices(const struct of_device_id *matches)
+{
+	struct device_node *root, *child;
+	struct device *dev;
+	int ret = 0;
+
+	root = of_find_node_by_path("/");
+
+	for_each_child_of_node(root, child) {
+		if (!of_match_node(matches, child))
+			continue;
+
+		/* Skip nodes that already have a device on the bus. */
+		dev = bus_find_device(&ibmebus_bus_type, NULL, child,
+				      ibmebus_match_node);
+		if (dev) {
+			put_device(dev);
+			continue;
+		}
+
+		ret = ibmebus_create_device(child);
+		if (ret) {
+			/* Newline added so the message ends its own log line. */
+			printk(KERN_ERR "%s: failed to create device (%i)\n",
+			       __func__, ret);
+			/* Leaving the iterator early: drop the child ref here. */
+			of_node_put(child);
+			break;
+		}
+	}
+
+	of_node_put(root);
+	return ret;
+}
+
+/*
+ * Register @drv on the ibmebus bus. Devices matching the driver's
+ * of_match_table that the bus does not have yet are created first; the result
+ * of that step is not checked. Returns the driver_register() result.
+ */
+int ibmebus_register_driver(struct platform_driver *drv)
+{
+	struct device_driver *core = &drv->driver;
+
+	/* If the driver uses devices that ibmebus doesn't know, add them */
+	ibmebus_create_devices(core->of_match_table);
+
+	core->bus = &ibmebus_bus_type;
+	return driver_register(core);
+}
+EXPORT_SYMBOL(ibmebus_register_driver);
+
+/* Unregister @drv's embedded device_driver from the driver core. */
+void ibmebus_unregister_driver(struct platform_driver *drv)
+{
+	struct device_driver *core = &drv->driver;
+
+	driver_unregister(core);
+}
+EXPORT_SYMBOL(ibmebus_unregister_driver);
+
+/*
+ * Create an irq mapping for @ist and request it with the given handler,
+ * flags, name and cookie. Returns -EINVAL if no mapping could be created,
+ * otherwise the request_irq() result.
+ */
+int ibmebus_request_irq(u32 ist, irq_handler_t handler,
+			unsigned long irq_flags, const char *devname,
+			void *dev_id)
+{
+	unsigned int virq = irq_create_mapping(NULL, ist);
+
+	if (virq)
+		return request_irq(virq, handler, irq_flags, devname, dev_id);
+
+	return -EINVAL;
+}
+EXPORT_SYMBOL(ibmebus_request_irq);
+
+/*
+ * Look up the irq mapped for @ist, free it for @dev_id and dispose of the
+ * mapping.
+ */
+void ibmebus_free_irq(u32 ist, void *dev_id)
+{
+	unsigned int virq;
+
+	virq = irq_find_mapping(NULL, ist);
+	free_irq(virq, dev_id);
+	irq_dispose_mapping(virq);
+}
+EXPORT_SYMBOL(ibmebus_free_irq);
+
+/*
+ * Return a kmalloc()ed, NUL-terminated copy of the first @count bytes of @in
+ * with a single trailing newline, if present, removed. The caller must
+ * kfree() the result. Returns NULL if the allocation fails.
+ */
+static char *ibmebus_chomp(const char *in, size_t count)
+{
+	char *out = kmalloc(count + 1, GFP_KERNEL);
+
+	if (!out)
+		return NULL;
+
+	memcpy(out, in, count);
+	out[count] = '\0';
+	/*
+	 * An empty input has no last byte: without the count check,
+	 * out[count - 1] would index one byte before the allocation.
+	 */
+	if (count && out[count - 1] == '\n')
+		out[count - 1] = '\0';
+
+	return out;
+}
+
+/*
+ * Store handler of the write-only bus attribute "probe": the written text,
+ * minus one trailing newline, is a device tree path for which an ibmebus
+ * device is created.
+ *
+ * Returns @count on success, -ENOMEM if the path copy cannot be allocated,
+ * -EEXIST if a device for that path is already on the bus, -ENODEV if no
+ * node exists at the path, or the ibmebus_create_device() error.
+ */
+static ssize_t probe_store(const struct bus_type *bus, const char *buf, size_t count)
+{
+	struct device_node *node;
+	struct device *existing;
+	ssize_t rc = 0;
+	char *path;
+
+	path = ibmebus_chomp(buf, count);
+	if (!path)
+		return -ENOMEM;
+
+	existing = bus_find_device(&ibmebus_bus_type, NULL, path,
+				   ibmebus_match_path);
+	if (existing) {
+		put_device(existing);
+		printk(KERN_WARNING "%s: %s has already been probed\n",
+		       __func__, path);
+		rc = -EEXIST;
+		goto out;
+	}
+
+	node = of_find_node_by_path(path);
+	if (!node) {
+		printk(KERN_WARNING "%s: no such device node: %s\n",
+		       __func__, path);
+		rc = -ENODEV;
+		goto out;
+	}
+
+	rc = ibmebus_create_device(node);
+	of_node_put(node);
+
+out:
+	kfree(path);
+	if (rc)
+		return rc;
+	return count;
+}
+static BUS_ATTR_WO(probe);
+
+/*
+ * Store handler of the write-only bus attribute "remove": the written text,
+ * minus one trailing newline, is a device tree path whose ibmebus device is
+ * unregistered.
+ *
+ * Returns @count on success, -ENOMEM if the path copy cannot be allocated,
+ * or -ENODEV if no device on the bus matches the path.
+ */
+static ssize_t remove_store(const struct bus_type *bus, const char *buf, size_t count)
+{
+	struct device *dev;
+	char *path;
+
+	path = ibmebus_chomp(buf, count);
+	if (!path)
+		return -ENOMEM;
+
+	dev = bus_find_device(&ibmebus_bus_type, NULL, path,
+			      ibmebus_match_path);
+	if (!dev) {
+		printk(KERN_WARNING "%s: %s not on the bus\n",
+		       __func__, path);
+
+		kfree(path);
+		return -ENODEV;
+	}
+
+	of_device_unregister(to_platform_device(dev));
+	/* Drop the reference taken by bus_find_device(). */
+	put_device(dev);
+
+	kfree(path);
+	return count;
+}
+static BUS_ATTR_WO(remove);
+
+static struct attribute *ibmbus_bus_attrs[] = { /* bus-level attributes, both declared write-only via BUS_ATTR_WO() */
+ &bus_attr_probe.attr,
+ &bus_attr_remove.attr,
+ NULL, /* terminator */
+};
+ATTRIBUTE_GROUPS(ibmbus_bus); /* generates ibmbus_bus_groups, referenced by ibmebus_bus_type.bus_groups */
+
+/*
+ * Bus .match callback: returns non-zero when @drv has an of_match_table and
+ * @dev matches one of its entries, 0 otherwise.
+ */
+static int ibmebus_bus_bus_match(struct device *dev, struct device_driver *drv)
+{
+	const struct of_device_id *table = drv->of_match_table;
+
+	if (table)
+		return of_match_device(table, dev) != NULL;
+
+	return 0;
+}
+
+/*
+ * Bus .probe callback: call the platform driver's probe() for @dev when the
+ * driver matches the device.
+ *
+ * A device reference is taken before probing and kept on success; it is
+ * dropped again when the result is non-zero. Returns -ENODEV if the driver
+ * has no probe() or does not match, otherwise the probe() result.
+ */
+static int ibmebus_bus_device_probe(struct device *dev)
+{
+	struct platform_driver *pdrv = to_platform_driver(dev->driver);
+	int error;
+
+	if (!pdrv->probe)
+		return -ENODEV;
+
+	get_device(dev);
+
+	if (of_driver_match_device(dev, dev->driver))
+		error = pdrv->probe(to_platform_device(dev));
+	else
+		error = -ENODEV;
+
+	if (error)
+		put_device(dev);
+
+	return error;
+}
+
+/*
+ * Bus .remove callback: forward to the platform driver's remove() when @dev
+ * has a driver that provides one.
+ */
+static void ibmebus_bus_device_remove(struct device *dev)
+{
+	struct platform_driver *pdrv;
+
+	if (!dev->driver)
+		return;
+
+	pdrv = to_platform_driver(dev->driver);
+	if (pdrv->remove)
+		pdrv->remove(to_platform_device(dev));
+}
+
+/*
+ * Bus .shutdown callback: forward to the platform driver's shutdown() when
+ * @dev has a driver that provides one.
+ */
+static void ibmebus_bus_device_shutdown(struct device *dev)
+{
+	struct platform_driver *pdrv;
+
+	if (!dev->driver)
+		return;
+
+	pdrv = to_platform_driver(dev->driver);
+	if (pdrv->shutdown)
+		pdrv->shutdown(to_platform_device(dev));
+}
+
+/*
+ * ibmebus_bus_device_attrs
+ */
+
+/*
+ * sysfs "devspec" attribute: the device's device tree node formatted with
+ * %pOF, followed by a newline. Returns the number of bytes written to @buf.
+ */
+static ssize_t devspec_show(struct device *dev,
+			    struct device_attribute *attr, char *buf)
+{
+	struct platform_device *ofdev = to_platform_device(dev);
+
+	/* sysfs_emit() bounds the output to the sysfs page, unlike sprintf(). */
+	return sysfs_emit(buf, "%pOF\n", ofdev->dev.of_node);
+}
+static DEVICE_ATTR_RO(devspec);
+
+/*
+ * sysfs "name" attribute: the device's device tree node formatted with
+ * %pOFn, followed by a newline. Returns the number of bytes written to @buf.
+ */
+static ssize_t name_show(struct device *dev,
+			 struct device_attribute *attr, char *buf)
+{
+	struct platform_device *ofdev = to_platform_device(dev);
+
+	/* sysfs_emit() bounds the output to the sysfs page, unlike sprintf(). */
+	return sysfs_emit(buf, "%pOFn\n", ofdev->dev.of_node);
+}
+static DEVICE_ATTR_RO(name);
+
+/*
+ * sysfs "modalias" attribute: delegates to of_device_modalias() with a
+ * PAGE_SIZE output limit and returns its result.
+ */
+static ssize_t modalias_show(struct device *dev,
+			     struct device_attribute *attr, char *buf)
+{
+	ssize_t len = of_device_modalias(dev, buf, PAGE_SIZE);
+
+	return len;
+}
+static DEVICE_ATTR_RO(modalias);
+
+static struct attribute *ibmebus_bus_device_attrs[] = { /* per-device attributes, all declared read-only via DEVICE_ATTR_RO() */
+ &dev_attr_devspec.attr,
+ &dev_attr_name.attr,
+ &dev_attr_modalias.attr,
+ NULL, /* terminator */
+};
+ATTRIBUTE_GROUPS(ibmebus_bus_device); /* generates ibmebus_bus_device_groups, referenced by ibmebus_bus_type.dev_groups */
+
+/* Bus .uevent callback: delegates to of_device_uevent_modalias(). */
+static int ibmebus_bus_modalias(const struct device *dev, struct kobj_uevent_env *env)
+{
+	int rc = of_device_uevent_modalias(dev, env);
+
+	return rc;
+}
+
+struct bus_type ibmebus_bus_type = { /* the ibmebus bus, registered by ibmebus_bus_init() */
+ .name = "ibmebus",
+ .uevent = ibmebus_bus_modalias, /* forwards to of_device_uevent_modalias() */
+ .bus_groups = ibmbus_bus_groups, /* "probe" and "remove" bus attributes */
+ .match = ibmebus_bus_bus_match, /* OF match against the driver's of_match_table */
+ .probe = ibmebus_bus_device_probe,
+ .remove = ibmebus_bus_device_remove,
+ .shutdown = ibmebus_bus_device_shutdown,
+ .dev_groups = ibmebus_bus_device_groups, /* "devspec", "name" and "modalias" device attributes */
+};
+EXPORT_SYMBOL(ibmebus_bus_type);
+
+/*
+ * Register the ibmebus bus and its fake parent device, then create devices
+ * for the nodes listed in ibmebus_matches. Every failure unwinds what was
+ * registered before it and returns the error; returns 0 on success.
+ */
+static int __init ibmebus_bus_init(void)
+{
+	int err;
+
+	printk(KERN_INFO "IBM eBus Device Driver\n");
+
+	err = bus_register(&ibmebus_bus_type);
+	if (err) {
+		printk(KERN_ERR "%s: failed to register IBM eBus.\n",
+		       __func__);
+		return err;
+	}
+
+	err = device_register(&ibmebus_bus_device);
+	if (err) {
+		printk(KERN_WARNING "%s: device_register returned %i\n",
+		       __func__, err);
+		/* A failed device_register() is undone with put_device(). */
+		put_device(&ibmebus_bus_device);
+		goto out_unregister_bus;
+	}
+
+	err = ibmebus_create_devices(ibmebus_matches);
+	if (err) {
+		device_unregister(&ibmebus_bus_device);
+		goto out_unregister_bus;
+	}
+
+	return 0;
+
+out_unregister_bus:
+	bus_unregister(&ibmebus_bus_type);
+	return err;
+}
+machine_postcore_initcall(pseries, ibmebus_bus_init);
diff --git a/arch/powerpc/platforms/pseries/io_event_irq.c b/arch/powerpc/platforms/pseries/io_event_irq.c
new file mode 100644
index 000000000..f411d4fe7
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/io_event_irq.c
@@ -0,0 +1,161 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2010 2011 Mark Nelson and Tseng-Hui (Frank) Lin, IBM Corporation
+ */
+
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/export.h>
+#include <linux/irq.h>
+#include <linux/interrupt.h>
+#include <linux/of.h>
+#include <linux/list.h>
+#include <linux/notifier.h>
+
+#include <asm/machdep.h>
+#include <asm/rtas.h>
+#include <asm/irq.h>
+#include <asm/io_event_irq.h>
+
+#include "pseries.h"
+
+/*
+ * IO event interrupt is a mechanism provided by RTAS to return
+ * information about hardware error and non-error events. Device
+ * drivers can register their event handlers to receive events.
+ * Device drivers are expected to use atomic_notifier_chain_register()
+ * and atomic_notifier_chain_unregister() to register and unregister
+ * their event handlers. Since multiple IO event types and scopes
+ * share an IO event interrupt, the event handlers are called one
+ * by one until the IO event is claimed by one of the handlers.
+ * The event handlers are expected to return NOTIFY_OK if the
+ * event is handled by the event handler or NOTIFY_DONE if the
+ * event does not belong to the handler.
+ *
+ * Usage:
+ *
+ * Notifier function:
+ * #include <asm/io_event_irq.h>
+ * int event_handler(struct notifier_block *nb, unsigned long val, void *data) {
+ * p = (struct pseries_io_event_sect_data *) data;
+ * if (! is_my_event(p->scope, p->event_type)) return NOTIFY_DONE;
+ * :
+ * :
+ * return NOTIFY_OK;
+ * }
+ * struct notifier_block event_nb = {
+ * .notifier_call = event_handler,
+ * }
+ *
+ * Registration:
+ * atomic_notifier_chain_register(&pseries_ioei_notifier_list, &event_nb);
+ *
+ * Unregistration:
+ * atomic_notifier_chain_unregister(&pseries_ioei_notifier_list, &event_nb);
+ */
+
+ATOMIC_NOTIFIER_HEAD(pseries_ioei_notifier_list); /* chain called by ioei_interrupt() for every IO event it retrieves */
+EXPORT_SYMBOL_GPL(pseries_ioei_notifier_list);
+
+static int ioei_check_exception_token; /* RTAS token for check-exception; set in ioei_init() */
+
+static char ioei_rtas_buf[RTAS_DATA_BUF_SIZE] __cacheline_aligned; /* passed by physical address to each check-exception call; parsed as the event log afterwards */
+
+/**
+ * ioei_find_event - Find the data portion of an IO Event section from event log.
+ * @elog: RTAS error/event log.
+ *
+ * Warnings are printed at most once each (printk_once) when the log is not of
+ * type RTAS_TYPE_IO or contains no IO Event section.
+ *
+ * Return:
+ * 	pointer to a valid IO event section data. NULL if not found.
+ */
+static struct pseries_io_event * ioei_find_event(struct rtas_error_log *elog)
+{
+	struct pseries_errorlog *sect;
+
+	/* We should only ever get called for io-event interrupts, but if
+	 * we do get called for another type then something went wrong so
+	 * make some noise about it.
+	 * RTAS_TYPE_IO only exists in extended event log version 6 or later.
+	 * No need to check event log version.
+	 */
+	if (unlikely(rtas_error_type(elog) != RTAS_TYPE_IO)) {
+		/* Newline added so the message ends its own log line. */
+		printk_once(KERN_WARNING"io_event_irq: Unexpected event type %d\n",
+			    rtas_error_type(elog));
+		return NULL;
+	}
+
+	sect = get_pseries_errorlog(elog, PSERIES_ELOG_SECT_ID_IO_EVENT);
+	if (unlikely(!sect)) {
+		printk_once(KERN_WARNING "io_event_irq: RTAS extended event "
+			    "log does not contain an IO Event section. "
+			    "Could be a bug in system firmware!\n");
+		return NULL;
+	}
+	return (struct pseries_io_event *) &sect->data;
+}
+
+/*
+ * PAPR:
+ * - check-exception returns the first found error or event and clear that
+ * error or event so it is reported once.
+ * - Each interrupt returns one event. If a platform chooses to report
+ * multiple events through a single interrupt, it must ensure that the
+ * interrupt remains asserted until check-exception has been used to
+ * process all outstanding events for that interrupt.
+ *
+ * Implementation notes:
+ * - Events must be processed in the order they are returned. Hence,
+ * sequential in nature.
+ * - The owner of an event is determined by combinations of scope,
+ * event type, and sub-type. There is no easy way to pre-sort clients
+ * by scope or event type alone. For example, Torrent ISR route change
+ * event is reported with scope 0x00 (Not Applicable) rather than
+ * 0x3B (Torrent-hub). It is better to let the clients to identify
+ * who owns the event.
+ */
+
+/*
+ * IO event interrupt handler: keep calling RTAS check-exception for this
+ * interrupt until it returns non-zero, handing every IO event found in the
+ * returned log to the pseries_ioei_notifier_list chain. Logs in which no IO
+ * event is found are skipped. Always returns IRQ_HANDLED; @dev_id is unused.
+ */
+static irqreturn_t ioei_interrupt(int irq, void *dev_id)
+{
+	struct pseries_io_event *event;
+
+	while (rtas_call(ioei_check_exception_token, 6, 1, NULL,
+			 RTAS_VECTOR_EXTERNAL_INTERRUPT,
+			 virq_to_hw(irq),
+			 RTAS_IO_EVENTS, 1 /* Time Critical */,
+			 __pa(ioei_rtas_buf),
+			 RTAS_DATA_BUF_SIZE) == 0) {
+		event = ioei_find_event((struct rtas_error_log *)ioei_rtas_buf);
+		if (event)
+			atomic_notifier_call_chain(&pseries_ioei_notifier_list,
+						   0, event);
+	}
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Look up the check-exception RTAS token and request the interrupts of the
+ * /event-sources/ibm,io-events node with ioei_interrupt() as handler.
+ * Returns -ENODEV when the RTAS function or the node is missing, 0 otherwise.
+ */
+static int __init ioei_init(void)
+{
+	struct device_node *np;
+
+	ioei_check_exception_token = rtas_function_token(RTAS_FN_CHECK_EXCEPTION);
+	if (ioei_check_exception_token == RTAS_UNKNOWN_SERVICE)
+		return -ENODEV;
+
+	np = of_find_node_by_path("/event-sources/ibm,io-events");
+	if (!np)
+		return -ENODEV;
+
+	request_event_sources_irqs(np, ioei_interrupt, "IO_EVENT");
+	pr_info("IBM I/O event interrupts enabled\n");
+	of_node_put(np);
+
+	return 0;
+}
+machine_subsys_initcall(pseries, ioei_init);
+
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
new file mode 100644
index 000000000..496e16c58
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -0,0 +1,1742 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation
+ *
+ * Rewrite, cleanup:
+ *
+ * Copyright (C) 2004 Olof Johansson <olof@lixom.net>, IBM Corporation
+ * Copyright (C) 2006 Olof Johansson <olof@lixom.net>
+ *
+ * Dynamic DMA mapping support, pSeries-specific parts, both SMP and LPAR.
+ */
+
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/memblock.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/pci.h>
+#include <linux/dma-mapping.h>
+#include <linux/crash_dump.h>
+#include <linux/memory.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/iommu.h>
+#include <linux/rculist.h>
+#include <asm/io.h>
+#include <asm/prom.h>
+#include <asm/rtas.h>
+#include <asm/iommu.h>
+#include <asm/pci-bridge.h>
+#include <asm/machdep.h>
+#include <asm/firmware.h>
+#include <asm/tce.h>
+#include <asm/ppc-pci.h>
+#include <asm/udbg.h>
+#include <asm/mmzone.h>
+#include <asm/plpar_wrappers.h>
+
+#include "pseries.h"
+
+/*
+ * Indices into the ddw_avail[] arrays this file fills from the
+ * "ibm,ddw-applicable" property. Each slot is later handed to rtas_call()
+ * as the token of the corresponding DDW RTAS call.
+ */
+enum {
+ DDW_QUERY_PE_DMA_WIN = 0,
+ DDW_CREATE_PE_DMA_WIN = 1,
+ DDW_REMOVE_PE_DMA_WIN = 2,
+
+ DDW_APPLICABLE_SIZE /* number of entries; sizes the ddw_avail[] arrays */
+};
+
+/* Cell indices into the "ibm,ddw-extensions" property, as used by ddw_read_ext(). */
+enum {
+ DDW_EXT_SIZE = 0, /* cell holding the extension count, checked before other reads */
+ DDW_EXT_RESET_DMA_WIN = 1, /* value is used as the RTAS token in reset_dma_window() */
+ DDW_EXT_QUERY_OUT_SIZE = 2 /* a value of 1 makes query_ddw() request six outputs */
+};
+
+/*
+ * Allocate a zeroed iommu_table on NUMA node @node, with its group list
+ * and reference count initialised. Returns NULL if the allocation fails.
+ */
+static struct iommu_table *iommu_pseries_alloc_table(int node)
+{
+	struct iommu_table *table;
+
+	table = kzalloc_node(sizeof(*table), GFP_KERNEL, node);
+	if (table) {
+		INIT_LIST_HEAD_RCU(&table->it_group_list);
+		kref_init(&table->it_kref);
+	}
+
+	return table;
+}
+
+/*
+ * Allocate an iommu_table_group on NUMA node @node together with the table
+ * stored in slot 0. Returns NULL if either allocation fails; nothing is
+ * leaked in that case.
+ */
+static struct iommu_table_group *iommu_pseries_alloc_group(int node)
+{
+	struct iommu_table_group *grp;
+
+	grp = kzalloc_node(sizeof(*grp), GFP_KERNEL, node);
+	if (!grp)
+		return NULL;
+
+#ifdef CONFIG_IOMMU_API
+	grp->ops = &spapr_tce_table_group_ops;
+	grp->pgsizes = SZ_4K;
+#endif
+
+	grp->tables[0] = iommu_pseries_alloc_table(node);
+	if (!grp->tables[0]) {
+		/* No table: the half-built group is of no use to the caller. */
+		kfree(grp);
+		return NULL;
+	}
+
+	return grp;
+}
+
+/*
+ * Release a table group: drop the iommu group reference (when built with
+ * CONFIG_IOMMU_API), put the tables in slots 0 and 1, then free the group.
+ * A NULL @table_group is accepted and ignored. @node_name is not used.
+ */
+static void iommu_pseries_free_group(struct iommu_table_group *table_group,
+		const char *node_name)
+{
+	int slot;
+
+	if (!table_group)
+		return;
+
+#ifdef CONFIG_IOMMU_API
+	if (table_group->group) {
+		iommu_group_put(table_group->group);
+		BUG_ON(table_group->group);
+	}
+#endif
+
+	/*
+	 * Slot 0 holds the default DMA window table and slot 1 the DDW one.
+	 * SR-IOV adapters only populate slot 1.
+	 */
+	for (slot = 0; slot <= 1; slot++) {
+		if (table_group->tables[slot])
+			iommu_tce_table_put(table_group->tables[slot]);
+	}
+
+	kfree(table_group);
+}
+
+/*
+ * Fill @npages consecutive entries of the in-memory TCE table at
+ * tbl->it_base, starting at @index, so that they translate to the memory
+ * beginning at kernel virtual address @uaddr. Read access is always
+ * granted; write access is granted unless @direction is DMA_TO_DEVICE.
+ * @attrs is not used. Always returns 0.
+ */
+static int tce_build_pSeries(struct iommu_table *tbl, long index,
+			      long npages, unsigned long uaddr,
+			      enum dma_data_direction direction,
+			      unsigned long attrs)
+{
+	const unsigned long shift = tbl->it_page_shift;
+	const unsigned long step = IOMMU_PAGE_SIZE(tbl);
+	__be64 *entry = (__be64 *)tbl->it_base + index;
+	u64 perm = TCE_PCI_READ;
+
+	if (direction != DMA_TO_DEVICE)
+		perm |= TCE_PCI_WRITE;
+
+	for (; npages--; entry++, uaddr += step) {
+		/* Translate per page: the range might cross a MEMBLOCK boundary. */
+		u64 rpn = __pa(uaddr) >> shift;
+
+		*entry = cpu_to_be64(perm | rpn << shift);
+	}
+
+	return 0;
+}
+
+
+/* Zero @npages entries of the in-memory TCE table, starting at @index. */
+static void tce_free_pSeries(struct iommu_table *tbl, long index, long npages)
+{
+	__be64 *entry = (__be64 *)tbl->it_base + index;
+
+	for (; npages--; entry++)
+		*entry = 0;
+}
+
+/* Return entry @index of the in-memory TCE table, converted to CPU byte order. */
+static unsigned long tce_get_pseries(struct iommu_table *tbl, long index)
+{
+	const __be64 *table = (__be64 *)tbl->it_base;
+
+	return be64_to_cpu(table[index]);
+}
+
+/* Forward declarations: the build routines call these to undo partial work. */
+static void tce_free_pSeriesLP(unsigned long liobn, long, long, long);
+static void tce_freemulti_pSeriesLP(struct iommu_table*, long, long);
+
+/*
+ * Program @npages TCEs of the table identified by @liobn, starting at entry
+ * @tcenum, using one plpar_tce_put() call per entry. The entries translate
+ * to consecutive real pages of size (1 << @tceshift) starting at
+ * __pa(@uaddr). Read access is always granted; write access is granted
+ * unless @direction is DMA_TO_DEVICE. @attrs is not used.
+ *
+ * Returns 0, or H_NOT_ENOUGH_RESOURCES (cast to int) after clearing the
+ * entries written so far. Any other hcall failure is only logged, rate
+ * limited, and the loop carries on with the next entry.
+ */
+static int tce_build_pSeriesLP(unsigned long liobn, long tcenum, long tceshift,
+ long npages, unsigned long uaddr,
+ enum dma_data_direction direction,
+ unsigned long attrs)
+{
+ u64 rc = 0;
+ u64 proto_tce, tce;
+ u64 rpn;
+ int ret = 0;
+ long tcenum_start = tcenum, npages_start = npages;
+
+ rpn = __pa(uaddr) >> tceshift;
+ proto_tce = TCE_PCI_READ;
+ if (direction != DMA_TO_DEVICE)
+ proto_tce |= TCE_PCI_WRITE;
+
+ while (npages--) {
+ tce = proto_tce | rpn << tceshift;
+ rc = plpar_tce_put((u64)liobn, (u64)tcenum << tceshift, tce);
+
+ if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) {
+ ret = (int)rc;
+ /*
+ * npages was already decremented for this entry, so
+ * npages_start - (npages + 1) is the number of entries
+ * written before the one that failed.
+ */
+ tce_free_pSeriesLP(liobn, tcenum_start, tceshift,
+ (npages_start - (npages + 1)));
+ break;
+ }
+
+ if (rc && printk_ratelimit()) {
+ printk("tce_build_pSeriesLP: plpar_tce_put failed. rc=%lld\n", rc);
+ printk("\tindex = 0x%llx\n", (u64)liobn);
+ printk("\ttcenum = 0x%llx\n", (u64)tcenum);
+ printk("\ttce val = 0x%llx\n", tce );
+ dump_stack();
+ }
+
+ tcenum++;
+ rpn++;
+ }
+ return ret;
+}
+
+/*
+ * Per-CPU page used to stage TCE values for plpar_tce_put_indirect().
+ * It is allocated on first use and kept for later calls on the same CPU.
+ */
+static DEFINE_PER_CPU(__be64 *, tce_page);
+
+/*
+ * Program @npages TCEs of @tbl starting at entry @tcenum, batching them
+ * through plpar_tce_put_indirect() in chunks of at most one 4096-byte page
+ * of entries.
+ *
+ * Falls back to tce_build_pSeriesLP() for a single page, when the firmware
+ * lacks FW_FEATURE_PUT_TCE_IND, or when the staging page cannot be
+ * allocated.
+ *
+ * Returns 0, or H_NOT_ENOUGH_RESOURCES (cast to int) after clearing the
+ * entries written by earlier batches. Other hcall failures stop the loop
+ * and are logged, rate limited, but 0 is still returned.
+ */
+static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
+ long npages, unsigned long uaddr,
+ enum dma_data_direction direction,
+ unsigned long attrs)
+{
+ u64 rc = 0;
+ u64 proto_tce;
+ __be64 *tcep;
+ u64 rpn;
+ long l, limit;
+ long tcenum_start = tcenum, npages_start = npages;
+ int ret = 0;
+ unsigned long flags;
+ const unsigned long tceshift = tbl->it_page_shift;
+
+ if ((npages == 1) || !firmware_has_feature(FW_FEATURE_PUT_TCE_IND)) {
+ return tce_build_pSeriesLP(tbl->it_index, tcenum,
+ tceshift, npages, uaddr,
+ direction, attrs);
+ }
+
+ local_irq_save(flags); /* to protect tcep and the page behind it */
+
+ tcep = __this_cpu_read(tce_page);
+
+ /* This is safe to do since interrupts are off when we're called
+ * from iommu_alloc{,_sg}()
+ */
+ if (!tcep) {
+ tcep = (__be64 *)__get_free_page(GFP_ATOMIC);
+ /* If allocation fails, fall back to the loop implementation */
+ if (!tcep) {
+ local_irq_restore(flags);
+ return tce_build_pSeriesLP(tbl->it_index, tcenum,
+ tceshift,
+ npages, uaddr, direction, attrs);
+ }
+ __this_cpu_write(tce_page, tcep);
+ }
+
+ rpn = __pa(uaddr) >> tceshift;
+ proto_tce = TCE_PCI_READ;
+ if (direction != DMA_TO_DEVICE)
+ proto_tce |= TCE_PCI_WRITE;
+
+ /* We can map max one pageful of TCEs at a time */
+ do {
+ /*
+ * Set up the page with TCE data, looping through and setting
+ * the values.
+ */
+ limit = min_t(long, npages, 4096 / TCE_ENTRY_SIZE);
+
+ for (l = 0; l < limit; l++) {
+ tcep[l] = cpu_to_be64(proto_tce | rpn << tceshift);
+ rpn++;
+ }
+
+ rc = plpar_tce_put_indirect((u64)tbl->it_index,
+ (u64)tcenum << tceshift,
+ (u64)__pa(tcep),
+ limit);
+
+ npages -= limit;
+ tcenum += limit;
+ } while (npages > 0 && !rc);
+
+ local_irq_restore(flags);
+
+ if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) {
+ ret = (int)rc;
+ /*
+ * npages has already been reduced by the failing batch, so
+ * npages + limit is the count before that batch; the difference
+ * from npages_start is what earlier batches wrote.
+ */
+ tce_freemulti_pSeriesLP(tbl, tcenum_start,
+ (npages_start - (npages + limit)));
+ return ret;
+ }
+
+ if (rc && printk_ratelimit()) {
+ printk("tce_buildmulti_pSeriesLP: plpar_tce_put failed. rc=%lld\n", rc);
+ printk("\tindex = 0x%llx\n", (u64)tbl->it_index);
+ printk("\tnpages = 0x%llx\n", (u64)npages);
+ printk("\ttce[0] val = 0x%llx\n", tcep[0]);
+ dump_stack();
+ }
+ return ret;
+}
+
+/*
+ * Clear @npages TCEs of the table identified by @liobn, starting at entry
+ * @tcenum, with one plpar_tce_put() of value 0 per entry. Failures are
+ * logged, rate limited, and do not stop the loop.
+ */
+static void tce_free_pSeriesLP(unsigned long liobn, long tcenum, long tceshift,
+			       long npages)
+{
+	for (; npages--; tcenum++) {
+		u64 rc = plpar_tce_put((u64)liobn, (u64)tcenum << tceshift, 0);
+
+		if (!rc || !printk_ratelimit())
+			continue;
+
+		printk("tce_free_pSeriesLP: plpar_tce_put failed. rc=%lld\n", rc);
+		printk("\tindex = 0x%llx\n", (u64)liobn);
+		printk("\ttcenum = 0x%llx\n", (u64)tcenum);
+		dump_stack();
+	}
+}
+
+
+/*
+ * Clear @npages TCEs of @tbl starting at entry @tcenum.
+ *
+ * With FW_FEATURE_STUFF_TCE the entries are cleared through
+ * plpar_tce_stuff() in chunks of at most 512; otherwise this defers to the
+ * one-entry-at-a-time tce_free_pSeriesLP(). The loop stops at the first
+ * failing hcall, which is logged (rate limited).
+ */
+static void tce_freemulti_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages)
+{
+ u64 rc;
+ long rpages = npages; /* entries still to clear; npages is kept for the log */
+ unsigned long limit;
+
+ if (!firmware_has_feature(FW_FEATURE_STUFF_TCE))
+ return tce_free_pSeriesLP(tbl->it_index, tcenum,
+ tbl->it_page_shift, npages);
+
+ do {
+ limit = min_t(unsigned long, rpages, 512);
+
+ rc = plpar_tce_stuff((u64)tbl->it_index,
+ (u64)tcenum << tbl->it_page_shift, 0, limit);
+
+ rpages -= limit;
+ tcenum += limit;
+ } while (rpages > 0 && !rc);
+
+ if (rc && printk_ratelimit()) {
+ printk("tce_freemulti_pSeriesLP: plpar_tce_stuff failed\n");
+ printk("\trc = %lld\n", rc);
+ printk("\tindex = 0x%llx\n", (u64)tbl->it_index);
+ printk("\tnpages = 0x%llx\n", (u64)npages);
+ dump_stack();
+ }
+}
+
+/*
+ * Read entry @tcenum of @tbl through plpar_tce_get() and return the value.
+ * A failing hcall is logged (rate limited) but the function still returns
+ * whatever is in tce_ret.
+ */
+static unsigned long tce_get_pSeriesLP(struct iommu_table *tbl, long tcenum)
+{
+ u64 rc;
+ /*
+ * NOTE(review): not initialised here; on failure the returned value
+ * depends on whether plpar_tce_get() stores to it - confirm.
+ */
+ unsigned long tce_ret;
+
+ rc = plpar_tce_get((u64)tbl->it_index,
+ (u64)tcenum << tbl->it_page_shift, &tce_ret);
+
+ if (rc && printk_ratelimit()) {
+ printk("tce_get_pSeriesLP: plpar_tce_get failed. rc=%lld\n", rc);
+ printk("\tindex = 0x%llx\n", (u64)tbl->it_index);
+ printk("\ttcenum = 0x%llx\n", (u64)tcenum);
+ dump_stack();
+ }
+
+ return tce_ret;
+}
+
+/*
+ * this is compatible with cells for the device tree property
+ *
+ * All fields are big-endian; users in this file convert them with
+ * be32_to_cpu()/be64_to_cpu() before use.
+ */
+struct dynamic_dma_window_prop {
+ __be32 liobn; /* tce table number */
+ __be64 dma_base; /* address hi,lo */
+ __be32 tce_shift; /* ilog2(tce_page_size) */
+ __be32 window_shift; /* ilog2(tce_window_size) */
+};
+
+/* One entry of dma_win_list: a DMA window known for a device node. */
+struct dma_win {
+ struct device_node *device; /* node matched against in find_existing_ddw() */
+ const struct dynamic_dma_window_prop *prop; /* window description for that node */
+ bool direct; /* reported as *direct_mapping by find_existing_ddw() */
+ struct list_head list; /* link in dma_win_list */
+};
+
+/* Dynamic DMA Window support */
+/* Decoded outputs of the query RTAS call, filled in by query_ddw(). */
+struct ddw_query_response {
+ u32 windows_available;
+ u64 largest_available_block;
+ u32 page_size;
+ u32 migration_capable;
+};
+
+/*
+ * Outputs of the create RTAS call. create_ddw() passes this struct to
+ * rtas_call() cast to u32 *, so it has to stay three consecutive u32s.
+ */
+struct ddw_create_response {
+ u32 liobn;
+ u32 addr_hi;
+ u32 addr_lo;
+};
+
+/* List of struct dma_win entries, guarded by dma_win_list_lock. */
+static LIST_HEAD(dma_win_list);
+/* prevents races between memory on/offline and window creation */
+static DEFINE_SPINLOCK(dma_win_list_lock);
+/* protects initializing window twice for same device */
+static DEFINE_MUTEX(dma_win_init_mutex);
+
+/*
+ * Clear the TCEs of a dynamic DMA window that cover the memory range of
+ * @num_pfn pages starting at @start_pfn.
+ *
+ * @arg points to the struct dynamic_dma_window_prop describing the window.
+ * Entries are cleared through plpar_tce_stuff() in chunks of at most 512.
+ * Returns the result of the last hcall; the loop stops at the first
+ * non-zero one.
+ */
+static int tce_clearrange_multi_pSeriesLP(unsigned long start_pfn,
+ unsigned long num_pfn, const void *arg)
+{
+ const struct dynamic_dma_window_prop *maprange = arg;
+ int rc;
+ u64 tce_size, num_tce, dma_offset, next;
+ u32 tce_shift;
+ long limit;
+
+ tce_shift = be32_to_cpu(maprange->tce_shift);
+ tce_size = 1ULL << tce_shift;
+ next = start_pfn << PAGE_SHIFT;
+ num_tce = num_pfn << PAGE_SHIFT;
+
+ /* round back to the beginning of the tce page size */
+ num_tce += next & (tce_size - 1);
+ next &= ~(tce_size - 1);
+
+ /* convert to number of tces, rounding a partial last page up */
+ num_tce |= tce_size - 1;
+ num_tce >>= tce_shift;
+
+ do {
+ /*
+ * Clear at most 512 entries per hcall, starting at the DMA
+ * address that corresponds to 'next' within the window.
+ */
+ limit = min_t(long, num_tce, 512);
+ dma_offset = next + be64_to_cpu(maprange->dma_base);
+
+ rc = plpar_tce_stuff((u64)be32_to_cpu(maprange->liobn),
+ dma_offset,
+ 0, limit);
+ next += limit * tce_size;
+ num_tce -= limit;
+ } while (num_tce > 0 && !rc);
+
+ return rc;
+}
+
+/*
+ * Map the memory range of @num_pfn pages starting at @start_pfn into a
+ * dynamic DMA window, read/write, so that physical address P is reachable
+ * at DMA address dma_base + P.
+ *
+ * @arg points to the struct dynamic_dma_window_prop describing the window.
+ * Without FW_FEATURE_PUT_TCE_IND the work is delegated to
+ * tce_build_pSeriesLP(); otherwise the TCEs are staged in the per-CPU
+ * tce_page and written with plpar_tce_put_indirect(), one page of entries
+ * at a time, with interrupts disabled.
+ *
+ * Returns -ENOMEM if the staging page cannot be allocated, otherwise the
+ * result of the last hcall (or of tce_build_pSeriesLP() on the fallback
+ * path). Nothing is undone here on failure.
+ */
+static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn,
+ unsigned long num_pfn, const void *arg)
+{
+ const struct dynamic_dma_window_prop *maprange = arg;
+ u64 tce_size, num_tce, dma_offset, next, proto_tce, liobn;
+ __be64 *tcep;
+ u32 tce_shift;
+ u64 rc = 0;
+ long l, limit;
+
+ if (!firmware_has_feature(FW_FEATURE_PUT_TCE_IND)) {
+ unsigned long tceshift = be32_to_cpu(maprange->tce_shift);
+ unsigned long dmastart = (start_pfn << PAGE_SHIFT) +
+ be64_to_cpu(maprange->dma_base);
+ unsigned long tcenum = dmastart >> tceshift;
+ unsigned long npages = num_pfn << PAGE_SHIFT >> tceshift;
+ void *uaddr = __va(start_pfn << PAGE_SHIFT);
+
+ return tce_build_pSeriesLP(be32_to_cpu(maprange->liobn),
+ tcenum, tceshift, npages, (unsigned long) uaddr,
+ DMA_BIDIRECTIONAL, 0);
+ }
+
+ local_irq_disable(); /* to protect tcep and the page behind it */
+ tcep = __this_cpu_read(tce_page);
+
+ if (!tcep) {
+ tcep = (__be64 *)__get_free_page(GFP_ATOMIC);
+ if (!tcep) {
+ local_irq_enable();
+ return -ENOMEM;
+ }
+ __this_cpu_write(tce_page, tcep);
+ }
+
+ proto_tce = TCE_PCI_READ | TCE_PCI_WRITE;
+
+ liobn = (u64)be32_to_cpu(maprange->liobn);
+ tce_shift = be32_to_cpu(maprange->tce_shift);
+ tce_size = 1ULL << tce_shift;
+ next = start_pfn << PAGE_SHIFT;
+ num_tce = num_pfn << PAGE_SHIFT;
+
+ /* round back to the beginning of the tce page size */
+ num_tce += next & (tce_size - 1);
+ next &= ~(tce_size - 1);
+
+ /* convert to number of tces, rounding a partial last page up */
+ num_tce |= tce_size - 1;
+ num_tce >>= tce_shift;
+
+ /* We can map max one pageful of TCEs at a time */
+ do {
+ /*
+ * Set up the page with TCE data, looping through and setting
+ * the values.
+ */
+ limit = min_t(long, num_tce, 4096 / TCE_ENTRY_SIZE);
+ /* Must be taken before the loop below advances 'next'. */
+ dma_offset = next + be64_to_cpu(maprange->dma_base);
+
+ for (l = 0; l < limit; l++) {
+ tcep[l] = cpu_to_be64(proto_tce | next);
+ next += tce_size;
+ }
+
+ rc = plpar_tce_put_indirect(liobn,
+ dma_offset,
+ (u64)__pa(tcep),
+ limit);
+
+ num_tce -= limit;
+ } while (num_tce > 0 && !rc);
+
+ /* error cleanup: caller will clear whole range */
+
+ local_irq_enable();
+ return rc;
+}
+
+/*
+ * Same as tce_setrange_multi_pSeriesLP(), but with a non-const @arg so the
+ * signature fits callers that pass a plain void * cookie.
+ * NOTE(review): presumably used as a memory-range walk callback; the caller
+ * is outside this part of the file.
+ */
+static int tce_setrange_multi_pSeriesLP_walk(unsigned long start_pfn,
+ unsigned long num_pfn, void *arg)
+{
+ return tce_setrange_multi_pSeriesLP(start_pfn, num_pfn, arg);
+}
+
+/*
+ * Fill in the fields of @tbl shared by every pSeries table flavour.
+ *
+ * @win_addr and @window_size are given in bytes and are stored in units of
+ * (1 << @page_shift). @base is stored as the table's it_base (callers pass
+ * NULL when there is no in-memory table) and @table_ops becomes the
+ * table's operations.
+ */
+static void iommu_table_setparms_common(struct iommu_table *tbl, unsigned long busno,
+					unsigned long liobn, unsigned long win_addr,
+					unsigned long window_size, unsigned long page_shift,
+					void *base, struct iommu_table_ops *table_ops)
+{
+	/* Identity of the table. */
+	tbl->it_type = TCE_PCI;
+	tbl->it_busno = busno;
+	tbl->it_index = liobn;
+
+	/* Geometry of the window, expressed in IOMMU pages. */
+	tbl->it_page_shift = page_shift;
+	tbl->it_offset = win_addr >> page_shift;
+	tbl->it_size = window_size >> page_shift;
+	tbl->it_blocksize = 16;
+
+	/* Backing store and accessors. */
+	tbl->it_base = (unsigned long)base;
+	tbl->it_ops = table_ops;
+}
+
+/* Declared here because it is referenced below; initialised further down. */
+struct iommu_table_ops iommu_table_pseries_ops;
+
+/*
+ * Configure @tbl for the next DMA window slice of @phb on non-LPAR pSeries.
+ *
+ * The window starts at phb->dma_window_base_cur and is phb->dma_window_size
+ * bytes long; dma_window_base_cur is then advanced past it. The table
+ * memory is found through the "linux,tce-base"/"linux,tce-size" properties
+ * of the PHB node and is zeroed unless this is a kdump kernel. @dn is only
+ * used in the error message.
+ *
+ * Panics if the window would end above 2GB. If either property is missing
+ * the function logs an error and returns with @tbl left untouched.
+ */
+static void iommu_table_setparms(struct pci_controller *phb,
+ struct device_node *dn,
+ struct iommu_table *tbl)
+{
+ struct device_node *node;
+ const unsigned long *basep;
+ const u32 *sizep;
+
+ /* Test if we are going over 2GB of DMA space */
+ if (phb->dma_window_base_cur + phb->dma_window_size > SZ_2G) {
+ udbg_printf("PCI_DMA: Unexpected number of IOAs under this PHB.\n");
+ panic("PCI_DMA: Unexpected number of IOAs under this PHB.\n");
+ }
+
+ node = phb->dn;
+ basep = of_get_property(node, "linux,tce-base", NULL);
+ sizep = of_get_property(node, "linux,tce-size", NULL);
+ if (basep == NULL || sizep == NULL) {
+ printk(KERN_ERR "PCI_DMA: iommu_table_setparms: %pOF has "
+ "missing tce entries !\n", dn);
+ return;
+ }
+
+ iommu_table_setparms_common(tbl, phb->bus->number, 0, phb->dma_window_base_cur,
+ phb->dma_window_size, IOMMU_PAGE_SHIFT_4K,
+ __va(*basep), &iommu_table_pseries_ops);
+
+ if (!is_kdump_kernel())
+ memset((void *)tbl->it_base, 0, *sizep);
+
+ phb->dma_window_base_cur += phb->dma_window_size;
+}
+
+struct iommu_table_ops iommu_table_lpar_multi_ops;
+
+/*
+ * iommu_table_setparms_lpar
+ *
+ * Function: On pSeries LPAR systems, configure @tbl from the DMA window
+ * property @dma_window of node @dn, using 4K IOMMU pages and no in-memory
+ * table, and record the window's start and size in @table_group.
+ */
+static void iommu_table_setparms_lpar(struct pci_controller *phb,
+				      struct device_node *dn,
+				      struct iommu_table *tbl,
+				      struct iommu_table_group *table_group,
+				      const __be32 *dma_window)
+{
+	unsigned long win_liobn, win_start, win_bytes;
+
+	of_parse_dma_window(dn, dma_window, &win_liobn, &win_start, &win_bytes);
+
+	iommu_table_setparms_common(tbl, phb->bus->number, win_liobn,
+				    win_start, win_bytes, IOMMU_PAGE_SHIFT_4K,
+				    NULL, &iommu_table_lpar_multi_ops);
+
+	table_group->tce32_start = win_start;
+	table_group->tce32_size = win_bytes;
+}
+
+/*
+ * Table operations that read and write the TCE table directly in memory
+ * (through tbl->it_base); installed by iommu_table_setparms().
+ */
+struct iommu_table_ops iommu_table_pseries_ops = {
+ .set = tce_build_pSeries,
+ .clear = tce_free_pSeries,
+ .get = tce_get_pseries
+};
+
+/*
+ * Decide how the 2GB DMA space of a root bus is divided on non-LPAR
+ * pSeries. Non-root buses are ignored here.
+ *
+ * Without an ISA bus below this PHB only the per-child window size is
+ * computed. With one, a 128MB table starting at 128MB is created for the
+ * PHB itself, and the per-child window size is chosen so that all children
+ * fit in the remaining space.
+ *
+ * Panics if the table group cannot be allocated or the table cannot be
+ * initialised.
+ */
+static void pci_dma_bus_setup_pSeries(struct pci_bus *bus)
+{
+	struct device_node *dn;
+	struct iommu_table *tbl;
+	struct device_node *isa_dn, *isa_dn_orig;
+	struct device_node *tmp;
+	struct pci_dn *pci;
+	int children;
+
+	dn = pci_bus_to_OF_node(bus);
+
+	pr_debug("pci_dma_bus_setup_pSeries: setting up bus %pOF\n", dn);
+
+	if (bus->self) {
+		/* This is not a root bus, any setup will be done for the
+		 * device-side of the bridge in iommu_dev_setup_pSeries().
+		 */
+		return;
+	}
+	pci = PCI_DN(dn);
+
+	/* Check if the ISA bus on the system is under
+	 * this PHB.
+	 */
+	isa_dn = isa_dn_orig = of_find_node_by_type(NULL, "isa");
+
+	while (isa_dn && isa_dn != dn)
+		isa_dn = isa_dn->parent;
+
+	of_node_put(isa_dn_orig);
+
+	/* Count number of direct PCI children of the PHB. */
+	for (children = 0, tmp = dn->child; tmp; tmp = tmp->sibling)
+		children++;
+
+	pr_debug("Children: %d\n", children);
+
+	/* Calculate amount of DMA window per slot. Each window must be
+	 * a power of two (due to pci_alloc_consistent requirements).
+	 *
+	 * Keep 256MB aside for PHBs with ISA.
+	 */
+
+	if (!isa_dn) {
+		/* No ISA/IDE - just set window size and return */
+		pci->phb->dma_window_size = 0x80000000ul; /* To be divided */
+
+		while (pci->phb->dma_window_size * children > 0x80000000ul)
+			pci->phb->dma_window_size >>= 1;
+		pr_debug("No ISA/IDE, window size is 0x%llx\n",
+			 pci->phb->dma_window_size);
+		pci->phb->dma_window_base_cur = 0;
+
+		return;
+	}
+
+	/* If we have ISA, then we probably have an IDE
+	 * controller too. Allocate a 128MB table but
+	 * skip the first 128MB to avoid stepping on ISA
+	 * space.
+	 */
+	pci->phb->dma_window_size = 0x8000000ul;
+	pci->phb->dma_window_base_cur = 0x8000000ul;
+
+	pci->table_group = iommu_pseries_alloc_group(pci->phb->node);
+	/*
+	 * The allocator returns NULL on failure; fail with a clear message
+	 * rather than dereferencing the NULL group below.
+	 */
+	if (!pci->table_group)
+		panic("Failed to allocate iommu table group");
+	tbl = pci->table_group->tables[0];
+
+	iommu_table_setparms(pci->phb, dn, tbl);
+
+	if (!iommu_init_table(tbl, pci->phb->node, 0, 0))
+		panic("Failed to initialize iommu table");
+
+	/* Divide the rest (1.75GB) among the children */
+	pci->phb->dma_window_size = 0x80000000ul;
+	while (pci->phb->dma_window_size * children > 0x70000000ul)
+		pci->phb->dma_window_size >>= 1;
+
+	pr_debug("ISA/IDE, window size is 0x%llx\n", pci->phb->dma_window_size);
+}
+
+#ifdef CONFIG_IOMMU_API
+/*
+ * Replace entry @index of @tbl and hand the previous contents back.
+ *
+ * On entry *@tce and *@direction describe the new mapping. The old entry is
+ * read with plpar_tce_get() and the new one written with plpar_tce_put(),
+ * both under tbl->large_pool.lock with interrupts saved. On success *@tce
+ * receives the old entry without its read/write permission bits and
+ * *@direction the direction derived from the old entry. On failure neither
+ * is modified.
+ *
+ * Returns the hcall result: 0 on success.
+ */
+static int tce_exchange_pseries(struct iommu_table *tbl, long index, unsigned
+ long *tce, enum dma_data_direction *direction)
+{
+ long rc;
+ unsigned long ioba = (unsigned long) index << tbl->it_page_shift;
+ unsigned long flags, oldtce = 0;
+ u64 proto_tce = iommu_direction_to_tce_perm(*direction);
+ unsigned long newtce = *tce | proto_tce;
+
+ spin_lock_irqsave(&tbl->large_pool.lock, flags);
+
+ rc = plpar_tce_get((u64)tbl->it_index, ioba, &oldtce);
+ /* Only overwrite the entry once the old value has been captured. */
+ if (!rc)
+ rc = plpar_tce_put((u64)tbl->it_index, ioba, newtce);
+
+ if (!rc) {
+ *direction = iommu_tce_direction(oldtce);
+ *tce = oldtce & ~(TCE_PCI_READ | TCE_PCI_WRITE);
+ }
+
+ spin_unlock_irqrestore(&tbl->large_pool.lock, flags);
+
+ return rc;
+}
+#endif
+
+/*
+ * Table operations for LPAR, where every access goes through hypervisor
+ * calls; installed by iommu_table_setparms_lpar(). The exchange operation
+ * is only provided when CONFIG_IOMMU_API is enabled.
+ */
+struct iommu_table_ops iommu_table_lpar_multi_ops = {
+ .set = tce_buildmulti_pSeriesLP,
+#ifdef CONFIG_IOMMU_API
+ .xchg_no_kill = tce_exchange_pseries,
+#endif
+ .clear = tce_freemulti_pSeriesLP,
+ .get = tce_get_pSeriesLP
+};
+
+/*
+ * Walk up the device tree from @dn, for as long as the nodes carry PCI
+ * data, and return the first node that has an "ibm,dma-window" (default
+ * DMA window), direct DMA window or dynamic 64bit DMA window property.
+ *
+ * *@dma_window is written only when the match is an "ibm,dma-window"
+ * property and @dma_window is not NULL. Returns NULL when no such node
+ * exists.
+ */
+static struct device_node *pci_dma_find(struct device_node *dn,
+					 const __be32 **dma_window)
+{
+	while (dn && PCI_DN(dn)) {
+		const __be32 *default_win;
+
+		default_win = of_get_property(dn, "ibm,dma-window", NULL);
+		if (default_win) {
+			if (dma_window)
+				*dma_window = default_win;
+			return dn;
+		}
+
+		if (of_get_property(dn, DIRECT64_PROPNAME, NULL) ||
+		    of_get_property(dn, DMA64_PROPNAME, NULL))
+			return dn;
+
+		dn = dn->parent;
+	}
+
+	return NULL;
+}
+
+/*
+ * Make sure the node that owns the DMA window for @bus has an IOMMU table
+ * group on LPAR.
+ *
+ * The owner is the nearest node, walking up from the bus node, that has a
+ * DMA window property. If it has no table group yet, one is allocated and
+ * registered; its table is set up only when a default "ibm,dma-window" was
+ * found. Returns without doing anything when no node with a DMA window
+ * exists.
+ *
+ * Panics if the table group cannot be allocated or the table cannot be
+ * initialised.
+ */
+static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus)
+{
+	struct iommu_table *tbl;
+	struct device_node *dn, *pdn;
+	struct pci_dn *ppci;
+	const __be32 *dma_window = NULL;
+
+	dn = pci_bus_to_OF_node(bus);
+
+	pr_debug("pci_dma_bus_setup_pSeriesLP: setting up bus %pOF\n",
+		 dn);
+
+	pdn = pci_dma_find(dn, &dma_window);
+
+	if (dma_window == NULL)
+		pr_debug(" no ibm,dma-window property !\n");
+
+	/*
+	 * pci_dma_find() returns NULL when no ancestor has a DMA window;
+	 * PCI_DN() would dereference that, so there is nothing to set up.
+	 */
+	if (!pdn)
+		return;
+
+	ppci = PCI_DN(pdn);
+
+	pr_debug(" parent is %pOF, iommu_table: 0x%p\n",
+		 pdn, ppci->table_group);
+
+	if (!ppci->table_group) {
+		ppci->table_group = iommu_pseries_alloc_group(ppci->phb->node);
+		/* The allocator returns NULL on failure. */
+		if (!ppci->table_group)
+			panic("Failed to allocate iommu table group");
+		tbl = ppci->table_group->tables[0];
+		if (dma_window) {
+			iommu_table_setparms_lpar(ppci->phb, pdn, tbl,
+						  ppci->table_group, dma_window);
+
+			if (!iommu_init_table(tbl, ppci->phb->node, 0, 0))
+				panic("Failed to initialize iommu table");
+		}
+		iommu_register_group(ppci->table_group,
+				     pci_domain_nr(bus), 0);
+		pr_debug(" created table: %p\n", ppci->table_group);
+	}
+}
+
+
+/*
+ * Attach an IOMMU table to @dev on non-LPAR pSeries.
+ *
+ * A device directly on a root bus gets its own table group and table,
+ * carved out of the PHB's DMA space by iommu_table_setparms(). A device
+ * further down reuses the table of the nearest ancestor node that has a
+ * table group; a warning is logged when there is none.
+ *
+ * Panics if the table group cannot be allocated or the table cannot be
+ * initialised.
+ */
+static void pci_dma_dev_setup_pSeries(struct pci_dev *dev)
+{
+	struct device_node *dn;
+	struct iommu_table *tbl;
+
+	pr_debug("pci_dma_dev_setup_pSeries: %s\n", pci_name(dev));
+
+	dn = dev->dev.of_node;
+
+	/* If we're the direct child of a root bus, then we need to allocate
+	 * an iommu table ourselves. The bus setup code should have setup
+	 * the window sizes already.
+	 */
+	if (!dev->bus->self) {
+		struct pci_controller *phb = PCI_DN(dn)->phb;
+
+		pr_debug(" --> first child, no bridge. Allocating iommu table.\n");
+		PCI_DN(dn)->table_group = iommu_pseries_alloc_group(phb->node);
+		/*
+		 * The allocator returns NULL on failure; stop with a clear
+		 * message rather than dereferencing the NULL group below.
+		 */
+		if (!PCI_DN(dn)->table_group)
+			panic("Failed to allocate iommu table group");
+		tbl = PCI_DN(dn)->table_group->tables[0];
+		iommu_table_setparms(phb, dn, tbl);
+
+		if (!iommu_init_table(tbl, phb->node, 0, 0))
+			panic("Failed to initialize iommu table");
+
+		set_iommu_table_base(&dev->dev, tbl);
+		return;
+	}
+
+	/* If this device is further down the bus tree, search upwards until
+	 * an already allocated iommu table is found and use that.
+	 */
+
+	while (dn && PCI_DN(dn) && PCI_DN(dn)->table_group == NULL)
+		dn = dn->parent;
+
+	if (dn && PCI_DN(dn))
+		set_iommu_table_base(&dev->dev,
+				     PCI_DN(dn)->table_group->tables[0]);
+	else
+		printk(KERN_WARNING "iommu: Device %s has no iommu table\n",
+		       pci_name(dev));
+}
+
+/* Set to 1 when "disable_ddw" is given on the kernel command line. */
+static int __read_mostly disable_ddw;
+
+/*
+ * Handler for the "disable_ddw" early parameter: sets disable_ddw and logs
+ * that DDW is being disabled. @str is ignored. Always returns 0.
+ */
+static int __init disable_ddw_setup(char *str)
+{
+ disable_ddw = 1;
+ printk(KERN_INFO "ppc iommu: disabling ddw.\n");
+
+ return 0;
+}
+
+early_param("disable_ddw", disable_ddw_setup);
+
+/*
+ * Clear every TCE of the window described by @dwp, i.e. the range from
+ * pfn 0 up to the window size. The outcome is only logged, against @np.
+ */
+static void clean_dma_window(struct device_node *np, struct dynamic_dma_window_prop *dwp)
+{
+	unsigned long window_pfns;
+
+	window_pfns = 1ULL << (be32_to_cpu(dwp->window_shift) - PAGE_SHIFT);
+
+	if (tce_clearrange_multi_pSeriesLP(0, window_pfns, dwp)) {
+		pr_warn("%pOF failed to clear tces in window.\n",
+			np);
+		return;
+	}
+
+	pr_debug("%pOF successfully cleared tces in window.\n",
+		 np);
+}
+
+/*
+ * Call only if DMA window is clean.
+ *
+ * Ask firmware to remove the DMA window @liobn, using the RTAS token stored
+ * in @ddw_avail[DDW_REMOVE_PE_DMA_WIN]. The result is only logged, against
+ * @np; nothing is returned to the caller.
+ */
+static void __remove_dma_window(struct device_node *np, u32 *ddw_avail, u64 liobn)
+{
+ int ret;
+
+ ret = rtas_call(ddw_avail[DDW_REMOVE_PE_DMA_WIN], 1, 1, NULL, liobn);
+ if (ret)
+ pr_warn("%pOF: failed to remove DMA window: rtas returned "
+ "%d to ibm,remove-pe-dma-window(%x) %llx\n",
+ np, ret, ddw_avail[DDW_REMOVE_PE_DMA_WIN], liobn);
+ else
+ pr_debug("%pOF: successfully removed DMA window: rtas returned "
+ "%d to ibm,remove-pe-dma-window(%x) %llx\n",
+ np, ret, ddw_avail[DDW_REMOVE_PE_DMA_WIN], liobn);
+}
+
+/*
+ * Tear down the DMA window described by property @win of node @np: clear
+ * its TCEs first, then ask firmware to remove the window itself.
+ */
+static void remove_dma_window(struct device_node *np, u32 *ddw_avail,
+			      struct property *win)
+{
+	struct dynamic_dma_window_prop *prop = win->value;
+
+	clean_dma_window(np, prop);
+	__remove_dma_window(np, ddw_avail, (u64)be32_to_cpu(prop->liobn));
+}
+
+/*
+ * Remove the DMA window recorded in property @win_name of node @np and,
+ * if @remove_prop is true, the property itself.
+ *
+ * The window is only torn down when the property is at least as large as
+ * struct dynamic_dma_window_prop.
+ *
+ * Returns -EINVAL if @np has no such property, 0 otherwise. That includes
+ * the case where "ibm,ddw-applicable" cannot be read (nothing is done) and
+ * the case where removing the property fails (only a warning is logged).
+ */
+static int remove_ddw(struct device_node *np, bool remove_prop, const char *win_name)
+{
+ struct property *win;
+ u32 ddw_avail[DDW_APPLICABLE_SIZE];
+ int ret = 0;
+
+ win = of_find_property(np, win_name, NULL);
+ if (!win)
+ return -EINVAL;
+
+ ret = of_property_read_u32_array(np, "ibm,ddw-applicable",
+ &ddw_avail[0], DDW_APPLICABLE_SIZE);
+ /* Without the RTAS tokens the window cannot be removed. */
+ if (ret)
+ return 0;
+
+
+ if (win->length >= sizeof(struct dynamic_dma_window_prop))
+ remove_dma_window(np, ddw_avail, win);
+
+ if (!remove_prop)
+ return 0;
+
+ ret = of_remove_property(np, win);
+ if (ret)
+ pr_warn("%pOF: failed to remove DMA window property: %d\n",
+ np, ret);
+ return 0;
+}
+
+/*
+ * Look for a window recorded in dma_win_list for node @pdn.
+ *
+ * When one is found, *@dma_addr receives its DMA base address,
+ * *@window_shift its window shift and *@direct_mapping its 'direct' flag;
+ * otherwise the output parameters are left untouched. The list is walked
+ * under dma_win_list_lock.
+ *
+ * Returns true if a window was found.
+ */
+static bool find_existing_ddw(struct device_node *pdn, u64 *dma_addr, int *window_shift,
+ bool *direct_mapping)
+{
+ struct dma_win *window;
+ const struct dynamic_dma_window_prop *dma64;
+ bool found = false;
+
+ spin_lock(&dma_win_list_lock);
+ /* check if we already created a window and dupe that config if so */
+ list_for_each_entry(window, &dma_win_list, list) {
+ if (window->device == pdn) {
+ dma64 = window->prop;
+ *dma_addr = be64_to_cpu(dma64->dma_base);
+ *window_shift = be32_to_cpu(dma64->window_shift);
+ *direct_mapping = window->direct;
+ found = true;
+ break;
+ }
+ }
+ spin_unlock(&dma_win_list_lock);
+
+ return found;
+}
+
+/*
+ * Allocate a dma_win entry for node @pdn whose window is described by
+ * @dma64. The entry is not marked as direct and is not added to any list.
+ * Returns NULL if the allocation fails.
+ */
+static struct dma_win *ddw_list_new_entry(struct device_node *pdn,
+					   const struct dynamic_dma_window_prop *dma64)
+{
+	struct dma_win *entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+
+	if (entry) {
+		entry->device = pdn;
+		entry->prop = dma64;
+		entry->direct = false;
+	}
+
+	return entry;
+}
+
+/*
+ * Record every window already described in the device tree by a property
+ * called @name.
+ *
+ * For each node carrying the property: if its value is missing or shorter
+ * than struct dynamic_dma_window_prop, the window and the property are
+ * removed through remove_ddw(); otherwise a dma_win entry is added to
+ * dma_win_list. The walk stops early if an entry cannot be allocated.
+ */
+static void find_existing_ddw_windows_named(const char *name)
+{
+ int len;
+ struct device_node *pdn;
+ struct dma_win *window;
+ const struct dynamic_dma_window_prop *dma64;
+
+ for_each_node_with_property(pdn, name) {
+ dma64 = of_get_property(pdn, name, &len);
+ if (!dma64 || len < sizeof(*dma64)) {
+ remove_ddw(pdn, true, name);
+ continue;
+ }
+
+ window = ddw_list_new_entry(pdn, dma64);
+ if (!window) {
+ /* presumably drops the reference the iterator holds on pdn, since we leave the loop early */
+ of_node_put(pdn);
+ break;
+ }
+
+ spin_lock(&dma_win_list_lock);
+ list_add(&window->list, &dma_win_list);
+ spin_unlock(&dma_win_list_lock);
+ }
+}
+
+/*
+ * Initcall: on LPAR, record the direct and dynamic 64bit DMA windows that
+ * the device tree already describes. Always returns 0.
+ */
+static int find_existing_ddw_windows(void)
+{
+	if (firmware_has_feature(FW_FEATURE_LPAR)) {
+		find_existing_ddw_windows_named(DIRECT64_PROPNAME);
+		find_existing_ddw_windows_named(DMA64_PROPNAME);
+	}
+
+	return 0;
+}
+machine_arch_initcall(pseries, find_existing_ddw_windows);
+
+/**
+ * ddw_read_ext - Get the value of a DDW extension
+ * @np: device node from which the extension value is to be read.
+ * @extnum: index number of the extension.
+ * @value: pointer to return value, modified when extension is available.
+ *
+ * Checks if "ibm,ddw-extensions" exists for this node, and gets the value
+ * at index 'extnum'.
+ * It can also be used just to check whether an extension exists, by
+ * passing value == NULL.
+ *
+ * Returns:
+ * 0 if extension successfully read
+ * -EINVAL if the "ibm,ddw-extensions" does not exist,
+ * -ENODATA if "ibm,ddw-extensions" does not have a value, and
+ * -EOVERFLOW if "ibm,ddw-extensions" does not contain this extension.
+ */
+static inline int ddw_read_ext(const struct device_node *np, int extnum,
+ u32 *value)
+{
+ static const char propname[] = "ibm,ddw-extensions";
+ u32 count;
+ int ret;
+
+ /* The first cell holds the number of extensions that follow. */
+ ret = of_property_read_u32_index(np, propname, DDW_EXT_SIZE, &count);
+ if (ret)
+ return ret;
+
+ if (count < extnum)
+ return -EOVERFLOW;
+
+ /* Caller only wants the status: read into a scratch variable. */
+ if (!value)
+ value = &count;
+
+ return of_property_read_u32_index(np, propname, extnum, value);
+}
+
+/*
+ * Issue the query-pe-dma-window RTAS call for @dev and decode its outputs
+ * into @query.
+ *
+ * @ddw_avail supplies the RTAS token and @parent is the node whose
+ * "ibm,ddw-extensions" property decides whether five or six outputs are
+ * requested. With six, the largest available block arrives as two 32-bit
+ * halves.
+ *
+ * Returns the rtas_call() result. @query is filled in from the output
+ * buffer whatever that result is, so callers need to check the return
+ * value before using it.
+ */
+static int query_ddw(struct pci_dev *dev, const u32 *ddw_avail,
+ struct ddw_query_response *query,
+ struct device_node *parent)
+{
+ struct device_node *dn;
+ struct pci_dn *pdn;
+ u32 cfg_addr, ext_query, query_out[5];
+ u64 buid;
+ int ret, out_sz;
+
+ /*
+ * From LoPAR level 2.8, "ibm,ddw-extensions" index 3 can rule how many
+ * output parameters ibm,query-pe-dma-windows will have, ranging from
+ * 5 to 6.
+ */
+ ret = ddw_read_ext(parent, DDW_EXT_QUERY_OUT_SIZE, &ext_query);
+ if (!ret && ext_query == 1)
+ out_sz = 6;
+ else
+ out_sz = 5;
+
+ /*
+ * Get the config address and phb buid of the PE window.
+ * Rely on eeh to retrieve this for us.
+ * Retrieve them from the pci device, not the node with the
+ * dma-window property
+ */
+ dn = pci_device_to_OF_node(dev);
+ pdn = PCI_DN(dn);
+ buid = pdn->phb->buid;
+ cfg_addr = ((pdn->busno << 16) | (pdn->devfn << 8));
+
+ /*
+ * NOTE(review): query_out has five slots while up to six return values
+ * are requested; presumably one of them is the status returned in ret -
+ * confirm against rtas_call().
+ */
+ ret = rtas_call(ddw_avail[DDW_QUERY_PE_DMA_WIN], 3, out_sz, query_out,
+ cfg_addr, BUID_HI(buid), BUID_LO(buid));
+
+ switch (out_sz) {
+ case 5:
+ query->windows_available = query_out[0];
+ query->largest_available_block = query_out[1];
+ query->page_size = query_out[2];
+ query->migration_capable = query_out[3];
+ break;
+ case 6:
+ query->windows_available = query_out[0];
+ query->largest_available_block = ((u64)query_out[1] << 32) |
+ query_out[2];
+ query->page_size = query_out[3];
+ query->migration_capable = query_out[4];
+ break;
+ }
+
+ dev_info(&dev->dev, "ibm,query-pe-dma-windows(%x) %x %x %x returned %d, lb=%llx ps=%x wn=%d\n",
+ ddw_avail[DDW_QUERY_PE_DMA_WIN], cfg_addr, BUID_HI(buid),
+ BUID_LO(buid), ret, query->largest_available_block,
+ query->page_size, query->windows_available);
+
+ return ret;
+}
+
+/*
+ * Issue the create-pe-dma-window RTAS call for @dev, asking for a window of
+ * (1 << @window_shift) bytes mapped with pages of (1 << @page_shift) bytes.
+ *
+ * @ddw_avail supplies the RTAS token. The outputs (LIOBN and the two halves
+ * of the DMA address) are written straight into @create. The call is
+ * repeated for as long as rtas_busy_delay() returns non-zero for its
+ * result.
+ *
+ * Returns the final rtas_call() result, which is also logged.
+ */
+static int create_ddw(struct pci_dev *dev, const u32 *ddw_avail,
+ struct ddw_create_response *create, int page_shift,
+ int window_shift)
+{
+ struct device_node *dn;
+ struct pci_dn *pdn;
+ u32 cfg_addr;
+ u64 buid;
+ int ret;
+
+ /*
+ * Get the config address and phb buid of the PE window.
+ * Rely on eeh to retrieve this for us.
+ * Retrieve them from the pci device, not the node with the
+ * dma-window property
+ */
+ dn = pci_device_to_OF_node(dev);
+ pdn = PCI_DN(dn);
+ buid = pdn->phb->buid;
+ cfg_addr = ((pdn->busno << 16) | (pdn->devfn << 8));
+
+ do {
+ /* extra outputs are LIOBN and dma-addr (hi, lo) */
+ ret = rtas_call(ddw_avail[DDW_CREATE_PE_DMA_WIN], 5, 4,
+ (u32 *)create, cfg_addr, BUID_HI(buid),
+ BUID_LO(buid), page_shift, window_shift);
+ } while (rtas_busy_delay(ret));
+ dev_info(&dev->dev,
+ "ibm,create-pe-dma-window(%x) %x %x %x %x %x returned %d "
+ "(liobn = 0x%x starting addr = %x %x)\n",
+ ddw_avail[DDW_CREATE_PE_DMA_WIN], cfg_addr, BUID_HI(buid),
+ BUID_LO(buid), page_shift, window_shift, ret, create->liobn,
+ create->addr_hi, create->addr_lo);
+
+ return ret;
+}
+
+/* List entry that records a device node on failed_ddw_pdn_list. */
+struct failed_ddw_pdn {
+ struct device_node *pdn; /* the recorded node */
+ struct list_head list; /* link in failed_ddw_pdn_list */
+};
+
+/* NOTE(review): presumably the nodes for which enabling DDW failed earlier - confirm in enable_ddw(). */
+static LIST_HEAD(failed_ddw_pdn_list);
+
+/*
+ * Return the highest address a DDW mapping has to cover: the larger of
+ * memory_hotplug_max() and the end (plus one) of the first address range
+ * of every "memory" node. Nodes whose range cannot be translated are
+ * skipped.
+ */
+static phys_addr_t ddw_memory_hotplug_max(void)
+{
+	resource_size_t highest = memory_hotplug_max();
+	struct device_node *mem_np;
+
+	for_each_node_by_type(mem_np, "memory") {
+		struct resource range;
+
+		if (!of_address_to_resource(mem_np, 0, &range))
+			highest = max_t(resource_size_t, highest, range.end + 1);
+	}
+
+	return highest;
+}
+
+/*
+ * Platforms supporting the DDW option starting with LoPAR level 2.7 implement
+ * ibm,ddw-extensions, which carries the rtas token for
+ * ibm,reset-pe-dma-windows.
+ * That rtas-call can be used to restore the default DMA window for the device.
+ *
+ * @dev is the device whose PE is reset and @par_dn the node carrying
+ * "ibm,ddw-extensions". Nothing is done when the reset extension cannot be
+ * read. A failing RTAS call is only logged.
+ */
+static void reset_dma_window(struct pci_dev *dev, struct device_node *par_dn)
+{
+	int ret;
+	u32 cfg_addr, reset_dma_win;
+	u64 buid;
+	struct device_node *dn;
+	struct pci_dn *pdn;
+
+	ret = ddw_read_ext(par_dn, DDW_EXT_RESET_DMA_WIN, &reset_dma_win);
+	if (ret)
+		return;
+
+	dn = pci_device_to_OF_node(dev);
+	pdn = PCI_DN(dn);
+	buid = pdn->phb->buid;
+	cfg_addr = (pdn->busno << 16) | (pdn->devfn << 8);
+
+	ret = rtas_call(reset_dma_win, 3, 1, NULL, cfg_addr, BUID_HI(buid),
+			BUID_LO(buid));
+	/* End the message with a newline so it is emitted as a complete line. */
+	if (ret)
+		dev_info(&dev->dev,
+			 "ibm,reset-pe-dma-windows(%x) %x %x %x returned %d\n",
+			 reset_dma_win, cfg_addr, BUID_HI(buid), BUID_LO(buid),
+			 ret);
+}
+
+/*
+ * Return the largest page shift advertised in the "IO Page Sizes" output of
+ * ibm,query-pe-dma-window, or 0 when none of the known bits is set.
+ */
+static int iommu_get_page_shift(u32 query_page_size)
+{
+	/*
+	 * Page size for each bit of the field, least significant bit first.
+	 * LoPAR numbers that bit 31 (4k), the next one 30 (64k), and so on.
+	 * Note that 2M comes last and is therefore out of order.
+	 */
+	const int shift[] = {
+		__builtin_ctzll(SZ_4K), __builtin_ctzll(SZ_64K), __builtin_ctzll(SZ_16M),
+		__builtin_ctzll(SZ_32M), __builtin_ctzll(SZ_64M), __builtin_ctzll(SZ_128M),
+		__builtin_ctzll(SZ_256M), __builtin_ctzll(SZ_16G), __builtin_ctzll(SZ_2M)
+	};
+	int best = 0;
+	int bit;
+
+	/*
+	 * Larger pages map more memory with the same number of TCEs, so keep
+	 * the biggest supported one. Because the table is not sorted, every
+	 * bit has to be examined.
+	 */
+	for (bit = 0; bit < (int)ARRAY_SIZE(shift); bit++) {
+		if (!(query_page_size & (1 << bit)))
+			continue;
+		if (shift[bit] > best)
+			best = shift[bit];
+	}
+
+	return best;
+}
+
+/*
+ * Build a device tree property called @propname whose value is a
+ * struct dynamic_dma_window_prop describing the given window, with all
+ * fields stored big-endian.
+ *
+ * Returns the new property, or NULL if any allocation fails; nothing is
+ * leaked in that case. The property is not attached to any node.
+ */
+static struct property *ddw_property_create(const char *propname, u32 liobn, u64 dma_addr,
+					     u32 page_shift, u32 window_shift)
+{
+	struct dynamic_dma_window_prop *winprop;
+	struct property *prop;
+
+	prop = kzalloc(sizeof(*prop), GFP_KERNEL);
+	if (!prop)
+		return NULL;
+
+	prop->name = kstrdup(propname, GFP_KERNEL);
+	winprop = kzalloc(sizeof(*winprop), GFP_KERNEL);
+	prop->value = winprop;
+	prop->length = sizeof(*winprop);
+
+	if (prop->name && prop->value) {
+		winprop->liobn = cpu_to_be32(liobn);
+		winprop->dma_base = cpu_to_be64(dma_addr);
+		winprop->tce_shift = cpu_to_be32(page_shift);
+		winprop->window_shift = cpu_to_be32(window_shift);
+		return prop;
+	}
+
+	/* One of the two allocations failed: release whatever was obtained. */
+	kfree(prop->name);
+	kfree(prop->value);
+	kfree(prop);
+	return NULL;
+}
+
+/*
+ * If the PE supports dynamic dma windows, and there is space for a table
+ * that can map all pages in a linear offset, then setup such a table,
+ * and record the dma-offset in the struct device.
+ *
+ * dev: the pci device we are checking
+ * pdn: the parent pe node with the ibm,dma_window property
+ * Future: also check if we can remap the base window for our base page size
+ *
+ * The whole setup runs under dma_win_init_mutex. Every failure path that
+ * reaches out_failed records pdn on failed_ddw_pdn_list, so later calls for
+ * the same pdn bail out early, and calls reset_dma_window() if the default
+ * window had already been removed.
+ *
+ * returns true if can map all pages (direct mapping), false otherwise..
+ */
+static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn)
+{
+ int len = 0, ret;
+ int max_ram_len = order_base_2(ddw_memory_hotplug_max());
+ struct ddw_query_response query;
+ struct ddw_create_response create;
+ int page_shift;
+ u64 win_addr;
+ const char *win_name;
+ struct device_node *dn;
+ u32 ddw_avail[DDW_APPLICABLE_SIZE];
+ struct dma_win *window;
+ struct property *win64;
+ struct failed_ddw_pdn *fpdn;
+ bool default_win_removed = false, direct_mapping = false;
+ bool pmem_present;
+ struct pci_dn *pci = PCI_DN(pdn);
+ struct property *default_win = NULL;
+
+ /* Only the existence of an "ibm,pmemory" node matters; drop the reference right away. */
+ dn = of_find_node_by_type(NULL, "ibm,pmemory");
+ pmem_present = dn != NULL;
+ of_node_put(dn);
+
+ mutex_lock(&dma_win_init_mutex);
+
+ /* A window already exists for this pdn: reuse its offset, length and mapping mode. */
+ if (find_existing_ddw(pdn, &dev->dev.archdata.dma_offset, &len, &direct_mapping))
+ goto out_unlock;
+
+ /*
+ * If we already went through this for a previous function of
+ * the same device and failed, we don't want to muck with the
+ * DMA window again, as it will race with in-flight operations
+ * and can lead to EEHs. The above mutex protects access to the
+ * list.
+ */
+ list_for_each_entry(fpdn, &failed_ddw_pdn_list, list) {
+ if (fpdn->pdn == pdn)
+ goto out_unlock;
+ }
+
+ /*
+ * the ibm,ddw-applicable property holds the tokens for:
+ * ibm,query-pe-dma-window
+ * ibm,create-pe-dma-window
+ * ibm,remove-pe-dma-window
+ * for the given node in that order.
+ * the property is actually in the parent, not the PE
+ */
+ ret = of_property_read_u32_array(pdn, "ibm,ddw-applicable",
+ &ddw_avail[0], DDW_APPLICABLE_SIZE);
+ if (ret)
+ goto out_failed;
+
+ /*
+ * Query if there is a second window of size to map the
+ * whole partition. Query returns number of windows, largest
+ * block assigned to PE (partition endpoint), and two bitmasks
+ * of page sizes: supported and supported for migrate-dma.
+ */
+ dn = pci_device_to_OF_node(dev);
+ ret = query_ddw(dev, ddw_avail, &query, pdn);
+ if (ret != 0)
+ goto out_failed;
+
+ /*
+ * If there is no window available, remove the default DMA window,
+ * if it's present. This will make all the resources available to the
+ * new DDW window.
+ * If anything fails after this, we need to restore it, so also check
+ * for extensions presence.
+ */
+ if (query.windows_available == 0) {
+ int reset_win_ext;
+
+ /* DDW + IOMMU on single window may fail if there is any allocation */
+ if (iommu_table_in_use(pci->table_group->tables[0])) {
+ dev_warn(&dev->dev, "current IOMMU table in use, can't be replaced.\n");
+ goto out_failed;
+ }
+
+ default_win = of_find_property(pdn, "ibm,dma-window", NULL);
+ if (!default_win)
+ goto out_failed;
+
+ /* Refuse to remove the default window unless the reset extension can restore it. */
+ reset_win_ext = ddw_read_ext(pdn, DDW_EXT_RESET_DMA_WIN, NULL);
+ if (reset_win_ext)
+ goto out_failed;
+
+ remove_dma_window(pdn, ddw_avail, default_win);
+ default_win_removed = true;
+
+ /* Query again, to check if the window is available */
+ ret = query_ddw(dev, ddw_avail, &query, pdn);
+ if (ret != 0)
+ goto out_failed;
+
+ if (query.windows_available == 0) {
+ /* no windows are available for this device. */
+ dev_dbg(&dev->dev, "no free dynamic windows");
+ goto out_failed;
+ }
+ }
+
+ page_shift = iommu_get_page_shift(query.page_size);
+ if (!page_shift) {
+ dev_dbg(&dev->dev, "no supported page size in mask %x",
+ query.page_size);
+ goto out_failed;
+ }
+
+
+ /*
+ * The "ibm,pmemory" can appear anywhere in the address space.
+ * Assuming it is still backed by page structs, try MAX_PHYSMEM_BITS
+ * for the upper limit and fallback to max RAM otherwise but this
+ * disables device::dma_ops_bypass.
+ */
+ len = max_ram_len;
+ if (pmem_present) {
+ if (query.largest_available_block >=
+ (1ULL << (MAX_PHYSMEM_BITS - page_shift)))
+ len = MAX_PHYSMEM_BITS;
+ else
+ dev_info(&dev->dev, "Skipping ibm,pmemory");
+ }
+
+ /* check if the available block * number of ptes will map everything */
+ if (query.largest_available_block < (1ULL << (len - page_shift))) {
+ dev_dbg(&dev->dev,
+ "can't map partition max 0x%llx with %llu %llu-sized pages\n",
+ 1ULL << len,
+ query.largest_available_block,
+ 1ULL << page_shift);
+
+ /* Not everything fits: shrink the window to what is available and use it via the IOMMU table. */
+ len = order_base_2(query.largest_available_block << page_shift);
+ win_name = DMA64_PROPNAME;
+ } else {
+ direct_mapping = !default_win_removed ||
+ (len == MAX_PHYSMEM_BITS) ||
+ (!pmem_present && (len == max_ram_len));
+ win_name = direct_mapping ? DIRECT64_PROPNAME : DMA64_PROPNAME;
+ }
+
+ ret = create_ddw(dev, ddw_avail, &create, page_shift, len);
+ if (ret != 0)
+ goto out_failed;
+
+ dev_dbg(&dev->dev, "created tce table LIOBN 0x%x for %pOF\n",
+ create.liobn, dn);
+
+ win_addr = ((u64)create.addr_hi << 32) | create.addr_lo;
+ win64 = ddw_property_create(win_name, create.liobn, win_addr, page_shift, len);
+
+ if (!win64) {
+ dev_info(&dev->dev,
+ "couldn't allocate property, property name, or value\n");
+ goto out_remove_win;
+ }
+
+ ret = of_add_property(pdn, win64);
+ if (ret) {
+ dev_err(&dev->dev, "unable to add DMA window property for %pOF: %d",
+ pdn, ret);
+ goto out_free_prop;
+ }
+
+ window = ddw_list_new_entry(pdn, win64->value);
+ if (!window)
+ goto out_del_prop;
+
+ if (direct_mapping) {
+ window->direct = true;
+
+ /* DDW maps the whole partition, so enable direct DMA mapping */
+ ret = walk_system_ram_range(0, memblock_end_of_DRAM() >> PAGE_SHIFT,
+ win64->value, tce_setrange_multi_pSeriesLP_walk);
+ if (ret) {
+ dev_info(&dev->dev, "failed to map DMA window for %pOF: %d\n",
+ dn, ret);
+
+ /* Make sure to clean DDW if any TCE was set*/
+ clean_dma_window(pdn, win64->value);
+ goto out_del_list;
+ }
+ } else {
+ struct iommu_table *newtbl;
+ int i;
+ unsigned long start = 0, end = 0;
+
+ window->direct = false;
+
+ /* start/end stay 0 if the PHB has no 32-bit MMIO resource. */
+ for (i = 0; i < ARRAY_SIZE(pci->phb->mem_resources); i++) {
+ const unsigned long mask = IORESOURCE_MEM_64 | IORESOURCE_MEM;
+
+ /* Look for MMIO32 */
+ if ((pci->phb->mem_resources[i].flags & mask) == IORESOURCE_MEM) {
+ start = pci->phb->mem_resources[i].start;
+ end = pci->phb->mem_resources[i].end;
+ break;
+ }
+ }
+
+ /* New table for using DDW instead of the default DMA window */
+ newtbl = iommu_pseries_alloc_table(pci->phb->node);
+ if (!newtbl) {
+ dev_dbg(&dev->dev, "couldn't create new IOMMU table\n");
+ goto out_del_list;
+ }
+
+ iommu_table_setparms_common(newtbl, pci->phb->bus->number, create.liobn, win_addr,
+ 1UL << len, page_shift, NULL, &iommu_table_lpar_multi_ops);
+ iommu_init_table(newtbl, pci->phb->node, start, end);
+
+ pci->table_group->tables[1] = newtbl;
+
+ set_iommu_table_base(&dev->dev, newtbl);
+ }
+
+ if (default_win_removed) {
+ iommu_tce_table_put(pci->table_group->tables[0]);
+ pci->table_group->tables[0] = NULL;
+
+ /* default_win is valid here because default_win_removed == true */
+ of_remove_property(pdn, default_win);
+ dev_info(&dev->dev, "Removed default DMA window for %pOF\n", pdn);
+ }
+
+ /* Publish the window on dma_win_list, which the memory and reconfig notifiers walk. */
+ spin_lock(&dma_win_list_lock);
+ list_add(&window->list, &dma_win_list);
+ spin_unlock(&dma_win_list_lock);
+
+ dev->dev.archdata.dma_offset = win_addr;
+ goto out_unlock;
+
+ /*
+ * Error unwinding: each label undoes one more setup step and falls
+ * through to the next one.
+ */
+out_del_list:
+ kfree(window);
+
+out_del_prop:
+ of_remove_property(pdn, win64);
+
+out_free_prop:
+ kfree(win64->name);
+ kfree(win64->value);
+ kfree(win64);
+
+out_remove_win:
+ /* DDW is clean, so it's ok to call this directly. */
+ __remove_dma_window(pdn, ddw_avail, create.liobn);
+
+out_failed:
+ if (default_win_removed)
+ reset_dma_window(dev, pdn);
+
+ /* Remember the failure; if this allocation fails too, the pdn is simply not recorded. */
+ fpdn = kzalloc(sizeof(*fpdn), GFP_KERNEL);
+ if (!fpdn)
+ goto out_unlock;
+ fpdn->pdn = pdn;
+ list_add(&fpdn->list, &failed_ddw_pdn_list);
+
+out_unlock:
+ mutex_unlock(&dma_win_init_mutex);
+
+ /*
+ * If we have persistent memory and the window size is only as big
+ * as RAM, then we failed to create a window to cover persistent
+ * memory and need to set the DMA limit.
+ */
+ if (pmem_present && direct_mapping && len == max_ram_len)
+ dev->dev.bus_dma_limit = dev->dev.archdata.dma_offset + (1ULL << len);
+
+ return direct_mapping;
+}
+
+/*
+ * Per-device DMA setup for LPAR: find the node that carries the DMA window
+ * for @dev, create that node's IOMMU table group on first use, and attach
+ * the device to its default table (tables[0]).
+ *
+ * Returns silently (after a warning) when no DMA window node is found or
+ * when the table group cannot be allocated.
+ */
+static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev)
+{
+	struct device_node *pdn, *dn;
+	struct iommu_table *tbl;
+	const __be32 *dma_window = NULL;
+	struct pci_dn *pci;
+
+	pr_debug("pci_dma_dev_setup_pSeriesLP: %s\n", pci_name(dev));
+
+	/* dev setup for LPAR is a little tricky, since the device tree might
+	 * contain the dma-window properties per-device and not necessarily
+	 * for the bus. So we need to search upwards in the tree until we
+	 * either hit a dma-window property, OR find a parent with a table
+	 * already allocated.
+	 */
+	dn = pci_device_to_OF_node(dev);
+	pr_debug(" node is %pOF\n", dn);
+
+	pdn = pci_dma_find(dn, &dma_window);
+	if (!pdn || !PCI_DN(pdn)) {
+		printk(KERN_WARNING "pci_dma_dev_setup_pSeriesLP: "
+		       "no DMA window found for pci dev=%s dn=%pOF\n",
+		       pci_name(dev), dn);
+		return;
+	}
+	pr_debug(" parent is %pOF\n", pdn);
+
+	pci = PCI_DN(pdn);
+	if (!pci->table_group) {
+		pci->table_group = iommu_pseries_alloc_group(pci->phb->node);
+		/*
+		 * The group is dereferenced right below and again after this
+		 * branch, so a failed allocation must stop here instead of
+		 * oopsing on a NULL pointer.
+		 */
+		if (!pci->table_group) {
+			printk(KERN_WARNING "pci_dma_dev_setup_pSeriesLP: "
+			       "failed to allocate IOMMU table group for %pOF\n",
+			       pdn);
+			return;
+		}
+
+		tbl = pci->table_group->tables[0];
+		iommu_table_setparms_lpar(pci->phb, pdn, tbl,
+					  pci->table_group, dma_window);
+
+		iommu_init_table(tbl, pci->phb->node, 0, 0);
+		iommu_register_group(pci->table_group,
+				     pci_domain_nr(pci->phb->bus), 0);
+		pr_debug(" created table: %p\n", pci->table_group);
+	} else {
+		pr_debug(" found DMA window, table: %p\n", pci->table_group);
+	}
+
+	set_iommu_table_base(&dev->dev, pci->table_group->tables[0]);
+	iommu_add_device(pci->table_group, &dev->dev);
+}
+
+/*
+ * Decide whether @pdev may bypass the IOMMU for the given @dma_mask.
+ * Returns whatever enable_ddw() reports for the node carrying the device's
+ * DMA window, or false if the mask is narrower than 64 bits or no such
+ * node exists.
+ */
+static bool iommu_bypass_supported_pSeriesLP(struct pci_dev *pdev, u64 dma_mask)
+{
+	struct device_node *dev_node, *window_node;
+
+	dev_node = pci_device_to_OF_node(pdev);
+
+	/* A new window is only attempted when full 64-bit DMA is requested. */
+	if (dma_mask < DMA_BIT_MASK(64))
+		return false;
+
+	dev_dbg(&pdev->dev, "node is %pOF\n", dev_node);
+
+	/*
+	 * The dma-window properties may live on the device node itself or
+	 * on one of its parents rather than on the bus, so walk upwards
+	 * until a dma-window property or a parent with an allocated table
+	 * is found.
+	 */
+	window_node = pci_dma_find(dev_node, NULL);
+	if (!window_node || !PCI_DN(window_node))
+		return false;
+
+	return enable_ddw(pdev, window_node);
+}
+
+/*
+ * Memory hotplug notifier that keeps direct-mapped DMA windows in sync with
+ * RAM. On MEM_GOING_ONLINE the new pfn range is mapped into every window on
+ * dma_win_list that is marked direct; on MEM_CANCEL_ONLINE and MEM_OFFLINE
+ * the range is cleared again. All other actions are ignored.
+ *
+ * Returns NOTIFY_BAD if any per-window call reported an error (except for
+ * MEM_CANCEL_ONLINE, whose errors are ignored), NOTIFY_OK otherwise.
+ */
+static int iommu_mem_notifier(struct notifier_block *nb, unsigned long action,
+ void *data)
+{
+ struct dma_win *window;
+ struct memory_notify *arg = data;
+ int ret = 0; /* OR of all per-window return codes */
+
+ switch (action) {
+ case MEM_GOING_ONLINE:
+ spin_lock(&dma_win_list_lock);
+ list_for_each_entry(window, &dma_win_list, list) {
+ if (window->direct) {
+ ret |= tce_setrange_multi_pSeriesLP(arg->start_pfn,
+ arg->nr_pages, window->prop);
+ }
+ /* XXX log error */
+ }
+ spin_unlock(&dma_win_list_lock);
+ break;
+ case MEM_CANCEL_ONLINE:
+ case MEM_OFFLINE:
+ spin_lock(&dma_win_list_lock);
+ list_for_each_entry(window, &dma_win_list, list) {
+ if (window->direct) {
+ ret |= tce_clearrange_multi_pSeriesLP(arg->start_pfn,
+ arg->nr_pages, window->prop);
+ }
+ /* XXX log error */
+ }
+ spin_unlock(&dma_win_list_lock);
+ break;
+ default:
+ break;
+ }
+ if (ret && action != MEM_CANCEL_ONLINE)
+ return NOTIFY_BAD;
+
+ return NOTIFY_OK;
+}
+
+/* Registered with register_memory_notifier() in iommu_init_early_pSeries(). */
+static struct notifier_block iommu_mem_nb = {
+ .notifier_call = iommu_mem_notifier,
+};
+
+/*
+ * Device-tree reconfiguration notifier. When a node is detached
+ * (OF_RECONFIG_DETACH_NODE) it removes the node's dynamic DMA window, frees
+ * the node's IOMMU table group if it has one, and drops the node's entry
+ * from dma_win_list. Returns NOTIFY_OK for that action and NOTIFY_DONE for
+ * every other action.
+ */
+static int iommu_reconfig_notifier(struct notifier_block *nb, unsigned long action, void *data)
+{
+ int err = NOTIFY_OK;
+ struct of_reconfig_data *rd = data;
+ struct device_node *np = rd->dn;
+ struct pci_dn *pci = PCI_DN(np);
+ struct dma_win *window;
+
+ switch (action) {
+ case OF_RECONFIG_DETACH_NODE:
+ /*
+ * Removing the property will invoke the reconfig
+ * notifier again, which causes dead-lock on the
+ * read-write semaphore of the notifier chain. So
+ * we have to remove the property when releasing
+ * the device node.
+ */
+ /* Try the direct-mapping property first; fall back to the DMA64 one if that call returns non-zero. */
+ if (remove_ddw(np, false, DIRECT64_PROPNAME))
+ remove_ddw(np, false, DMA64_PROPNAME);
+
+ if (pci && pci->table_group)
+ iommu_pseries_free_group(pci->table_group,
+ np->full_name);
+
+ spin_lock(&dma_win_list_lock);
+ list_for_each_entry(window, &dma_win_list, list) {
+ if (window->device == np) {
+ /* Safe without the _safe iterator: the loop is left right after the delete. */
+ list_del(&window->list);
+ kfree(window);
+ break;
+ }
+ }
+ spin_unlock(&dma_win_list_lock);
+ break;
+ default:
+ err = NOTIFY_DONE;
+ break;
+ }
+ return err;
+}
+
+/* Registered with of_reconfig_notifier_register() in iommu_init_early_pSeries(). */
+static struct notifier_block iommu_reconfig_nb = {
+ .notifier_call = iommu_reconfig_notifier,
+};
+
+/* These are called very early. */
+/*
+ * Early IOMMU setup: install the DMA setup hooks that match the firmware
+ * type (LPAR or not), register the device-tree and memory hotplug
+ * notifiers, and select the IOMMU DMA ops. Does nothing at all when the
+ * "linux,iommu-off" property is present under the chosen node.
+ */
+void __init iommu_init_early_pSeries(void)
+{
+	if (of_chosen) {
+		if (of_get_property(of_chosen, "linux,iommu-off", NULL))
+			return;
+	}
+
+	if (!firmware_has_feature(FW_FEATURE_LPAR)) {
+		pseries_pci_controller_ops.dma_bus_setup = pci_dma_bus_setup_pSeries;
+		pseries_pci_controller_ops.dma_dev_setup = pci_dma_dev_setup_pSeries;
+	} else {
+		pseries_pci_controller_ops.dma_bus_setup = pci_dma_bus_setup_pSeriesLP;
+		pseries_pci_controller_ops.dma_dev_setup = pci_dma_dev_setup_pSeriesLP;
+		/* The bypass hook is only offered when DDW has not been disabled. */
+		if (!disable_ddw)
+			pseries_pci_controller_ops.iommu_bypass_supported =
+				iommu_bypass_supported_pSeriesLP;
+	}
+
+	of_reconfig_notifier_register(&iommu_reconfig_nb);
+	register_memory_notifier(&iommu_mem_nb);
+
+	set_pci_dma_ops(&dma_iommu_ops);
+}
+
+/*
+ * Handler for the "multitce=" boot parameter. With the value "off", on an
+ * LPAR that has at least one of the PUT_TCE_IND / STUFF_TCE firmware
+ * features, both feature bits are cleared. Always returns 1.
+ */
+static int __init disable_multitce(char *str)
+{
+	if (strcmp(str, "off") != 0)
+		return 1;
+
+	if (!firmware_has_feature(FW_FEATURE_LPAR))
+		return 1;
+
+	/* Nothing to disable unless at least one of the two features is present. */
+	if (!firmware_has_feature(FW_FEATURE_PUT_TCE_IND) &&
+	    !firmware_has_feature(FW_FEATURE_STUFF_TCE))
+		return 1;
+
+	printk(KERN_INFO "Disabling MULTITCE firmware feature\n");
+	powerpc_firmware_features &=
+		~(FW_FEATURE_PUT_TCE_IND | FW_FEATURE_STUFF_TCE);
+
+	return 1;
+}
+
+__setup("multitce=", disable_multitce);
+
+#ifdef CONFIG_SPAPR_TCE_IOMMU
+/*
+ * Return a new reference to the IOMMU group of the table group that owns
+ * @pdev's DMA window, or ERR_PTR(-ENODEV) if the window node, its table
+ * group or the group itself is missing. @hose is not used.
+ */
+struct iommu_group *pSeries_pci_device_group(struct pci_controller *hose,
+					     struct pci_dev *pdev)
+{
+	struct device_node *win_node;
+	struct pci_dn *win_pci;
+
+	win_node = pci_dma_find(pdev->dev.of_node, NULL);
+	win_pci = win_node ? PCI_DN(win_node) : NULL;
+
+	if (!win_pci || !win_pci->table_group || !win_pci->table_group->group)
+		return ERR_PTR(-ENODEV);
+
+	return iommu_group_ref_get(win_pci->table_group->group);
+}
+#endif
diff --git a/arch/powerpc/platforms/pseries/kexec.c b/arch/powerpc/platforms/pseries/kexec.c
new file mode 100644
index 000000000..096d09ed8
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/kexec.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2006 Michael Ellerman, IBM Corporation
+ */
+
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+
+#include <asm/setup.h>
+#include <asm/page.h>
+#include <asm/firmware.h>
+#include <asm/kexec.h>
+#include <asm/xics.h>
+#include <asm/xive.h>
+#include <asm/smp.h>
+#include <asm/plpar_wrappers.h>
+
+#include "pseries.h"
+
+/*
+ * Per-CPU teardown run on the way into kexec.
+ *
+ * When the firmware has FW_FEATURE_SPLPAR and this is not a crash shutdown,
+ * the calling CPU's DTL (if enabled in its lppaca), SLB shadow buffer and VPA
+ * are unregistered; failures are only logged. Afterwards the interrupt
+ * controller state is torn down: XIVE if enabled, XICS otherwise.
+ *
+ * crash_shutdown: non-zero on the crash path; skips the unregistration calls
+ * secondary: non-zero for secondary CPUs; xive_shutdown() runs only when zero
+ */
+void pseries_kexec_cpu_down(int crash_shutdown, int secondary)
+{
+ /*
+ * Don't risk a hypervisor call if we're crashing
+ * XXX: Why? The hypervisor is not crashing. It might be better
+ * to at least attempt unregister to avoid the hypervisor stepping
+ * on our memory.
+ */
+ if (firmware_has_feature(FW_FEATURE_SPLPAR) && !crash_shutdown) {
+ int ret;
+ int cpu = smp_processor_id();
+ int hwcpu = hard_smp_processor_id();
+
+ if (get_lppaca()->dtl_enable_mask) {
+ ret = unregister_dtl(hwcpu);
+ if (ret) {
+ pr_err("WARNING: DTL deregistration for cpu "
+ "%d (hw %d) failed with %d\n",
+ cpu, hwcpu, ret);
+ }
+ }
+
+ ret = unregister_slb_shadow(hwcpu);
+ if (ret) {
+ pr_err("WARNING: SLB shadow buffer deregistration "
+ "for cpu %d (hw %d) failed with %d\n",
+ cpu, hwcpu, ret);
+ }
+
+ ret = unregister_vpa(hwcpu);
+ if (ret) {
+ pr_err("WARNING: VPA deregistration for cpu %d "
+ "(hw %d) failed with %d\n", cpu, hwcpu, ret);
+ }
+ }
+
+ if (xive_enabled()) {
+ xive_teardown_cpu();
+
+ if (!secondary)
+ xive_shutdown();
+ } else
+ xics_kexec_teardown_cpu(secondary);
+}
+
+/*
+ * pSeries kexec entry: calls pseries_disable_reloc_on_exc() when the firmware
+ * has FW_FEATURE_SET_MODE, then hands over to default_machine_kexec().
+ */
+void pseries_machine_kexec(struct kimage *image)
+{
+ if (firmware_has_feature(FW_FEATURE_SET_MODE))
+ pseries_disable_reloc_on_exc();
+
+ default_machine_kexec(image);
+}
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
new file mode 100644
index 000000000..d4d6de062
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -0,0 +1,2026 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * pSeries_lpar.c
+ * Copyright (C) 2001 Todd Inglett, IBM Corporation
+ *
+ * pSeries LPAR support.
+ */
+
+/* Enables debugging of low-level hash table routines - careful! */
+#undef DEBUG
+#define pr_fmt(fmt) "lpar: " fmt
+
+#include <linux/kernel.h>
+#include <linux/dma-mapping.h>
+#include <linux/console.h>
+#include <linux/export.h>
+#include <linux/jump_label.h>
+#include <linux/delay.h>
+#include <linux/stop_machine.h>
+#include <linux/spinlock.h>
+#include <linux/cpuhotplug.h>
+#include <linux/workqueue.h>
+#include <linux/proc_fs.h>
+#include <linux/pgtable.h>
+#include <linux/debugfs.h>
+
+#include <asm/processor.h>
+#include <asm/mmu.h>
+#include <asm/page.h>
+#include <asm/setup.h>
+#include <asm/mmu_context.h>
+#include <asm/iommu.h>
+#include <asm/tlb.h>
+#include <asm/cputable.h>
+#include <asm/papr-sysparm.h>
+#include <asm/udbg.h>
+#include <asm/smp.h>
+#include <asm/trace.h>
+#include <asm/firmware.h>
+#include <asm/plpar_wrappers.h>
+#include <asm/kexec.h>
+#include <asm/fadump.h>
+#include <asm/dtl.h>
+#include <asm/vphn.h>
+
+#include "pseries.h"
+
+/* Flag bits for H_BULK_REMOVE */
+#define HBR_REQUEST 0x4000000000000000UL
+#define HBR_RESPONSE 0x8000000000000000UL
+#define HBR_END 0xc000000000000000UL
+#define HBR_AVPN 0x0200000000000000UL
+#define HBR_ANDCOND 0x0100000000000000UL
+
+
+/* in hvCall.S */
+EXPORT_SYMBOL(plpar_hcall);
+EXPORT_SYMBOL(plpar_hcall9);
+EXPORT_SYMBOL(plpar_hcall_norets);
+
+#ifdef CONFIG_PPC_64S_HASH_MMU
+/*
+ * H_BLOCK_REMOVE supported block size for a given actual page size in a
+ * segment with a given base page size.
+ *
+ * The first index is the segment base page size, the second one is the actual
+ * page size.
+ */
+static int hblkrm_size[MMU_PAGE_COUNT][MMU_PAGE_COUNT] __ro_after_init;
+#endif
+
+/*
+ * Due to the complexity involved, and because the current hypervisor only
+ * returns this value or 0, we limit H_BLOCK_REMOVE support to a block size
+ * of 8.
+ */
+#define HBLKRM_SUPPORTED_BLOCK_SIZE 8
+
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+static u8 dtl_mask = DTL_LOG_PREEMPT;
+#else
+static u8 dtl_mask;
+#endif
+
+/*
+ * Allocate a dispatch trace log buffer from dtl_cache for every possible CPU
+ * that does not have one yet, and initialise the paca fields that track it.
+ * The loop stops at the first allocation failure, leaving the remaining CPUs
+ * without a buffer.
+ *
+ * time_limit: optional. When non-NULL, cond_resched() is called once jiffies
+ * has passed *time_limit, and the limit is pushed out by another HZ.
+ */
+void alloc_dtl_buffers(unsigned long *time_limit)
+{
+ int cpu;
+ struct paca_struct *pp;
+ struct dtl_entry *dtl;
+
+ for_each_possible_cpu(cpu) {
+ pp = paca_ptrs[cpu];
+ if (pp->dispatch_log)
+ continue;
+ dtl = kmem_cache_alloc(dtl_cache, GFP_KERNEL);
+ if (!dtl) {
+ pr_warn("Failed to allocate dispatch trace log for cpu %d\n",
+ cpu);
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+ pr_warn("Stolen time statistics will be unreliable\n");
+#endif
+ break;
+ }
+
+ pp->dtl_ridx = 0;
+ pp->dispatch_log = dtl;
+ pp->dispatch_log_end = dtl + N_DISPATCH_LOG;
+ pp->dtl_curr = dtl;
+
+ if (time_limit && time_after(jiffies, *time_limit)) {
+ cond_resched();
+ *time_limit = jiffies + HZ;
+ }
+ }
+}
+
+/*
+ * Register @cpu's dispatch trace log with the hypervisor. Nothing happens if
+ * the CPU has no buffer or if dtl_mask is zero. The read positions are reset
+ * before registering. A registration failure is only logged; the lppaca
+ * enable mask is set to dtl_mask either way.
+ */
+void register_dtl_buffer(int cpu)
+{
+ long ret;
+ struct paca_struct *pp;
+ struct dtl_entry *dtl;
+ int hwcpu = get_hard_smp_processor_id(cpu);
+
+ pp = paca_ptrs[cpu];
+ dtl = pp->dispatch_log;
+ if (dtl && dtl_mask) {
+ pp->dtl_ridx = 0;
+ pp->dtl_curr = dtl;
+ lppaca_of(cpu).dtl_idx = 0;
+
+ /* hypervisor reads buffer length from this field */
+ dtl->enqueue_to_dispatch_time = cpu_to_be32(DISPATCH_LOG_BYTES);
+ ret = register_dtl(hwcpu, __pa(dtl));
+ if (ret)
+ pr_err("WARNING: DTL registration of cpu %d (hw %d) failed with %ld\n",
+ cpu, hwcpu, ret);
+
+ lppaca_of(cpu).dtl_enable_mask = dtl_mask;
+ }
+}
+
+#ifdef CONFIG_PPC_SPLPAR
+/* Per-CPU delayed work item that periodically runs process_dtl_buffer(). */
+struct dtl_worker {
+ struct delayed_work work;
+ int cpu; /* CPU this worker was set up for; the worker stops if it runs elsewhere */
+};
+
+/* Per-CPU dispatch counters maintained by update_vcpu_disp_stat(). */
+struct vcpu_dispatch_data {
+ int last_disp_cpu; /* processor id of the previous dispatch; -1 until the first one is seen */
+
+ int total_disp; /* dispatches counted (the very first one is not) */
+
+ int same_cpu_disp; /* same processor id, or same first thread sibling, as the previous dispatch */
+ int same_chip_disp; /* relative dispatch distance 0 */
+ int diff_chip_disp; /* relative dispatch distance 1 */
+ int far_chip_disp; /* relative dispatch distance 2 */
+
+ int numa_home_disp; /* home node distance 0 */
+ int numa_remote_disp; /* home node distance 1 */
+ int numa_far_disp; /* home node distance 2 */
+};
+
+/*
+ * This represents the number of cpus in the hypervisor. Since there is no
+ * architected way to discover the number of processors in the host, we
+ * provision for dealing with NR_CPUS. This is currently 2048 by default, and
+ * is sufficient for our purposes. This will need to be tweaked if
+ * CONFIG_NR_CPUS is changed.
+ */
+#define NR_CPUS_H NR_CPUS
+
+DEFINE_RWLOCK(dtl_access_lock); /* write-held from dtl_worker_enable() until dtl_worker_disable() */
+static DEFINE_PER_CPU(struct vcpu_dispatch_data, vcpu_disp_data);
+static DEFINE_PER_CPU(u64, dtl_entry_ridx); /* index of the next DTL entry process_dtl_buffer() reads */
+static DEFINE_PER_CPU(struct dtl_worker, dtl_workers);
+static enum cpuhp_state dtl_worker_state; /* state returned by cpuhp_setup_state() */
+static DEFINE_MUTEX(dtl_enable_mutex); /* serialises enable/disable in vcpudispatch_stats_write() */
+static int vcpudispatch_stats_on __read_mostly;
+static int vcpudispatch_stats_freq = 50; /* the worker is rescheduled every HZ / freq jiffies */
+static __be32 *vcpu_associativity, *pcpu_associativity; /* allocated by init_cpu_associativity() */
+
+
+/*
+ * Free the dispatch trace log buffer of every possible CPU that has one and
+ * reset the paca fields that pointed into it. Compiles to an empty function
+ * when CONFIG_VIRT_CPU_ACCOUNTING_NATIVE is set.
+ *
+ * time_limit: optional. When non-NULL, cond_resched() is called once jiffies
+ * has passed *time_limit, and the limit is pushed out by another HZ.
+ */
+static void free_dtl_buffers(unsigned long *time_limit)
+{
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+	int cpu;
+	struct paca_struct *pp;
+
+	for_each_possible_cpu(cpu) {
+		pp = paca_ptrs[cpu];
+		if (!pp->dispatch_log)
+			continue;
+		kmem_cache_free(dtl_cache, pp->dispatch_log);
+		pp->dtl_ridx = 0;
+		/* These three are pointers (see alloc_dtl_buffers()), so clear them with NULL. */
+		pp->dispatch_log = NULL;
+		pp->dispatch_log_end = NULL;
+		pp->dtl_curr = NULL;
+
+		if (time_limit && time_after(jiffies, *time_limit)) {
+			cond_resched();
+			*time_limit = jiffies + HZ;
+		}
+	}
+#endif
+}
+
+/*
+ * Allocate the zeroed vcpu and pcpu associativity tables, with one slot of
+ * VPHN_ASSOC_BUFSIZE words per group of threads_per_core CPUs.
+ *
+ * Returns 0 on success or -ENOMEM if either allocation failed. Nothing is
+ * freed here on failure; the caller in this file cleans up with
+ * destroy_cpu_associativity().
+ */
+static int init_cpu_associativity(void)
+{
+ vcpu_associativity = kcalloc(num_possible_cpus() / threads_per_core,
+ VPHN_ASSOC_BUFSIZE * sizeof(__be32), GFP_KERNEL);
+ pcpu_associativity = kcalloc(NR_CPUS_H / threads_per_core,
+ VPHN_ASSOC_BUFSIZE * sizeof(__be32), GFP_KERNEL);
+
+ if (!vcpu_associativity || !pcpu_associativity) {
+ pr_err("error allocating memory for associativity information\n");
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+/*
+ * Free both associativity tables and reset the pointers. Safe to call when
+ * only one, or neither, of the tables was allocated.
+ */
+static void destroy_cpu_associativity(void)
+{
+	kfree(vcpu_associativity);
+	kfree(pcpu_associativity);
+	/* Both are __be32 pointers, so clear them with NULL rather than 0. */
+	vcpu_associativity = NULL;
+	pcpu_associativity = NULL;
+}
+
+/*
+ * Return the associativity slot for @cpu inside the table @cpu_assoc. If the
+ * slot's first word is still zero, it is filled by hcall_vphn() with @flag
+ * first. Returns NULL if that call fails.
+ */
+static __be32 *__get_cpu_associativity(int cpu, __be32 *cpu_assoc, int flag)
+{
+	int core = (int)(cpu / threads_per_core);
+	__be32 *slot = &cpu_assoc[core * VPHN_ASSOC_BUFSIZE];
+	int rc;
+
+	/* A non-zero first word: the slot has already been filled in. */
+	if (slot[0])
+		return slot;
+
+	rc = hcall_vphn(cpu, flag, &slot[0]);
+
+	return rc ? NULL : slot;
+}
+
+/* Look up @cpu in pcpu_associativity, querying with VPHN_FLAG_PCPU; NULL on failure. */
+static __be32 *get_pcpu_associativity(int cpu)
+{
+ return __get_cpu_associativity(cpu, pcpu_associativity, VPHN_FLAG_PCPU);
+}
+
+/* Look up @cpu in vcpu_associativity, querying with VPHN_FLAG_VCPU; NULL on failure. */
+static __be32 *get_vcpu_associativity(int cpu)
+{
+ return __get_cpu_associativity(cpu, vcpu_associativity, VPHN_FLAG_VCPU);
+}
+
+/*
+ * Distance between the processors of two consecutive dispatches, computed by
+ * cpu_relative_distance() on their pcpu associativity.
+ *
+ * Returns the distance, -EINVAL if either id is >= NR_CPUS_H, or -EIO if an
+ * associativity lookup failed.
+ */
+static int cpu_relative_dispatch_distance(int last_disp_cpu, int cur_disp_cpu)
+{
+ __be32 *last_disp_cpu_assoc, *cur_disp_cpu_assoc;
+
+ /* Ids at or above NR_CPUS_H would index past the pcpu_associativity table. */
+ if (last_disp_cpu >= NR_CPUS_H || cur_disp_cpu >= NR_CPUS_H)
+ return -EINVAL;
+
+ last_disp_cpu_assoc = get_pcpu_associativity(last_disp_cpu);
+ cur_disp_cpu_assoc = get_pcpu_associativity(cur_disp_cpu);
+
+ if (!last_disp_cpu_assoc || !cur_disp_cpu_assoc)
+ return -EIO;
+
+ return cpu_relative_distance(last_disp_cpu_assoc, cur_disp_cpu_assoc);
+}
+
+/*
+ * Distance between the processor of a dispatch (@disp_cpu, pcpu
+ * associativity) and the CPU this code runs on (vcpu associativity).
+ *
+ * Returns the distance, -EINVAL if @disp_cpu is >= NR_CPUS_H, or -EIO if an
+ * associativity lookup failed.
+ */
+static int cpu_home_node_dispatch_distance(int disp_cpu)
+{
+ __be32 *disp_cpu_assoc, *vcpu_assoc;
+ int vcpu_id = smp_processor_id();
+
+ if (disp_cpu >= NR_CPUS_H) {
+ pr_debug_ratelimited("vcpu dispatch cpu %d > %d\n",
+ disp_cpu, NR_CPUS_H);
+ return -EINVAL;
+ }
+
+ disp_cpu_assoc = get_pcpu_associativity(disp_cpu);
+ vcpu_assoc = get_vcpu_associativity(vcpu_id);
+
+ if (!disp_cpu_assoc || !vcpu_assoc)
+ return -EIO;
+
+ return cpu_relative_distance(disp_cpu_assoc, vcpu_assoc);
+}
+
+/*
+ * Account one dispatch in this CPU's vcpu_disp_data.
+ *
+ * disp_cpu: processor id taken from a DTL entry
+ *
+ * The first dispatch seen only seeds last_disp_cpu. Every later one bumps
+ * total_disp, then one of the same_cpu/same_chip/diff_chip/far_chip counters
+ * (relative to the previous dispatch) and one of the numa_home/remote/far
+ * counters (relative to the CPU this runs on). Lookup errors and unexpected
+ * distances are only logged, so the per-category counters may add up to less
+ * than total_disp.
+ */
+static void update_vcpu_disp_stat(int disp_cpu)
+{
+ struct vcpu_dispatch_data *disp;
+ int distance;
+
+ disp = this_cpu_ptr(&vcpu_disp_data);
+ if (disp->last_disp_cpu == -1) {
+ disp->last_disp_cpu = disp_cpu;
+ return;
+ }
+
+ disp->total_disp++;
+
+ /* Same processor id, or same first thread sibling, counts as "same cpu". */
+ if (disp->last_disp_cpu == disp_cpu ||
+ (cpu_first_thread_sibling(disp->last_disp_cpu) ==
+ cpu_first_thread_sibling(disp_cpu)))
+ disp->same_cpu_disp++;
+ else {
+ distance = cpu_relative_dispatch_distance(disp->last_disp_cpu,
+ disp_cpu);
+ if (distance < 0)
+ pr_debug_ratelimited("vcpudispatch_stats: cpu %d: error determining associativity\n",
+ smp_processor_id());
+ else {
+ switch (distance) {
+ case 0:
+ disp->same_chip_disp++;
+ break;
+ case 1:
+ disp->diff_chip_disp++;
+ break;
+ case 2:
+ disp->far_chip_disp++;
+ break;
+ default:
+ pr_debug_ratelimited("vcpudispatch_stats: cpu %d (%d -> %d): unexpected relative dispatch distance %d\n",
+ smp_processor_id(),
+ disp->last_disp_cpu,
+ disp_cpu,
+ distance);
+ }
+ }
+ }
+
+ /* The home-node classification is done for every counted dispatch, including "same cpu" ones. */
+ distance = cpu_home_node_dispatch_distance(disp_cpu);
+ if (distance < 0)
+ pr_debug_ratelimited("vcpudispatch_stats: cpu %d: error determining associativity\n",
+ smp_processor_id());
+ else {
+ switch (distance) {
+ case 0:
+ disp->numa_home_disp++;
+ break;
+ case 1:
+ disp->numa_remote_disp++;
+ break;
+ case 2:
+ disp->numa_far_disp++;
+ break;
+ default:
+ pr_debug_ratelimited("vcpudispatch_stats: cpu %d on %d: unexpected numa dispatch distance %d\n",
+ smp_processor_id(),
+ disp_cpu,
+ distance);
+ }
+ }
+
+ disp->last_disp_cpu = disp_cpu;
+}
+
+/*
+ * Delayed-work handler: consume the DTL entries logged since the last run on
+ * this CPU, feed each entry's processor id to update_vcpu_disp_stat(), and
+ * re-arm itself to run again after HZ / vcpudispatch_stats_freq jiffies.
+ *
+ * The worker is NOT re-armed when the CPU has no dispatch log or when it
+ * finds itself running on a CPU other than the one it was set up for.
+ */
+static void process_dtl_buffer(struct work_struct *work)
+{
+ struct dtl_entry dtle;
+ u64 i = __this_cpu_read(dtl_entry_ridx);
+ struct dtl_entry *dtl = local_paca->dispatch_log + (i % N_DISPATCH_LOG);
+ struct dtl_entry *dtl_end = local_paca->dispatch_log_end;
+ struct lppaca *vpa = local_paca->lppaca_ptr;
+ struct dtl_worker *d = container_of(work, struct dtl_worker, work.work);
+
+ if (!local_paca->dispatch_log)
+ return;
+
+ /* if we have been migrated away, we cancel ourself */
+ if (d->cpu != smp_processor_id()) {
+ pr_debug("vcpudispatch_stats: cpu %d worker migrated -- canceling worker\n",
+ smp_processor_id());
+ return;
+ }
+
+ /* Nothing new since the last run. */
+ if (i == be64_to_cpu(vpa->dtl_idx))
+ goto out;
+
+ while (i < be64_to_cpu(vpa->dtl_idx)) {
+ /* Copy the entry first, then check whether the log has wrapped past it. */
+ dtle = *dtl;
+ barrier();
+ if (i + N_DISPATCH_LOG < be64_to_cpu(vpa->dtl_idx)) {
+ /* buffer has overflowed */
+ pr_debug_ratelimited("vcpudispatch_stats: cpu %d lost %lld DTL samples\n",
+ d->cpu,
+ be64_to_cpu(vpa->dtl_idx) - N_DISPATCH_LOG - i);
+ i = be64_to_cpu(vpa->dtl_idx) - N_DISPATCH_LOG;
+ dtl = local_paca->dispatch_log + (i % N_DISPATCH_LOG);
+ continue;
+ }
+ update_vcpu_disp_stat(be16_to_cpu(dtle.processor_id));
+ ++i;
+ ++dtl;
+ /* The log is a ring: wrap back to the start once the end is reached. */
+ if (dtl == dtl_end)
+ dtl = local_paca->dispatch_log;
+ }
+
+ __this_cpu_write(dtl_entry_ridx, i);
+
+out:
+ schedule_delayed_work_on(d->cpu, to_delayed_work(work),
+ HZ / vcpudispatch_stats_freq);
+}
+
+/*
+ * CPU hotplug "online" callback installed by dtl_worker_enable(): set up the
+ * CPU's worker, initialise its read index, and schedule the first run.
+ * Always returns 0.
+ */
+static int dtl_worker_online(unsigned int cpu)
+{
+ struct dtl_worker *d = &per_cpu(dtl_workers, cpu);
+
+ memset(d, 0, sizeof(*d));
+ INIT_DELAYED_WORK(&d->work, process_dtl_buffer);
+ d->cpu = cpu;
+
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+ /* The buffer is registered here, so reading starts at index 0. */
+ per_cpu(dtl_entry_ridx, cpu) = 0;
+ register_dtl_buffer(cpu);
+#else
+ /* Start reading from the lppaca's current dtl_idx. */
+ per_cpu(dtl_entry_ridx, cpu) = be64_to_cpu(lppaca_of(cpu).dtl_idx);
+#endif
+
+ schedule_delayed_work_on(cpu, &d->work, HZ / vcpudispatch_stats_freq);
+ return 0;
+}
+
+/*
+ * CPU hotplug "offline" callback: cancel the CPU's worker and wait for it to
+ * finish; unless CONFIG_VIRT_CPU_ACCOUNTING_NATIVE is set, also unregister
+ * the CPU's DTL. Always returns 0.
+ */
+static int dtl_worker_offline(unsigned int cpu)
+{
+ struct dtl_worker *d = &per_cpu(dtl_workers, cpu);
+
+ cancel_delayed_work_sync(&d->work);
+
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+ unregister_dtl(get_hard_smp_processor_id(cpu));
+#endif
+
+ return 0;
+}
+
+/* Set dtl_mask to @mask and copy it into the lppaca of every present CPU. */
+static void set_global_dtl_mask(u8 mask)
+{
+ int cpu;
+
+ dtl_mask = mask;
+ for_each_present_cpu(cpu)
+ lppaca_of(cpu).dtl_enable_mask = dtl_mask;
+}
+
+/*
+ * Restore dtl_mask to its build-time default (DTL_LOG_PREEMPT with
+ * CONFIG_VIRT_CPU_ACCOUNTING_NATIVE, 0 otherwise) and copy it into the
+ * lppaca of every present CPU.
+ */
+static void reset_global_dtl_mask(void)
+{
+ int cpu;
+
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+ dtl_mask = DTL_LOG_PREEMPT;
+#else
+ dtl_mask = 0;
+#endif
+ for_each_present_cpu(cpu)
+ lppaca_of(cpu).dtl_enable_mask = dtl_mask;
+}
+
+/*
+ * Start DTL processing: enable full DTL logging, allocate the buffers, and
+ * install the per-CPU hotplug callbacks that run the workers.
+ *
+ * dtl_access_lock is taken for writing with a trylock. On success it is
+ * deliberately left held on return and is released by dtl_worker_disable().
+ *
+ * Returns 0 on success, -EBUSY if the lock could not be taken, or -EINVAL if
+ * the hotplug state could not be set up (everything is undone in that case).
+ */
+static int dtl_worker_enable(unsigned long *time_limit)
+{
+ int rc = 0, state;
+
+ if (!write_trylock(&dtl_access_lock)) {
+ rc = -EBUSY;
+ goto out;
+ }
+
+ set_global_dtl_mask(DTL_LOG_ALL);
+
+ /* Setup dtl buffers and register those */
+ alloc_dtl_buffers(time_limit);
+
+ state = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "powerpc/dtl:online",
+ dtl_worker_online, dtl_worker_offline);
+ if (state < 0) {
+ pr_err("vcpudispatch_stats: unable to setup workqueue for DTL processing\n");
+ free_dtl_buffers(time_limit);
+ reset_global_dtl_mask();
+ write_unlock(&dtl_access_lock);
+ rc = -EINVAL;
+ goto out;
+ }
+ /* Remembered so that dtl_worker_disable() can remove the state again. */
+ dtl_worker_state = state;
+
+out:
+ return rc;
+}
+
+/*
+ * Undo dtl_worker_enable(): remove the hotplug state, free the buffers,
+ * restore the default DTL mask, and release the write lock that
+ * dtl_worker_enable() left held.
+ */
+static void dtl_worker_disable(unsigned long *time_limit)
+{
+ cpuhp_remove_state(dtl_worker_state);
+ free_dtl_buffers(time_limit);
+ reset_global_dtl_mask();
+ write_unlock(&dtl_access_lock);
+}
+
+/*
+ * procfs write handler for vcpudispatch_stats: "1" enables dispatch
+ * statistics, "0" disables them. Writing the state that is already in effect
+ * does nothing and still succeeds.
+ *
+ * Returns @count on success, -EINVAL for input longer than 15 bytes or a
+ * value other than 0 or 1, -EFAULT if the user buffer cannot be copied, the
+ * kstrtoint() error for unparsable input, or the error from
+ * init_cpu_associativity() / dtl_worker_enable().
+ */
+static ssize_t vcpudispatch_stats_write(struct file *file, const char __user *p,
+ size_t count, loff_t *ppos)
+{
+ unsigned long time_limit = jiffies + HZ;
+ struct vcpu_dispatch_data *disp;
+ int rc, cmd, cpu;
+ char buf[16];
+
+ /* Leave room in buf for the terminating NUL written below. */
+ if (count > 15)
+ return -EINVAL;
+
+ if (copy_from_user(buf, p, count))
+ return -EFAULT;
+
+ buf[count] = 0;
+ rc = kstrtoint(buf, 0, &cmd);
+ if (rc || cmd < 0 || cmd > 1) {
+ pr_err("vcpudispatch_stats: please use 0 to disable or 1 to enable dispatch statistics\n");
+ return rc ? rc : -EINVAL;
+ }
+
+ mutex_lock(&dtl_enable_mutex);
+
+ /* Already in the requested state; rc is 0 here, so this reports success. */
+ if ((cmd == 0 && !vcpudispatch_stats_on) ||
+ (cmd == 1 && vcpudispatch_stats_on))
+ goto out;
+
+ if (cmd) {
+ rc = init_cpu_associativity();
+ if (rc) {
+ /* One of the two tables may have been allocated; free whatever exists. */
+ destroy_cpu_associativity();
+ goto out;
+ }
+
+ for_each_possible_cpu(cpu) {
+ disp = per_cpu_ptr(&vcpu_disp_data, cpu);
+ memset(disp, 0, sizeof(*disp));
+ disp->last_disp_cpu = -1;
+ }
+
+ rc = dtl_worker_enable(&time_limit);
+ if (rc) {
+ destroy_cpu_associativity();
+ goto out;
+ }
+ } else {
+ dtl_worker_disable(&time_limit);
+ destroy_cpu_associativity();
+ }
+
+ vcpudispatch_stats_on = cmd;
+
+out:
+ mutex_unlock(&dtl_enable_mutex);
+ if (rc)
+ return rc;
+ return count;
+}
+
+/*
+ * seq_file show handler: prints "off" when statistics are disabled, otherwise
+ * one line per online CPU of the form "cpu<N>" followed by the eight counters
+ * of struct vcpu_dispatch_data in declaration order (total_disp first).
+ */
+static int vcpudispatch_stats_display(struct seq_file *p, void *v)
+{
+ int cpu;
+ struct vcpu_dispatch_data *disp;
+
+ if (!vcpudispatch_stats_on) {
+ seq_puts(p, "off\n");
+ return 0;
+ }
+
+ for_each_online_cpu(cpu) {
+ disp = per_cpu_ptr(&vcpu_disp_data, cpu);
+ seq_printf(p, "cpu%d", cpu);
+ seq_put_decimal_ull(p, " ", disp->total_disp);
+ seq_put_decimal_ull(p, " ", disp->same_cpu_disp);
+ seq_put_decimal_ull(p, " ", disp->same_chip_disp);
+ seq_put_decimal_ull(p, " ", disp->diff_chip_disp);
+ seq_put_decimal_ull(p, " ", disp->far_chip_disp);
+ seq_put_decimal_ull(p, " ", disp->numa_home_disp);
+ seq_put_decimal_ull(p, " ", disp->numa_remote_disp);
+ seq_put_decimal_ull(p, " ", disp->numa_far_disp);
+ seq_puts(p, "\n");
+ }
+
+ return 0;
+}
+
+/* Open handler: binds the file to vcpudispatch_stats_display() via single_open(). */
+static int vcpudispatch_stats_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, vcpudispatch_stats_display, NULL);
+}
+
+/* File operations for the powerpc/vcpudispatch_stats procfs entry. */
+static const struct proc_ops vcpudispatch_stats_proc_ops = {
+ .proc_open = vcpudispatch_stats_open,
+ .proc_read = seq_read,
+ .proc_write = vcpudispatch_stats_write,
+ .proc_lseek = seq_lseek,
+ .proc_release = single_release,
+};
+
+/*
+ * procfs write handler for vcpudispatch_stats_freq: sets how many times per
+ * second the DTL worker is scheduled. Accepts an integer from 1 to HZ.
+ *
+ * Returns @count on success, -EINVAL for input longer than 15 bytes or a
+ * value out of range, -EFAULT if the user buffer cannot be copied, or the
+ * kstrtoint() error for unparsable input.
+ */
+static ssize_t vcpudispatch_stats_freq_write(struct file *file,
+ const char __user *p, size_t count, loff_t *ppos)
+{
+ int rc, freq;
+ char buf[16];
+
+ /* Leave room in buf for the terminating NUL written below. */
+ if (count > 15)
+ return -EINVAL;
+
+ if (copy_from_user(buf, p, count))
+ return -EFAULT;
+
+ buf[count] = 0;
+ rc = kstrtoint(buf, 0, &freq);
+ /* freq is used as a divisor of HZ, so it must be at least 1. */
+ if (rc || freq < 1 || freq > HZ) {
+ pr_err("vcpudispatch_stats_freq: please specify a frequency between 1 and %d\n",
+ HZ);
+ return rc ? rc : -EINVAL;
+ }
+
+ vcpudispatch_stats_freq = freq;
+
+ return count;
+}
+
+/* seq_file show callback: print the current vcpudispatch_stats_freq value. */
+static int vcpudispatch_stats_freq_display(struct seq_file *p, void *v)
+{
+	seq_printf(p, "%d\n", vcpudispatch_stats_freq);
+
+	return 0;
+}
+
+/* procfs open handler: bind the file to vcpudispatch_stats_freq_display(). */
+static int vcpudispatch_stats_freq_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, vcpudispatch_stats_freq_display, NULL);
+}
+
+/* File operations for the vcpudispatch_stats_freq procfs entry. */
+static const struct proc_ops vcpudispatch_stats_freq_proc_ops = {
+	.proc_open	= vcpudispatch_stats_freq_open,
+	.proc_release	= single_release,
+	.proc_read	= seq_read,
+	.proc_lseek	= seq_lseek,
+	.proc_write	= vcpudispatch_stats_freq_write,
+};
+
+/*
+ * Create the vcpudispatch_stats and vcpudispatch_stats_freq procfs entries
+ * (mode 0600) when lppaca_shared_proc() reports true.
+ *
+ * A failure to create the first entry is logged and the second is then not
+ * attempted. Always returns 0, so a failure here never fails the initcall.
+ */
+static int __init vcpudispatch_stats_procfs_init(void)
+{
+	if (!lppaca_shared_proc())
+		return 0;
+
+	if (!proc_create("powerpc/vcpudispatch_stats", 0600, NULL,
+			 &vcpudispatch_stats_proc_ops)) {
+		pr_err("vcpudispatch_stats: error creating procfs file\n");
+		return 0;
+	}
+
+	if (!proc_create("powerpc/vcpudispatch_stats_freq", 0600, NULL,
+			 &vcpudispatch_stats_freq_proc_ops))
+		pr_err("vcpudispatch_stats_freq: error creating procfs file\n");
+
+	return 0;
+}
+
+machine_device_initcall(pseries, vcpudispatch_stats_procfs_init);
+
+#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
+/*
+ * Return the steal time for @cpu as the sum of the enqueue_dispatch_tb and
+ * ready_enqueue_tb fields of its lppaca. Both fields are stored big-endian
+ * and are read with READ_ONCE().
+ */
+u64 pseries_paravirt_steal_clock(int cpu)
+{
+	struct lppaca *vpa = &lppaca_of(cpu);
+	u64 enqueue_to_dispatch, ready_to_enqueue;
+
+	enqueue_to_dispatch = be64_to_cpu(READ_ONCE(vpa->enqueue_dispatch_tb));
+	ready_to_enqueue = be64_to_cpu(READ_ONCE(vpa->ready_enqueue_tb));
+
+	return enqueue_to_dispatch + ready_to_enqueue;
+}
+#endif
+
+#endif /* CONFIG_PPC_SPLPAR */
+
+/*
+ * Register the per-CPU hypervisor areas for @cpu: the VPA (lppaca), then,
+ * on hash-MMU SPLPAR systems, the SLB shadow buffer, and finally the
+ * dispatch trace log buffer.
+ *
+ * Expected to run on @cpu itself. A failed VPA registration is logged and
+ * aborts the remaining registrations; a failed SLB shadow registration is
+ * only logged.
+ */
+void vpa_init(int cpu)
+{
+	unsigned long pa;
+	long rc;
+	int hw_id = get_hard_smp_processor_id(cpu);
+
+	/*
+	 * The spec says it "may be problematic" if CPU x registers the VPA of
+	 * CPU y. We should never do that, but wail if we ever do.
+	 */
+	WARN_ON(cpu != smp_processor_id());
+
+	/* Advertise the optional register sets before registering the area. */
+	if (cpu_has_feature(CPU_FTR_ALTIVEC))
+		lppaca_of(cpu).vmxregs_in_use = 1;
+	if (cpu_has_feature(CPU_FTR_ARCH_207S))
+		lppaca_of(cpu).ebb_regs_in_use = 1;
+
+	pa = __pa(&lppaca_of(cpu));
+	rc = register_vpa(hw_id, pa);
+	if (rc) {
+		pr_err("WARNING: VPA registration for cpu %d (hw %d) of area "
+		       "%lx failed with %ld\n", cpu, hw_id, pa, rc);
+		return;
+	}
+
+#ifdef CONFIG_PPC_64S_HASH_MMU
+	/*
+	 * PAPR says this feature is SLB-Buffer but firmware never
+	 * reports that.  All SPLPAR support SLB shadow buffer.
+	 */
+	if (!radix_enabled() && firmware_has_feature(FW_FEATURE_SPLPAR)) {
+		pa = __pa(paca_ptrs[cpu]->slb_shadow_ptr);
+		rc = register_slb_shadow(hw_id, pa);
+		if (rc)
+			pr_err("WARNING: SLB shadow buffer registration for "
+			       "cpu %d (hw %d) of area %lx failed with %ld\n",
+			       cpu, hw_id, pa, rc);
+	}
+#endif /* CONFIG_PPC_64S_HASH_MMU */
+
+	/* Register dispatch trace log, if one has been allocated. */
+	register_dtl_buffer(cpu);
+}
+
+#ifdef CONFIG_PPC_BOOK3S_64
+
+/*
+ * Issue H_REGISTER_PROC_TBL, retrying for as long as the hypervisor answers
+ * with a long-busy code (busy-waiting for the hinted delay between tries).
+ *
+ * Flags: PROC_TABLE_NEW when @table_size is non-zero; PROC_TABLE_RADIX
+ * (plus PROC_TABLE_GTSE when the MMU has GTSE) under radix, otherwise
+ * PROC_TABLE_HPT_SLB.
+ *
+ * Any result other than H_SUCCESS is fatal (BUG), so the value returned is
+ * always H_SUCCESS.
+ */
+static int __init pseries_lpar_register_process_table(unsigned long base,
+			unsigned long page_size, unsigned long table_size)
+{
+	unsigned long flags = table_size ? PROC_TABLE_NEW : 0;
+	long rc;
+
+	if (!radix_enabled()) {
+		flags |= PROC_TABLE_HPT_SLB;
+	} else {
+		flags |= PROC_TABLE_RADIX;
+		if (mmu_has_feature(MMU_FTR_GTSE))
+			flags |= PROC_TABLE_GTSE;
+	}
+
+	rc = plpar_hcall_norets(H_REGISTER_PROC_TBL, flags, base,
+				page_size, table_size);
+	while (H_IS_LONG_BUSY(rc)) {
+		mdelay(get_longbusy_msecs(rc));
+		rc = plpar_hcall_norets(H_REGISTER_PROC_TBL, flags, base,
+					page_size, table_size);
+	}
+
+	if (rc != H_SUCCESS) {
+		pr_err("Failed to register process table (rc=%ld)\n", rc);
+		BUG();
+	}
+	return rc;
+}
+
+#ifdef CONFIG_PPC_64S_HASH_MMU
+
+/*
+ * Insert one HPTE into @hpte_group through the H_ENTER wrapper.
+ *
+ * Returns the slot within the group in bits 0-2, with bit 3 set when
+ * @vflags carries HPTE_V_SECONDARY; -1 when the group is full; -2 on any
+ * other hypervisor error. Debug output is suppressed for bolted entries.
+ */
+static long pSeries_lpar_hpte_insert(unsigned long hpte_group,
+				     unsigned long vpn, unsigned long pa,
+				     unsigned long rflags, unsigned long vflags,
+				     int psize, int apsize, int ssize)
+{
+	unsigned long hpte_v, hpte_r;
+	unsigned long hflags;
+	unsigned long rc, slot;
+	const bool verbose = !(vflags & HPTE_V_BOLTED);
+
+	if (verbose)
+		pr_devel("hpte_insert(group=%lx, vpn=%016lx, "
+			 "pa=%016lx, rflags=%lx, vflags=%lx, psize=%d)\n",
+			 hpte_group, vpn, pa, rflags, vflags, psize);
+
+	hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID;
+	hpte_r = hpte_encode_r(pa, psize, apsize) | rflags;
+
+	if (verbose)
+		pr_devel(" hpte_v=%016lx, hpte_r=%016lx\n", hpte_v, hpte_r);
+
+	/*
+	 * Now fill in the actual HPTE. All hcall flags start at zero:
+	 * CEC cookie, zero page, I-cache invalidate, I-cache synchronize
+	 * and exact.
+	 */
+	hflags = 0;
+	if (firmware_has_feature(FW_FEATURE_XCMO) && !(hpte_r & HPTE_R_N))
+		hflags |= H_COALESCE_CAND;
+
+	rc = plpar_pte_enter(hflags, hpte_group, hpte_v, hpte_r, &slot);
+	if (unlikely(rc == H_PTEG_FULL)) {
+		pr_devel("Hash table group is full\n");
+		return -1;
+	}
+
+	/*
+	 * Since we try and ioremap PHBs we don't own, the pte insert
+	 * will fail. However we must catch the failure in hash_page
+	 * or we will loop forever, so return -2 in this case.
+	 */
+	if (unlikely(rc != H_SUCCESS)) {
+		pr_err("Failed hash pte insert with error %ld\n", rc);
+		return -2;
+	}
+
+	if (verbose)
+		pr_devel(" -> slot: %lu\n", slot & 7);
+
+	/*
+	 * Because of iSeries, we have to pass down the secondary
+	 * bucket bit here as well.
+	 */
+	return (slot & 7) | (!!(vflags & HPTE_V_SECONDARY) << 3);
+}
+
+/*
+ * Taken around the flush paths (hash range flush and hugepage invalidate)
+ * when the MMU lacks MMU_FTR_LOCKLESS_TLBIE.
+ */
+static DEFINE_SPINLOCK(pSeries_lpar_tlbie_lock);
+
+/*
+ * Evict one non-bolted HPTE from @hpte_group.
+ *
+ * Starts at a pseudo-random slot (low timebase bits) and walks the group
+ * once, wrapping around. Returns the number of slots that were skipped
+ * before the successful removal, or -1 if nothing could be removed.
+ */
+static long pSeries_lpar_hpte_remove(unsigned long hpte_group)
+{
+	unsigned long rc, dummy1, dummy2;
+	/* pick a random slot to start at */
+	unsigned long start = mftb() & 0x7;
+	int tries;
+
+	for (tries = 0; tries < HPTES_PER_GROUP; tries++) {
+		unsigned long off = (start + tries) & 0x7;
+
+		/* don't remove a bolted entry */
+		rc = plpar_pte_remove(H_ANDCOND, hpte_group + off,
+				      HPTE_V_BOLTED, &dummy1, &dummy2);
+		if (rc == H_SUCCESS)
+			return tries;
+
+		/*
+		 * The test for adjunct partition is performed before the
+		 * ANDCOND test.  H_RESOURCE may be returned, so we need to
+		 * check for that as well.
+		 */
+		BUG_ON(rc != H_NOT_FOUND && rc != H_RESOURCE);
+	}
+
+	return -1;
+}
+
+/*
+ * Called during kexec sequence with MMU off.
+ *
+ * Fallback used when H_CLEAR_HPT is not successful: walk the whole hash
+ * page table four entries at a time and remove every valid entry that is
+ * not a VRMA mapping. Read failures are logged and the batch is skipped.
+ */
+static notrace void manual_hpte_clear_all(void)
+{
+	unsigned long size_bytes = 1UL << ppc64_pft_size;
+	/* Each HPTE is 16 bytes (two doublewords). */
+	unsigned long hpte_count = size_bytes >> 4;
+	struct {
+		unsigned long pteh;
+		unsigned long ptel;
+	} ptes[4];
+	long lpar_rc;
+	unsigned long i, j;
+
+	/* Read in batches of 4,
+	 * invalidate only valid entries not in the VRMA
+	 * hpte_count will be a multiple of 4
+	 */
+	for (i = 0; i < hpte_count; i += 4) {
+		lpar_rc = plpar_pte_read_4_raw(0, i, (void *)ptes);
+		if (lpar_rc != H_SUCCESS) {
+			/* i is unsigned: print it with %lu, not %ld. */
+			pr_info("Failed to read hash page table at %lu err %ld\n",
+				i, lpar_rc);
+			continue;
+		}
+		for (j = 0; j < 4; j++) {
+			if ((ptes[j].pteh & HPTE_V_VRMA_MASK) ==
+				HPTE_V_VRMA_MASK)
+				continue;
+			if (ptes[j].pteh & HPTE_V_VALID)
+				plpar_pte_remove_raw(0, i + j, 0,
+					&(ptes[j].pteh), &(ptes[j].ptel));
+		}
+	}
+}
+
+/*
+ * Called during kexec sequence with MMU off.
+ *
+ * Repeat H_CLEAR_HPT for as long as the hypervisor answers H_CONTINUE and
+ * return the first other status.
+ */
+static notrace int hcall_hpte_clear_all(void)
+{
+	int status = H_CONTINUE;
+
+	while (status == H_CONTINUE)
+		status = plpar_hcall_norets(H_CLEAR_HPT);
+
+	return status;
+}
+
+/*
+ * Called during kexec sequence with MMU off.
+ *
+ * Clear the hash page table with the hcall and fall back to the
+ * entry-by-entry walk if the hcall does not succeed.
+ */
+static notrace void pseries_hpte_clear_all(void)
+{
+	if (hcall_hpte_clear_all() != H_SUCCESS)
+		manual_hpte_clear_all();
+
+#ifdef __LITTLE_ENDIAN__
+	/*
+	 * Reset exceptions to big endian.
+	 *
+	 * FIXME this is a hack for kexec, we need to reset the exception
+	 * endian before starting the new kernel and this is a convenient place
+	 * to do it.
+	 *
+	 * This is also called on boot when a fadump happens. In that case we
+	 * must not change the exception endian mode.
+	 */
+	if (firmware_has_feature(FW_FEATURE_SET_MODE) && !is_fadump_active())
+		pseries_big_endian_exceptions();
+#endif
+}
+
+/*
+ * Change the protection bits of the HPTE in @slot, matched by AVPN.
+ *
+ * NOTE: for updatepp ops we are fortunate that the linux "newpp" bits and
+ * the low 3 bits of flags happen to line up.  So no transform is needed.
+ * We can probably optimize here and assume the high bits of newpp are
+ * already zero.  For now I am paranoid.
+ *
+ * Returns 0 on success and -1 when the hypervisor reports H_NOT_FOUND.
+ * Any other status is fatal (BUG).
+ */
+static long pSeries_lpar_hpte_updatepp(unsigned long slot,
+				       unsigned long newpp,
+				       unsigned long vpn,
+				       int psize, int apsize,
+				       int ssize, unsigned long inv_flags)
+{
+	unsigned long avpn = hpte_encode_avpn(vpn, psize, ssize);
+	unsigned long hflags;
+	unsigned long rc;
+
+	hflags = H_AVPN | (newpp & (HPTE_R_PP | HPTE_R_N | HPTE_R_KEY_LO));
+	hflags |= (newpp & HPTE_R_KEY_HI) >> 48;
+	if (mmu_has_feature(MMU_FTR_KERNEL_RO)) {
+		/* Move pp0 into bit 8 (IBM 55) */
+		hflags |= (newpp & HPTE_R_PP0) >> 55;
+	}
+
+	pr_devel(" update: avpnv=%016lx, hash=%016lx, f=%lx, psize: %d ...",
+		 avpn, slot, hflags, psize);
+
+	rc = plpar_pte_protect(hflags, slot, avpn);
+	if (rc == H_NOT_FOUND) {
+		pr_devel("not found !\n");
+		return -1;
+	}
+
+	pr_devel("ok\n");
+
+	BUG_ON(rc != H_SUCCESS);
+
+	return 0;
+}
+
+/*
+ * Search one hash group for a valid HPTE whose first doubleword matches
+ * @want_v (per HPTE_V_COMPARE).
+ *
+ * The group is read four entries at a time starting at slot @hpte_group;
+ * a failed read is logged and that batch is skipped.
+ *
+ * Returns the index of the match within the group, or -1 if none is found.
+ */
+static long __pSeries_lpar_hpte_find(unsigned long want_v, unsigned long hpte_group)
+{
+	long lpar_rc;
+	unsigned long i, j;
+	struct {
+		unsigned long pteh;
+		unsigned long ptel;
+	} ptes[4];
+
+	for (i = 0; i < HPTES_PER_GROUP; i += 4, hpte_group += 4) {
+
+		lpar_rc = plpar_pte_read_4(0, hpte_group, (void *)ptes);
+		if (lpar_rc != H_SUCCESS) {
+			/* hpte_group is unsigned: print it with %lu, not %ld. */
+			pr_info("Failed to read hash page table at %lu err %ld\n",
+				hpte_group, lpar_rc);
+			continue;
+		}
+
+		for (j = 0; j < 4; j++) {
+			if (HPTE_V_COMPARE(ptes[j].pteh, want_v) &&
+			    (ptes[j].pteh & HPTE_V_VALID))
+				return i + j;
+		}
+	}
+
+	return -1;
+}
+
+/*
+ * Locate the HPTE mapping @vpn, looking in the primary hash group first
+ * and then in the secondary one.
+ *
+ * Returns the global slot number of the entry, or -1 if it is in neither
+ * group.
+ */
+static long pSeries_lpar_hpte_find(unsigned long vpn, int psize, int ssize)
+{
+	unsigned long hash = hpt_hash(vpn, mmu_psize_defs[psize].shift, ssize);
+	unsigned long avpn = hpte_encode_avpn(vpn, psize, ssize);
+	unsigned long group;
+	long idx;
+
+	/*
+	 * We try to keep bolted entries always in primary hash
+	 * But in some case we can find them in secondary too.
+	 */
+	group = (hash & htab_hash_mask) * HPTES_PER_GROUP;
+	idx = __pSeries_lpar_hpte_find(avpn, group);
+	if (idx >= 0)
+		return group + idx;
+
+	/* Try in secondary */
+	group = (~hash & htab_hash_mask) * HPTES_PER_GROUP;
+	idx = __pSeries_lpar_hpte_find(avpn, group);
+	if (idx < 0)
+		return -1;
+
+	return group + idx;
+}
+
+/*
+ * Change the protection bits of the HPTE that maps kernel address @ea.
+ *
+ * The entry is looked up by its VPN; both a failed lookup and a failed
+ * H_PROTECT are fatal (BUG).
+ */
+static void pSeries_lpar_hpte_updateboltedpp(unsigned long newpp,
+					     unsigned long ea,
+					     int psize, int ssize)
+{
+	unsigned long vsid = get_kernel_vsid(ea, ssize);
+	unsigned long vpn = hpt_vpn(ea, vsid, ssize);
+	unsigned long slot, hflags, rc;
+
+	slot = pSeries_lpar_hpte_find(vpn, psize, ssize);
+	BUG_ON(slot == -1);
+
+	hflags = (newpp & (HPTE_R_PP | HPTE_R_N)) |
+		 (newpp & HPTE_R_KEY_LO) |
+		 ((newpp & HPTE_R_KEY_HI) >> 48);
+	if (mmu_has_feature(MMU_FTR_KERNEL_RO)) {
+		/* Move pp0 into bit 8 (IBM 55) */
+		hflags |= (newpp & HPTE_R_PP0) >> 55;
+	}
+
+	rc = plpar_pte_protect(hflags, slot, 0);
+
+	BUG_ON(rc != H_SUCCESS);
+}
+
+/*
+ * Remove the HPTE in @slot if its AVPN matches @vpn.
+ *
+ * H_NOT_FOUND is tolerated silently; any status other than that or
+ * H_SUCCESS is fatal (BUG). @apsize is not used, and @local is only
+ * reported in the debug message.
+ */
+static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long vpn,
+					 int psize, int apsize,
+					 int ssize, int local)
+{
+	unsigned long avpn, rc;
+	unsigned long dummy1, dummy2;
+
+	pr_devel(" inval : slot=%lx, vpn=%016lx, psize: %d, local: %d\n",
+		 slot, vpn, psize, local);
+
+	avpn = hpte_encode_avpn(vpn, psize, ssize);
+	rc = plpar_pte_remove(H_AVPN, slot, avpn, &dummy1, &dummy2);
+
+	BUG_ON(rc != H_NOT_FOUND && rc != H_SUCCESS);
+}
+
+
+/*
+ * As defined in the PAPR's section 14.5.4.1.8
+ * The control mask doesn't include the returned reference and change bit from
+ * the processed PTE.
+ *
+ * HBLKR_AVPN is OR-ed into each request entry built for H_BLOCK_REMOVE.
+ * HBLKR_CTRL_MASK is applied to each returned buffer entry, and the result
+ * is compared against the HBLKR_CTRL_* completion codes below.
+ */
+#define HBLKR_AVPN 0x0100000000000000UL
+#define HBLKR_CTRL_MASK 0xf800000000000000UL
+#define HBLKR_CTRL_SUCCESS 0x8000000000000000UL
+#define HBLKR_CTRL_ERRNOTFOUND 0x8800000000000000UL
+#define HBLKR_CTRL_ERRBUSY 0xa000000000000000UL
+
+/*
+ * Return true when H_BLOCK_REMOVE can be used for the given segment base
+ * page size (@bpsize) and actual page size (@psize), i.e. when the block
+ * size recorded for that pair is the one this code supports.
+ *
+ * Currently, only blocks of 8 pages are supported.
+ */
+static inline bool is_supported_hlbkrm(int bpsize, int psize)
+{
+	return hblkrm_size[bpsize][psize] == HBLKRM_SUPPORTED_BLOCK_SIZE;
+}
+
+/*
+ * H_BLOCK_REMOVE caller.
+ *
+ * @idx:        number of entries used in @param, i.e. the index just past
+ *              the last PTEX entry (param[0] holds the AVA, the PTEX
+ *              entries start at param[1]).
+ * @param:      request buffer of PLPAR_HCALL9_BUFSIZE entries.
+ * @retry_busy: when true, entries reported busy are retried until none
+ *              remain.
+ *
+ * If a PTE cannot be processed because another CPU has already locked that
+ * group, the entry is put back in @param starting at index 1. When
+ * @retry_busy is false the number of entries still to process is returned,
+ * except that a completely busy full buffer is always retried.
+ *
+ * Returns 0 when nothing is left to process (or when called with no PTEX
+ * entry at all), otherwise the index of the last busy entry left in
+ * @param. Any hcall status other than H_SUCCESS/H_PARTIAL, and any per-entry
+ * status other than success, not-found or busy, is fatal (BUG).
+ */
+static unsigned long call_block_remove(unsigned long idx, unsigned long *param,
+				       bool retry_busy)
+{
+	unsigned long i, rc, new_idx;
+	unsigned long retbuf[PLPAR_HCALL9_BUFSIZE];
+
+	if (idx < 2) {
+		pr_warn("Unexpected empty call to H_BLOCK_REMOVE\n");
+		return 0;
+	}
+again:
+	new_idx = 0;
+	if (idx > PLPAR_HCALL9_BUFSIZE) {
+		pr_err("Too many PTEs (%lu) for H_BLOCK_REMOVE\n", idx);
+		idx = PLPAR_HCALL9_BUFSIZE;
+	} else if (idx < PLPAR_HCALL9_BUFSIZE)
+		/* Terminate a partially filled request. */
+		param[idx] = HBR_END;
+
+	rc = plpar_hcall9(H_BLOCK_REMOVE, retbuf,
+			  param[0], /* AVA */
+			  param[1],  param[2],  param[3],  param[4], /* TS0-7 */
+			  param[5],  param[6],  param[7],  param[8]);
+	if (rc == H_SUCCESS)
+		return 0;
+
+	BUG_ON(rc != H_PARTIAL);
+
+	/* Check that the unprocessed entries were 'not found' or 'busy' */
+	for (i = 0; i < idx-1; i++) {
+		unsigned long ctrl = retbuf[i] & HBLKR_CTRL_MASK;
+
+		if (ctrl == HBLKR_CTRL_ERRBUSY) {
+			/* Compact busy entries towards the front of @param. */
+			param[++new_idx] = param[i+1];
+			continue;
+		}
+
+		BUG_ON(ctrl != HBLKR_CTRL_SUCCESS
+		       && ctrl != HBLKR_CTRL_ERRNOTFOUND);
+	}
+
+	/*
+	 * If there were entries found busy, retry these entries if requested,
+	 * or if all the entries have to be retried.
+	 */
+	if (new_idx && (retry_busy || new_idx == (PLPAR_HCALL9_BUFSIZE-1))) {
+		idx = new_idx + 1;
+		goto again;
+	}
+
+	return new_idx;
+}
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+/*
+ * Limit iterations holding pSeries_lpar_tlbie_lock to 3. We also need
+ * to make sure that we avoid bouncing the hypervisor tlbie lock.
+ *
+ * The bulk path issues one H_BULK_REMOVE per 4 entries, so a batch of 12
+ * entries takes 3 hcalls.
+ */
+#define PPC64_HUGE_HPTE_BATCH 12
+
+/*
+ * Invalidate @count HPTEs (parallel @slot / @vpn arrays) with
+ * H_BLOCK_REMOVE.
+ *
+ * Entries are grouped by naturally aligned 8-page virtual block; a request
+ * is flushed whenever the block changes or the request buffer fills up.
+ */
+static void hugepage_block_invalidate(unsigned long *slot, unsigned long *vpn,
+				      int count, int psize, int ssize)
+{
+	unsigned long param[PLPAR_HCALL9_BUFSIZE];
+	unsigned long shift, cur_block, block;
+	int n, fill = 0;
+
+	shift = mmu_psize_defs[psize].shift;
+
+	for (n = 0; n < count; n++) {
+		/*
+		 * Shifting 3 bits more on the right to get a
+		 * 8 pages aligned virtual address.
+		 */
+		block = (vpn[n] >> (shift - VPN_SHIFT + 3));
+		if (!fill || block != cur_block) {
+			/*
+			 * Need to start a new 8 pages block, flush
+			 * the current one if needed.
+			 */
+			if (fill)
+				(void)call_block_remove(fill, param, true);
+			cur_block = block;
+			param[0] = hpte_encode_avpn(vpn[n], psize, ssize);
+			fill = 1;
+		}
+
+		param[fill++] = HBR_REQUEST | HBLKR_AVPN | slot[n];
+		if (fill == PLPAR_HCALL9_BUFSIZE) {
+			fill = call_block_remove(fill, param, false);
+			/*
+			 * fill = 0 means that all the entries were
+			 * removed, we can start a new block.
+			 * Otherwise, this means that there are entries
+			 * to retry, and fill points to latest one, so
+			 * we should increment it and try to continue
+			 * the same block.
+			 */
+			if (fill)
+				fill++;
+		}
+	}
+
+	if (fill)
+		(void)call_block_remove(fill, param, true);
+}
+
+/*
+ * Invalidate @count HPTEs (parallel @slot / @vpn arrays).
+ *
+ * With FW_FEATURE_BULK_REMOVE the entries are submitted four at a time via
+ * H_BULK_REMOVE, with a final HBR_END-terminated call for the remainder;
+ * without it each entry is invalidated individually. A failing bulk hcall
+ * is fatal (BUG).
+ */
+static void hugepage_bulk_invalidate(unsigned long *slot, unsigned long *vpn,
+				     int count, int psize, int ssize)
+{
+	unsigned long param[PLPAR_HCALL9_BUFSIZE];
+	int n, fill = 0, rc;
+
+	for (n = 0; n < count; n++) {
+		if (firmware_has_feature(FW_FEATURE_BULK_REMOVE)) {
+			/* Two words per entry: request/slot, then AVPN. */
+			param[fill] = HBR_REQUEST | HBR_AVPN | slot[n];
+			param[fill+1] = hpte_encode_avpn(vpn[n], psize, ssize);
+			fill += 2;
+			if (fill != 8)
+				continue;
+
+			rc = plpar_hcall9(H_BULK_REMOVE, param,
+					  param[0], param[1], param[2],
+					  param[3], param[4], param[5],
+					  param[6], param[7]);
+			BUG_ON(rc != H_SUCCESS);
+			fill = 0;
+		} else {
+			pSeries_lpar_hpte_invalidate(slot[n], vpn[n], psize, 0,
+						     ssize, 0);
+		}
+	}
+
+	if (fill) {
+		param[fill] = HBR_END;
+		rc = plpar_hcall9(H_BULK_REMOVE, param, param[0], param[1],
+				  param[2], param[3], param[4], param[5],
+				  param[6], param[7]);
+		BUG_ON(rc != H_SUCCESS);
+	}
+}
+
+/*
+ * Invalidate one batch of hugepage HPTEs, choosing H_BLOCK_REMOVE when it
+ * is supported for (@psize, 16M) and the bulk/individual path otherwise.
+ *
+ * pSeries_lpar_tlbie_lock is held (IRQs saved) around the operation unless
+ * the MMU has MMU_FTR_LOCKLESS_TLBIE.
+ */
+static inline void __pSeries_lpar_hugepage_invalidate(unsigned long *slot,
+						      unsigned long *vpn,
+						      int count, int psize,
+						      int ssize)
+{
+	const int need_lock = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
+	unsigned long irqflags = 0;
+
+	if (need_lock)
+		spin_lock_irqsave(&pSeries_lpar_tlbie_lock, irqflags);
+
+	/* Assuming THP size is 16M */
+	if (!is_supported_hlbkrm(psize, MMU_PAGE_16M))
+		hugepage_bulk_invalidate(slot, vpn, count, psize, ssize);
+	else
+		hugepage_block_invalidate(slot, vpn, count, psize, ssize);
+
+	if (need_lock)
+		spin_unlock_irqrestore(&pSeries_lpar_tlbie_lock, irqflags);
+}
+
+/*
+ * Invalidate every valid HPTE backing the huge page that starts at @addr.
+ *
+ * Walks @hpte_slot_array, computes the global slot and VPN of each valid
+ * sub-page entry, and hands them to __pSeries_lpar_hugepage_invalidate()
+ * in batches of PPC64_HUGE_HPTE_BATCH. @local is not used.
+ */
+static void pSeries_lpar_hugepage_invalidate(unsigned long vsid,
+					     unsigned long addr,
+					     unsigned char *hpte_slot_array,
+					     int psize, int ssize, int local)
+{
+	unsigned long vpn_array[PPC64_HUGE_HPTE_BATCH];
+	unsigned long slot_array[PPC64_HUGE_HPTE_BATCH];
+	unsigned long shift = mmu_psize_defs[psize].shift;
+	unsigned int max_hpte_count = 1U << (PMD_SHIFT - shift);
+	unsigned long hidx, vpn, hash, slot, va;
+	int i, queued = 0;
+
+	for (i = 0; i < max_hpte_count; i++) {
+		if (!hpte_valid(hpte_slot_array, i))
+			continue;
+		hidx = hpte_hash_index(hpte_slot_array, i);
+
+		/* get the vpn */
+		va = addr + (i * (1ul << shift));
+		vpn = hpt_vpn(va, vsid, ssize);
+		hash = hpt_hash(vpn, shift, ssize);
+		if (hidx & _PTEIDX_SECONDARY)
+			hash = ~hash;
+
+		slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
+		slot += hidx & _PTEIDX_GROUP_IX;
+
+		slot_array[queued] = slot;
+		vpn_array[queued] = vpn;
+		queued++;
+
+		if (queued == PPC64_HUGE_HPTE_BATCH) {
+			/* Batch is full: do a bulk invalidate now. */
+			__pSeries_lpar_hugepage_invalidate(slot_array,
+							   vpn_array,
+							   PPC64_HUGE_HPTE_BATCH,
+							   psize, ssize);
+			queued = 0;
+		}
+	}
+
+	if (queued)
+		__pSeries_lpar_hugepage_invalidate(slot_array, vpn_array,
+						   queued, psize, ssize);
+}
+#else
+/* Stub for builds without THP: warn if it is ever reached. */
+static void pSeries_lpar_hugepage_invalidate(unsigned long vsid,
+					     unsigned long addr,
+					     unsigned char *hpte_slot_array,
+					     int psize, int ssize, int local)
+{
+	WARN(1, "%s called without THP support\n", __func__);
+}
+#endif
+
+/*
+ * Remove the HPTE that maps kernel address @ea.
+ *
+ * Returns 0 once the invalidate has been issued, or -ENOENT when no
+ * matching entry is found in either hash group.
+ */
+static int pSeries_lpar_hpte_removebolted(unsigned long ea,
+					  int psize, int ssize)
+{
+	unsigned long vsid = get_kernel_vsid(ea, ssize);
+	unsigned long vpn = hpt_vpn(ea, vsid, ssize);
+	unsigned long slot = pSeries_lpar_hpte_find(vpn, psize, ssize);
+
+	if (slot == -1)
+		return -ENOENT;
+
+	/* lpar doesn't use the passed actual page size */
+	pSeries_lpar_hpte_invalidate(slot, vpn, psize, 0, ssize, 0);
+
+	return 0;
+}
+
+
+/*
+ * Compute the global HPT slot of sub-page @index of @pte: the hash of @vpn
+ * (inverted when the stored hidx marks the secondary group) selects the
+ * group, and the group index bits of hidx select the entry inside it.
+ */
+static inline unsigned long compute_slot(real_pte_t pte,
+					 unsigned long vpn,
+					 unsigned long index,
+					 unsigned long shift,
+					 int ssize)
+{
+	unsigned long hidx = __rpte_to_hidx(pte, index);
+	unsigned long hash = hpt_hash(vpn, shift, ssize);
+
+	if (hidx & _PTEIDX_SECONDARY)
+		hash = ~hash;
+
+	return (hash & htab_hash_mask) * HPTES_PER_GROUP +
+	       (hidx & _PTEIDX_GROUP_IX);
+}
+
+/*
+ * Flush the first @number entries of @batch with H_BLOCK_REMOVE, using
+ * @param as the request buffer.
+ *
+ * The hcall H_BLOCK_REMOVE implies that the virtual pages to be processed
+ * are "all within the same naturally aligned 8 page virtual address block",
+ * so a request is flushed whenever the block changes or the buffer fills.
+ */
+static void do_block_remove(unsigned long number, struct ppc64_tlb_batch *batch,
+			    unsigned long *param)
+{
+	unsigned long vpn;
+	unsigned long n, fill = 0;
+	unsigned long index, shift, slot, cur_block, block;
+	real_pte_t pte;
+	int psize = batch->psize;
+	int ssize = batch->ssize;
+
+	for (n = 0; n < number; n++) {
+		vpn = batch->vpn[n];
+		pte = batch->pte[n];
+		pte_iterate_hashed_subpages(pte, psize, vpn, index, shift) {
+			/*
+			 * Shifting 3 bits more on the right to get a
+			 * 8 pages aligned virtual address.
+			 */
+			block = (vpn >> (shift - VPN_SHIFT + 3));
+			if (!fill || block != cur_block) {
+				/*
+				 * Need to start a new 8 pages block, flush
+				 * the current one if needed.
+				 */
+				if (fill)
+					(void)call_block_remove(fill, param,
+								true);
+				cur_block = block;
+				param[0] = hpte_encode_avpn(vpn, psize,
+							    ssize);
+				fill = 1;
+			}
+
+			slot = compute_slot(pte, vpn, index, shift, ssize);
+			param[fill++] = HBR_REQUEST | HBLKR_AVPN | slot;
+
+			if (fill == PLPAR_HCALL9_BUFSIZE) {
+				fill = call_block_remove(fill, param, false);
+				/*
+				 * fill = 0 means that all the entries were
+				 * removed, we can start a new block.
+				 * Otherwise, this means that there are entries
+				 * to retry, and fill points to latest one, so
+				 * we should increment it and try to continue
+				 * the same block.
+				 */
+				if (fill)
+					fill++;
+			}
+		} pte_iterate_hashed_end();
+	}
+
+	if (fill)
+		(void)call_block_remove(fill, param, true);
+}
+
+/*
+ * TLB Block Invalidate Characteristics
+ *
+ * These characteristics define the size of the block the hcall H_BLOCK_REMOVE
+ * is able to process for each (segment base page size, actual page size)
+ * pair.
+ *
+ * The ibm,get-system-parameter property returns a buffer with the
+ * following layout:
+ *
+ * [ 2 bytes size of the RTAS buffer (excluding these 2 bytes) ]
+ * -----------------
+ * TLB Block Invalidate Specifiers:
+ * [ 1 byte LOG base 2 of the TLB invalidate block size being specified ]
+ * [ 1 byte Number of page sizes (N) that are supported for the specified
+ *          TLB invalidate block size ]
+ * [ 1 byte Encoded segment base page size and actual page size
+ *          MSB=0 means 4k segment base page size and actual page size
+ *          MSB=1 the penc value in mmu_psize_def ]
+ * ...
+ * -----------------
+ * Next TLB Block Invalidate Specifiers...
+ * -----------------
+ * [ 0 ]
+ */
+
+/*
+ * Record @block_size for the (@bpsize, @psize) pair, keeping the largest
+ * value seen so far.
+ */
+static inline void set_hblkrm_bloc_size(int bpsize, int psize,
+					unsigned int block_size)
+{
+	if (block_size <= hblkrm_size[bpsize][psize])
+		return;
+
+	hblkrm_size[bpsize][psize] = block_size;
+}
+
+/*
+ * Decode the encoded segment base page size and actual page size.
+ * PAPR specifies:
+ * - bit 7 is the L bit
+ * - bits 0-5 are the penc value
+ * If the L bit is 0, this means 4K segment base page size and actual page
+ * size, otherwise the penc value should be read.
+ */
+#define HBLKRM_L_MASK		0x80
+#define HBLKRM_PENC_MASK	0x3f
+
+/*
+ * Record @block_size for the page-size pair encoded in @lp. Only the first
+ * mmu_psize_defs entry whose penc matches is updated; an encoding that
+ * matches nothing is ignored.
+ */
+static inline void __init check_lp_set_hblkrm(unsigned int lp,
+					      unsigned int block_size)
+{
+	unsigned int base, actual;
+	unsigned int penc;
+
+	/* First, check the L bit, if not set, this means 4K */
+	if (!(lp & HBLKRM_L_MASK)) {
+		set_hblkrm_bloc_size(MMU_PAGE_4K, MMU_PAGE_4K, block_size);
+		return;
+	}
+
+	penc = lp & HBLKRM_PENC_MASK;
+	for (base = 0; base < MMU_PAGE_COUNT; base++) {
+		struct mmu_psize_def *def = &mmu_psize_defs[base];
+
+		for (actual = 0; actual < MMU_PAGE_COUNT; actual++) {
+			if (def->penc[actual] != penc)
+				continue;
+
+			set_hblkrm_bloc_size(base, actual, block_size);
+			return;
+		}
+	}
+}
+
+/*
+ * The size of the TLB Block Invalidate Characteristics is variable. But at the
+ * maximum it will be the number of possible page sizes *2 + 10 bytes.
+ * Currently MMU_PAGE_COUNT is 16, which means 42 bytes. Use a cache line size
+ * (128 bytes) for the buffer to get plenty of space.
+ */
+#define SPLPAR_TLB_BIC_MAXLENGTH	128
+
+/*
+ * Read the TLB Block Invalidate Characteristics system parameter and record
+ * the supported H_BLOCK_REMOVE block size for each page-size pair.
+ *
+ * Does nothing without FW_FEATURE_BLOCK_REMOVE, when the parameter cannot
+ * be fetched, or when the reported length exceeds
+ * SPLPAR_TLB_BIC_MAXLENGTH. The resulting non-zero table entries are
+ * logged.
+ */
+void __init pseries_lpar_read_hblkrm_characteristics(void)
+{
+	static struct papr_sysparm_buf buf __initdata;
+	int len, idx, bpsize;
+
+	if (!firmware_has_feature(FW_FEATURE_BLOCK_REMOVE))
+		return;
+
+	if (papr_sysparm_get(PAPR_SYSPARM_TLB_BLOCK_INVALIDATE_ATTRS, &buf))
+		return;
+
+	len = be16_to_cpu(buf.len);
+	if (len > SPLPAR_TLB_BIC_MAXLENGTH) {
+		pr_warn("%s too large returned buffer %d\n", __func__, len);
+		return;
+	}
+
+	idx = 0;
+	while (idx < len) {
+		u8 block_shift = buf.val[idx++];
+		u32 block_size;
+		unsigned int npsize;
+
+		/* A zero shift terminates the list of specifiers. */
+		if (!block_shift)
+			break;
+
+		/*
+		 * The shift comes from firmware: shifting a 32-bit value by
+		 * 32 or more is undefined, so map such values to 0, which
+		 * set_hblkrm_bloc_size() never records. The page-size bytes
+		 * are still consumed below to keep the parse aligned.
+		 */
+		block_size = block_shift < 32 ? 1U << block_shift : 0;
+
+		for (npsize = buf.val[idx++];
+		     npsize > 0 && idx < len; npsize--)
+			check_lp_set_hblkrm((unsigned int)buf.val[idx++],
+					    block_size);
+	}
+
+	for (bpsize = 0; bpsize < MMU_PAGE_COUNT; bpsize++)
+		for (idx = 0; idx < MMU_PAGE_COUNT; idx++)
+			if (hblkrm_size[bpsize][idx])
+				pr_info("H_BLOCK_REMOVE supports base psize:%d psize:%d block size:%d\n",
+					bpsize, idx, hblkrm_size[bpsize][idx]);
+}
+
+/*
+ * Flush the first @number entries of this CPU's TLB batch.
+ *
+ * Uses H_BLOCK_REMOVE when it is supported for the batch page size,
+ * otherwise H_BULK_REMOVE (four entries per call) when the firmware has it,
+ * otherwise one invalidate per entry.
+ *
+ * Take a spinlock around flushes to avoid bouncing the hypervisor tlbie
+ * lock.
+ */
+static void pSeries_lpar_flush_hash_range(unsigned long number, int local)
+{
+	struct ppc64_tlb_batch *batch = this_cpu_ptr(&ppc64_tlb_batch);
+	const int need_lock = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
+	unsigned long param[PLPAR_HCALL9_BUFSIZE];
+	unsigned long irqflags = 0;
+	unsigned long n, fill, rc;
+	unsigned long vpn, index, shift, slot;
+	real_pte_t pte;
+	int psize, ssize;
+
+	if (need_lock)
+		spin_lock_irqsave(&pSeries_lpar_tlbie_lock, irqflags);
+
+	if (is_supported_hlbkrm(batch->psize, batch->psize)) {
+		do_block_remove(number, batch, param);
+		goto unlock;
+	}
+
+	psize = batch->psize;
+	ssize = batch->ssize;
+	fill = 0;
+	for (n = 0; n < number; n++) {
+		vpn = batch->vpn[n];
+		pte = batch->pte[n];
+		pte_iterate_hashed_subpages(pte, psize, vpn, index, shift) {
+			slot = compute_slot(pte, vpn, index, shift, ssize);
+			if (firmware_has_feature(FW_FEATURE_BULK_REMOVE)) {
+				/* Two words per entry: request/slot, AVPN. */
+				param[fill] = HBR_REQUEST | HBR_AVPN | slot;
+				param[fill+1] = hpte_encode_avpn(vpn, psize,
+								 ssize);
+				fill += 2;
+				if (fill == 8) {
+					rc = plpar_hcall9(H_BULK_REMOVE, param,
+						param[0], param[1], param[2],
+						param[3], param[4], param[5],
+						param[6], param[7]);
+					BUG_ON(rc != H_SUCCESS);
+					fill = 0;
+				}
+			} else {
+				/*
+				 * lpar doesn't use the passed actual page size
+				 */
+				pSeries_lpar_hpte_invalidate(slot, vpn, psize,
+							     0, ssize, local);
+			}
+		} pte_iterate_hashed_end();
+	}
+
+	/* Submit the partially filled request, if any. */
+	if (fill) {
+		param[fill] = HBR_END;
+		rc = plpar_hcall9(H_BULK_REMOVE, param, param[0], param[1],
+				  param[2], param[3], param[4], param[5],
+				  param[6], param[7]);
+		BUG_ON(rc != H_SUCCESS);
+	}
+
+unlock:
+	if (need_lock)
+		spin_unlock_irqrestore(&pSeries_lpar_tlbie_lock, irqflags);
+}
+
+/*
+ * Handler for the "bulk_remove=" kernel parameter.
+ *
+ * "bulk_remove=off" clears FW_FEATURE_BULK_REMOVE from the firmware feature
+ * mask if it was set; any other value is ignored. Always returns 1.
+ */
+static int __init disable_bulk_remove(char *str)
+{
+	if (strcmp(str, "off") == 0 &&
+	    firmware_has_feature(FW_FEATURE_BULK_REMOVE)) {
+		/* Terminate the message so it is emitted as its own line. */
+		pr_info("Disabling BULK_REMOVE firmware feature\n");
+		powerpc_firmware_features &= ~FW_FEATURE_BULK_REMOVE;
+	}
+	return 1;
+}
+
+__setup("bulk_remove=", disable_bulk_remove);
+
+#define HPT_RESIZE_TIMEOUT 10000 /* ms; cap on the summed long-busy delays of the prepare phase */
+
+struct hpt_resize_state { /* argument block passed to pseries_lpar_resize_hpt_commit() */
+ unsigned long shift; /* requested HPT size, as a shift */
+ int commit_rc; /* status returned by plpar_resize_hpt_commit() */
+};
+
+/*
+ * stop_machine callback: commit a prepared HPT resize.
+ *
+ * @data points to a struct hpt_resize_state; the hcall status is stored in
+ * its commit_rc field. On success the HPT size globals are updated to the
+ * new shift. Returns 0 on success, -EIO otherwise.
+ */
+static int pseries_lpar_resize_hpt_commit(void *data)
+{
+	struct hpt_resize_state *st = data;
+
+	st->commit_rc = plpar_resize_hpt_commit(0, st->shift);
+	if (st->commit_rc == H_SUCCESS) {
+		/* Hypervisor has transitioned the HTAB, update our globals */
+		ppc64_pft_size = st->shift;
+		htab_size_bytes = 1UL << ppc64_pft_size;
+		htab_hash_mask = (htab_size_bytes >> 7) - 1;
+		return 0;
+	}
+
+	return -EIO;
+}
+
+/*
+ * Resize the hash page table to 2^@shift bytes.
+ *
+ * Phase 1 (prepare) is retried while the hypervisor reports long-busy,
+ * sleeping for the hinted delay each time; once the summed delays exceed
+ * HPT_RESIZE_TIMEOUT the resize is cancelled. Phase 2 (commit) runs under
+ * stop_machine.
+ *
+ * Must be called in process context.  The caller must hold the
+ * cpus_lock.
+ *
+ * Returns 0 on success, -ENODEV without FW_FEATURE_HPT_RESIZE, -ETIMEDOUT,
+ * -EINVAL, -EPERM, -ENOSPC or -EIO depending on the failure.
+ */
+static int pseries_lpar_resize_hpt(unsigned long shift)
+{
+	struct hpt_resize_state state = {
+		.shift = shift,
+		.commit_rc = H_FUNCTION,
+	};
+	unsigned int wait_ms, waited_ms = 0;
+	ktime_t start, prepared, committed;
+	int rc;
+
+	might_sleep();
+
+	if (!firmware_has_feature(FW_FEATURE_HPT_RESIZE))
+		return -ENODEV;
+
+	pr_info("Attempting to resize HPT to shift %lu\n", shift);
+
+	start = ktime_get();
+
+	for (rc = plpar_resize_hpt_prepare(0, shift);
+	     H_IS_LONG_BUSY(rc);
+	     rc = plpar_resize_hpt_prepare(0, shift)) {
+		wait_ms = get_longbusy_msecs(rc);
+		waited_ms += wait_ms;
+		if (waited_ms > HPT_RESIZE_TIMEOUT) {
+			/* prepare with shift==0 cancels an in-progress resize */
+			rc = plpar_resize_hpt_prepare(0, 0);
+			if (rc != H_SUCCESS)
+				pr_warn("Unexpected error %d cancelling timed out HPT resize\n",
+					rc);
+			return -ETIMEDOUT;
+		}
+		msleep(wait_ms);
+	}
+
+	if (rc == H_PARAMETER) {
+		pr_warn("Invalid argument from H_RESIZE_HPT_PREPARE\n");
+		return -EINVAL;
+	}
+	if (rc == H_RESOURCE) {
+		pr_warn("Operation not permitted from H_RESIZE_HPT_PREPARE\n");
+		return -EPERM;
+	}
+	if (rc != H_SUCCESS) {
+		pr_warn("Unexpected error %d from H_RESIZE_HPT_PREPARE\n", rc);
+		return -EIO;
+	}
+
+	prepared = ktime_get();
+
+	rc = stop_machine_cpuslocked(pseries_lpar_resize_hpt_commit,
+				     &state, NULL);
+
+	committed = ktime_get();
+
+	if (rc != 0) {
+		if (state.commit_rc == H_PTEG_FULL)
+			return -ENOSPC;
+
+		pr_warn("Unexpected error %d from H_RESIZE_HPT_COMMIT\n",
+			state.commit_rc);
+		return -EIO;
+	}
+
+	pr_info("HPT resize to shift %lu complete (%lld ms / %lld ms)\n",
+		shift, (long long) ktime_ms_delta(prepared, start),
+		(long long) ktime_ms_delta(committed, prepared));
+
+	return 0;
+}
+
+/*
+ * Install the pSeries LPAR implementations of the hash MMU operations.
+ *
+ * resize_hpt is only provided with FW_FEATURE_HPT_RESIZE. On
+ * CPU_FTR_ARCH_300 CPUs the process table is also registered with the
+ * hypervisor.
+ */
+void __init hpte_init_pseries(void)
+{
+	/* Single-entry operations. */
+	mmu_hash_ops.hpte_insert	 = pSeries_lpar_hpte_insert;
+	mmu_hash_ops.hpte_remove	 = pSeries_lpar_hpte_remove;
+	mmu_hash_ops.hpte_removebolted	 = pSeries_lpar_hpte_removebolted;
+	mmu_hash_ops.hpte_invalidate	 = pSeries_lpar_hpte_invalidate;
+	mmu_hash_ops.hpte_updatepp	 = pSeries_lpar_hpte_updatepp;
+	mmu_hash_ops.hpte_updateboltedpp = pSeries_lpar_hpte_updateboltedpp;
+
+	/* Range and whole-table operations. */
+	mmu_hash_ops.flush_hash_range	 = pSeries_lpar_flush_hash_range;
+	mmu_hash_ops.hugepage_invalidate = pSeries_lpar_hugepage_invalidate;
+	mmu_hash_ops.hpte_clear_all	 = pseries_hpte_clear_all;
+
+	if (firmware_has_feature(FW_FEATURE_HPT_RESIZE))
+		mmu_hash_ops.resize_hpt = pseries_lpar_resize_hpt;
+
+	/*
+	 * On POWER9, we need to do a H_REGISTER_PROC_TBL hcall
+	 * to inform the hypervisor that we wish to use the HPT.
+	 */
+	if (cpu_has_feature(CPU_FTR_ARCH_300))
+		pseries_lpar_register_process_table(0, 0, 0);
+}
+#endif /* CONFIG_PPC_64S_HASH_MMU */
+
+#ifdef CONFIG_PPC_RADIX_MMU
+/*
+ * Radix MMU setup under the hypervisor: register the process table
+ * (physical address of process_tb, size encoded as PRTB_SIZE_SHIFT - 12).
+ */
+void __init radix_init_pseries(void)
+{
+	unsigned long prtb_pa;
+
+	pr_info("Using radix MMU under hypervisor\n");
+
+	prtb_pa = __pa(process_tb);
+	pseries_lpar_register_process_table(prtb_pa, 0, PRTB_SIZE_SHIFT - 12);
+}
+#endif
+
+#ifdef CONFIG_PPC_SMLPAR
+#define CMO_FREE_HINT_DEFAULT 1
+/* Non-zero when freed pages are reported to the hypervisor as unused. */
+static int cmo_free_hint_flag = CMO_FREE_HINT_DEFAULT;
+
+/*
+ * Handler for the "cmo_free_hint=" kernel parameter.
+ *
+ * "no"/"off" (case-insensitive, surrounding whitespace stripped) disables
+ * hinting and returns 1. Any other value enables it; 1 is returned for
+ * "yes"/"on" and 0 for anything else.
+ */
+static int __init cmo_free_hint(char *str)
+{
+	char *arg = strstrip(str);
+
+	if (!strcasecmp(arg, "no") || !strcasecmp(arg, "off")) {
+		pr_info("%s: CMO free page hinting is not active.\n", __func__);
+		cmo_free_hint_flag = 0;
+		return 1;
+	}
+
+	cmo_free_hint_flag = 1;
+	pr_info("%s: CMO free page hinting is active.\n", __func__);
+
+	return (!strcasecmp(arg, "yes") || !strcasecmp(arg, "on")) ? 1 : 0;
+}
+
+__setup("cmo_free_hint=", cmo_free_hint);
+
+/*
+ * Issue H_PAGE_INIT with @state for every CMO-page-sized chunk of the
+ * 2^@order kernel pages starting at @page. Hcall results are ignored.
+ */
+static void pSeries_set_page_state(struct page *page, int order,
+				   unsigned long state)
+{
+	unsigned long chunk = cmo_get_page_size();
+	unsigned long base = __pa((unsigned long)page_address(page));
+	int pg, off;
+
+	for (pg = 0; pg < (1 << order); pg++, base += PAGE_SIZE)
+		for (off = 0; off < PAGE_SIZE; off += chunk)
+			plpar_hcall_norets(H_PAGE_INIT, state, base + off, 0);
+}
+
+/*
+ * Tell the hypervisor that the 2^@order pages at @page are unused.
+ *
+ * Skipped under radix, when hinting is disabled via cmo_free_hint_flag, or
+ * when the firmware lacks FW_FEATURE_CMO.
+ */
+void arch_free_page(struct page *page, int order)
+{
+	if (radix_enabled() || !cmo_free_hint_flag ||
+	    !firmware_has_feature(FW_FEATURE_CMO))
+		return;
+
+	pSeries_set_page_state(page, order, H_PAGE_SET_UNUSED);
+}
+EXPORT_SYMBOL(arch_free_page);
+
+#endif /* CONFIG_PPC_SMLPAR */
+#endif /* CONFIG_PPC_BOOK3S_64 */
+
+#ifdef CONFIG_TRACEPOINTS
+#ifdef CONFIG_JUMP_LABEL
+/* Static key counting hcall tracepoint registrations (jump-label build). */
+struct static_key hcall_tracepoint_key = STATIC_KEY_INIT;
+
+/* Registration hook: take one reference on the static key. Returns 0. */
+int hcall_tracepoint_regfunc(void)
+{
+	static_key_slow_inc(&hcall_tracepoint_key);
+
+	return 0;
+}
+
+/* Unregistration hook: drop the reference taken at registration. */
+void hcall_tracepoint_unregfunc(void)
+{
+	static_key_slow_dec(&hcall_tracepoint_key);
+}
+#else
+/*
+ * We optimise our hcall path by placing hcall_tracepoint_refcount
+ * directly in the TOC so we can check if the hcall tracepoints are
+ * enabled via a single load.
+ */
+
+/* NB: reg/unreg are called while guarded with the tracepoints_mutex */
+extern long hcall_tracepoint_refcount;
+
+/* Registration hook: count one more tracepoint user. Returns 0. */
+int hcall_tracepoint_regfunc(void)
+{
+	hcall_tracepoint_refcount += 1;
+
+	return 0;
+}
+
+/* Unregistration hook: count one fewer tracepoint user. */
+void hcall_tracepoint_unregfunc(void)
+{
+	hcall_tracepoint_refcount -= 1;
+}
+#endif
+
+/*
+ * Keep track of hcall tracing depth and prevent recursion. Warn if any is
+ * detected because it may indicate a problem. This will not catch all
+ * problems with tracing code making hcalls, because the tracing might have
+ * been invoked from a non-hcall, so the first hcall could recurse into it
+ * without warning here, but this better than nothing.
+ *
+ * Hcalls with specific problems being traced should use the _notrace
+ * plpar_hcall variants.
+ */
+static DEFINE_PER_CPU(unsigned int, hcall_trace_depth);
+
+
+/*
+ * Fire the hcall_entry tracepoint for @opcode/@args with interrupts disabled.
+ *
+ * If this CPU's hcall_trace_depth is already non-zero the call is treated as
+ * recursion: a one-time warning is issued and nothing is traced.
+ */
+notrace void __trace_hcall_entry(unsigned long opcode, unsigned long *args)
+{
+ unsigned long flags;
+ unsigned int *depth;
+
+ local_irq_save(flags);
+
+ depth = this_cpu_ptr(&hcall_trace_depth);
+
+ if (WARN_ON_ONCE(*depth))
+ goto out;
+
+ /* depth is non-zero only while the tracepoint itself is running */
+ (*depth)++;
+ /*
+ * No matching preempt_enable() in this function; the enable is issued
+ * by __trace_hcall_exit().
+ */
+ preempt_disable();
+ trace_hcall_entry(opcode, args);
+ (*depth)--;
+
+out:
+ local_irq_restore(flags);
+}
+
+/*
+ * Fire the hcall_exit tracepoint for @opcode with its @retval and @retbuf,
+ * with interrupts disabled.
+ *
+ * If this CPU's hcall_trace_depth is non-zero, nothing is traced and
+ * preempt_enable() is skipped, mirroring the early-out in
+ * __trace_hcall_entry() that skips preempt_disable().
+ */
+notrace void __trace_hcall_exit(long opcode, long retval, unsigned long *retbuf)
+{
+ unsigned long flags;
+ unsigned int *depth;
+
+ local_irq_save(flags);
+
+ depth = this_cpu_ptr(&hcall_trace_depth);
+
+ if (*depth) /* Don't warn again on the way out */
+ goto out;
+
+ (*depth)++;
+ trace_hcall_exit(opcode, retval, retbuf);
+ /* pairs with the preempt_disable() in __trace_hcall_entry() */
+ preempt_enable();
+ (*depth)--;
+
+out:
+ local_irq_restore(flags);
+}
+#endif
+
+/**
+ * h_get_mpp - fetch memory pool parameters with the H_GET_MPP hcall
+ * @mpp_data: filled in from the seven values read out of the result buffer
+ *
+ * @mpp_data is written unconditionally.  The result buffer is zeroed first,
+ * as h_get_mpp_x() does, so that any slot the hcall leaves untouched is
+ * reported as zero instead of leftover stack contents.
+ *
+ * Return: the status from plpar_hcall9(); callers treat non-zero as failure.
+ */
+int h_get_mpp(struct hvcall_mpp_data *mpp_data)
+{
+	int rc;
+	unsigned long retbuf[PLPAR_HCALL9_BUFSIZE] = { 0 };
+
+	rc = plpar_hcall9(H_GET_MPP, retbuf);
+
+	mpp_data->entitled_mem = retbuf[0];
+	mpp_data->mapped_mem = retbuf[1];
+
+	/* retbuf[2]: group number in bits 16-31, pool number in bits 0-15 */
+	mpp_data->group_num = (retbuf[2] >> 2 * 8) & 0xffff;
+	mpp_data->pool_num = retbuf[2] & 0xffff;
+
+	/* retbuf[3]: two weight bytes on top, 48-bit entitlement below */
+	mpp_data->mem_weight = (retbuf[3] >> 7 * 8) & 0xff;
+	mpp_data->unallocated_mem_weight = (retbuf[3] >> 6 * 8) & 0xff;
+	mpp_data->unallocated_entitlement = retbuf[3] & 0xffffffffffffUL;
+
+	mpp_data->pool_size = retbuf[4];
+	mpp_data->loan_request = retbuf[5];
+	mpp_data->backing_mem = retbuf[6];
+
+	return rc;
+}
+EXPORT_SYMBOL(h_get_mpp);
+
+/*
+ * Issue H_GET_MPP_X and copy the first four result words into @mpp_x_data.
+ * Returns the hcall status.
+ */
+int h_get_mpp_x(struct hvcall_mpp_x_data *mpp_x_data)
+{
+	unsigned long out[PLPAR_HCALL9_BUFSIZE] = { 0 };
+	const int status = plpar_hcall9(H_GET_MPP_X, out);
+
+	/* Copied unconditionally; the buffer was zeroed before the call. */
+	mpp_x_data->pool_spurr_cycles = out[3];
+	mpp_x_data->pool_purr_cycles = out[2];
+	mpp_x_data->pool_coalesced_bytes = out[1];
+	mpp_x_data->coalesced_bytes = out[0];
+
+	return status;
+}
+
+#ifdef CONFIG_PPC_64S_HASH_MMU
+/*
+ * Recover the proto-VSID that scrambles to @vsid for segment size @ssize
+ * (MMU_SEGSIZE_256M, otherwise treated as 1T).
+ *
+ * Returns 0 when @vsid is 0 or is not below the VSID modulus.
+ */
+static unsigned long __init vsid_unscramble(unsigned long vsid, int ssize)
+{
+	unsigned long protovsid;
+	unsigned long va_bits = VA_BITS;
+	unsigned long modinv, vsid_modulus;
+	unsigned long max_mod_inv, tmp_modinv;
+
+	if (!mmu_has_feature(MMU_FTR_68_BIT_VA))
+		va_bits = 65;
+
+	if (ssize == MMU_SEGSIZE_256M) {
+		modinv = VSID_MULINV_256M;
+		vsid_modulus = ((1UL << (va_bits - SID_SHIFT)) - 1);
+	} else {
+		modinv = VSID_MULINV_1T;
+		vsid_modulus = ((1UL << (va_bits - SID_SHIFT_1T)) - 1);
+	}
+
+	/*
+	 * vsid outside our range.  A zero vsid is handled here too: its
+	 * product with modinv is 0 anyway, and letting it through would
+	 * divide by zero below.
+	 */
+	if (!vsid || vsid >= vsid_modulus)
+		return 0;
+
+	/*
+	 * If modinv is the modular multiplicative inverse of (x % vsid_modulus)
+	 * and vsid = (protovsid * x) % vsid_modulus, then we say:
+	 *   protovsid = (vsid * modinv) % vsid_modulus
+	 */
+
+	/* Check if (vsid * modinv) overflow (63 bits) */
+	max_mod_inv = 0x7fffffffffffffffull / vsid;
+	if (modinv < max_mod_inv)
+		return (vsid * modinv) % vsid_modulus;
+
+	/*
+	 * Split modinv as tmp_modinv * max_mod_inv + remainder so that each
+	 * partial product with vsid stays within 63 bits.
+	 */
+	tmp_modinv = modinv/max_mod_inv;
+	modinv %= max_mod_inv;
+
+	protovsid = (((vsid * max_mod_inv) % vsid_modulus) * tmp_modinv) % vsid_modulus;
+	protovsid = (protovsid + vsid * modinv) % vsid_modulus;
+
+	return protovsid;
+}
+
+/*
+ * pseries device initcall: reserve the context id derived from the VRMA VSID.
+ * Always returns 0.
+ */
+static int __init reserve_vrma_context_id(void)
+{
+	/*
+	 * Only the context id that maps to the VRMA VSID is reserved for now.
+	 * The addresses in "ibm,adjunct-virtual-addresses" are ignored because
+	 * adjunct support is not enabled through the
+	 * "ibm,client-architecture-support" interface.
+	 */
+	const unsigned long vrma_proto = vsid_unscramble(VRMA_VSID,
+							 MMU_SEGSIZE_1T);
+
+	hash__reserve_context_id(vrma_proto >> ESID_BITS_1T);
+
+	return 0;
+}
+machine_device_initcall(pseries, reserve_vrma_context_id);
+#endif
+
+#ifdef CONFIG_DEBUG_FS
+/* debugfs file interface for vpa data */
+/*
+ * read() handler: expose the raw struct lppaca of the CPU whose number is
+ * stored in the file's private_data.
+ */
+static ssize_t vpa_file_read(struct file *filp, char __user *buf, size_t len,
+			     loff_t *pos)
+{
+	int cpu_id = (long)filp->private_data;
+
+	return simple_read_from_buffer(buf, len, pos, &lppaca_of(cpu_id),
+				       sizeof(struct lppaca));
+}
+
+/*
+ * File operations for the per-cpu vpa debugfs files. No .write handler is
+ * provided.
+ */
+static const struct file_operations vpa_fops = {
+ .open = simple_open,
+ .read = vpa_file_read,
+ .llseek = default_llseek,
+};
+
+/*
+ * pseries arch initcall: create a "vpa" debugfs directory under
+ * arch_debugfs_dir with one "cpu-<n>" file (mode 0400) per possible CPU.
+ * Does nothing without FW_FEATURE_SPLPAR.  Always returns 0.
+ */
+static int __init vpa_debugfs_init(void)
+{
+	char name[16];
+	long i;
+	struct dentry *vpa_dir;
+
+	if (!firmware_has_feature(FW_FEATURE_SPLPAR))
+		return 0;
+
+	vpa_dir = debugfs_create_dir("vpa", arch_debugfs_dir);
+
+	/* set up the per-cpu vpa file */
+	for_each_possible_cpu(i) {
+		/* bounded: "cpu-" plus a long can exceed 16 bytes in principle */
+		snprintf(name, sizeof(name), "cpu-%ld", i);
+		/* the CPU number rides in private_data; vpa_file_read() reads it */
+		debugfs_create_file(name, 0400, vpa_dir, (void *)i, &vpa_fops);
+	}
+
+	return 0;
+}
+machine_arch_initcall(pseries, vpa_debugfs_init);
+#endif /* CONFIG_DEBUG_FS */
diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c
new file mode 100644
index 000000000..1c151d77e
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/lparcfg.c
@@ -0,0 +1,802 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * PowerPC64 LPAR Configuration Information Driver
+ *
+ * Dave Engebretsen engebret@us.ibm.com
+ * Copyright (c) 2003 Dave Engebretsen
+ * Will Schmidt willschm@us.ibm.com
+ * SPLPAR updates, Copyright (c) 2003 Will Schmidt IBM Corporation.
+ * seq_file updates, Copyright (c) 2004 Will Schmidt IBM Corporation.
+ * Nathan Lynch nathanl@austin.ibm.com
+ * Added lparcfg_write, Copyright (C) 2004 Nathan Lynch IBM Corporation.
+ *
+ * This driver creates a proc file at /proc/ppc64/lparcfg which contains
+ * keyword - value pairs that specify the configuration of the partition.
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/proc_fs.h>
+#include <linux/init.h>
+#include <asm/papr-sysparm.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/hugetlb.h>
+#include <asm/lppaca.h>
+#include <asm/hvcall.h>
+#include <asm/firmware.h>
+#include <asm/rtas.h>
+#include <asm/time.h>
+#include <asm/vdso_datapage.h>
+#include <asm/vio.h>
+#include <asm/mmu.h>
+#include <asm/machdep.h>
+#include <asm/drmem.h>
+
+#include "pseries.h"
+#include "vas.h" /* pseries_vas_dlpar_cpu() */
+
+/*
+ * This isn't a module but we expose that to userspace
+ * via /proc so leave the definitions here
+ */
+#define MODULE_VERS "1.9"
+#define MODULE_NAME "lparcfg"
+
+/* #define LPARCFG_DEBUG */
+
+/*
+ * Track sum of all purrs across all processors. This is used to further
+ * calculate usage values by different applications
+ */
+/* Per-CPU callback: add this CPU's PURR to the atomic64_t passed as @arg. */
+static void cpu_get_purr(void *arg)
+{
+	atomic64_t *total = arg;
+	const unsigned long local_purr = mfspr(SPRN_PURR);
+
+	atomic64_add(local_purr, total);
+}
+
+/* Run cpu_get_purr() on every CPU, waiting for completion; return the sum. */
+static unsigned long get_purr(void)
+{
+	atomic64_t total = ATOMIC64_INIT(0);
+
+	on_each_cpu(cpu_get_purr, &total, 1);
+	return atomic64_read(&total);
+}
+
+/*
+ * Methods used to fetch LPAR data when running on a pSeries platform.
+ */
+
+/* Decoded H_GET_PPP results, filled in by h_get_ppp(). */
+struct hvcall_ppp_data {
+ u64 entitlement; /* retbuf[0] */
+ u64 unallocated_entitlement; /* retbuf[1] */
+ u16 group_num; /* retbuf[2], bits 16-31 */
+ u16 pool_num; /* retbuf[2], bits 0-15 */
+ u8 capped; /* retbuf[3], bit 48 */
+ u8 weight; /* retbuf[3], bits 40-47 */
+ u8 unallocated_weight; /* retbuf[3], bits 32-39 */
+ u16 active_procs_in_pool; /* retbuf[3], bits 16-31 */
+ u16 active_system_procs; /* retbuf[3], bits 0-15 */
+ u16 phys_platform_procs; /* retbuf[4], bits 48-63 */
+ u32 max_proc_cap_avail; /* retbuf[4], bits 24-47 */
+ u32 entitled_proc_cap_avail; /* retbuf[4], bits 0-23 */
+};
+
+/*
+ * H_GET_PPP hcall returns info in 4 parms (entitled_capacity,
+ * unallocated_capacity, aggregation, resource_capability), plus a fifth
+ * (R8) if ibm,partition-performance-parameters-level >= 1.
+ *
+ * R4 = Entitled Processor Capacity Percentage.
+ * R5 = Unallocated Processor Capacity Percentage.
+ * R6 (AABBCCDDEEFFGGHH).
+ * XXXX - reserved (0)
+ * XXXX - reserved (0)
+ * XXXX - Group Number
+ * XXXX - Pool Number.
+ * R7 (IIJJKKLLMMNNOOPP).
+ * XX - reserved. (0)
+ * XX - bit 0-6 reserved (0). bit 7 is Capped indicator.
+ * XX - variable processor Capacity Weight
+ * XX - Unallocated Variable Processor Capacity Weight.
+ * XXXX - Active processors in Physical Processor Pool.
+ * XXXX - Processors active on platform.
+ * R8 (QQQQRRRRRRSSSSSS). if ibm,partition-performance-parameters-level >= 1
+ * XXXX - Physical platform procs allocated to virtualization.
+ * XXXXXX - Max procs capacity % available to the partitions pool.
+ * XXXXXX - Entitled procs capacity % available to the
+ * partitions pool.
+ */
+static unsigned int h_get_ppp(struct hvcall_ppp_data *ppp_data)
+{
+	unsigned long rc;
+	/*
+	 * Zeroed so that @ppp_data, which is filled in unconditionally below,
+	 * never receives leftover stack contents from a slot the hcall did
+	 * not write.
+	 */
+	unsigned long retbuf[PLPAR_HCALL9_BUFSIZE] = { 0 };
+
+	rc = plpar_hcall9(H_GET_PPP, retbuf);
+
+	ppp_data->entitlement = retbuf[0];
+	ppp_data->unallocated_entitlement = retbuf[1];
+
+	/* retbuf[2]: group number above pool number, 16 bits each */
+	ppp_data->group_num = (retbuf[2] >> 2 * 8) & 0xffff;
+	ppp_data->pool_num = retbuf[2] & 0xffff;
+
+	/* retbuf[3]: capped flag, two weight bytes, two 16-bit proc counts */
+	ppp_data->capped = (retbuf[3] >> 6 * 8) & 0x01;
+	ppp_data->weight = (retbuf[3] >> 5 * 8) & 0xff;
+	ppp_data->unallocated_weight = (retbuf[3] >> 4 * 8) & 0xff;
+	ppp_data->active_procs_in_pool = (retbuf[3] >> 2 * 8) & 0xffff;
+	ppp_data->active_system_procs = retbuf[3] & 0xffff;
+
+	/* retbuf[4]: 16-bit proc count followed by two 24-bit capacities */
+	ppp_data->phys_platform_procs = retbuf[4] >> 6 * 8;
+	ppp_data->max_proc_cap_avail = (retbuf[4] >> 3 * 8) & 0xffffff;
+	ppp_data->entitled_proc_cap_avail = retbuf[4] & 0xffffff;
+
+	/* Hcall status; callers in this file treat non-zero as failure. */
+	return rc;
+}
+
+/*
+ * Print the local LPAR's affinity score, obtained through
+ * H_GET_PERF_COUNTER_INFO.  Prints nothing if the request buffer cannot be
+ * allocated or the hcall does not return H_SUCCESS.
+ */
+static void show_gpci_data(struct seq_file *m)
+{
+	struct hv_gpci_request_buffer *req = kmalloc(sizeof(*req), GFP_KERNEL);
+	unsigned int score;
+	long status;
+
+	if (!req)
+		return;
+
+	/*
+	 * 0xB1 selects the Affinity_Domain_Info_By_Partition subcall; the
+	 * score is at byte 0xB in the output buffer.
+	 */
+	memset(&req->params, 0, sizeof(req->params));
+	req->params.counter_request = cpu_to_be32(0xB1);
+	req->params.starting_index = cpu_to_be32(-1); /* local LPAR */
+	req->params.counter_info_version_in = 0x5; /* v5+ for score */
+
+	status = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO, virt_to_phys(req),
+				    sizeof(*req));
+	if (status == H_SUCCESS) {
+		score = req->bytes[0xB];
+		seq_printf(m, "partition_affinity_score=%u\n", score);
+	} else {
+		pr_debug("hcall failed: H_GET_PERF_COUNTER_INFO: %ld, %x\n",
+			 status, be32_to_cpu(req->params.detail_rc));
+	}
+
+	kfree(req);
+}
+
+/*
+ * Issue H_PIC and hand back its first two result words through
+ * @pool_idle_time and @num_procs.  Returns the hcall status.
+ *
+ * Both outputs are written unconditionally.  The result buffer is zeroed so
+ * that a slot the hcall leaves untouched yields 0 rather than leftover stack
+ * contents.
+ */
+static unsigned h_pic(unsigned long *pool_idle_time,
+		      unsigned long *num_procs)
+{
+	unsigned long rc;
+	unsigned long retbuf[PLPAR_HCALL_BUFSIZE] = { 0 };
+
+	rc = plpar_hcall(H_PIC, retbuf);
+
+	*pool_idle_time = retbuf[0];
+	*num_procs = retbuf[1];
+
+	return rc;
+}
+
+/*
+ * parse_ppp_data
+ * Print the data returned from h_get_ppp and h_pic as key=value lines.
+ *
+ * Nothing is printed if h_get_ppp() fails.  The pool_idle_time and
+ * pool_num_procs lines are printed only when h_pic() reports H_SUCCESS, so
+ * that values from a failed hcall are never shown.
+ */
+static void parse_ppp_data(struct seq_file *m)
+{
+	struct hvcall_ppp_data ppp_data;
+	struct device_node *root;
+	const __be32 *perf_level;
+	int rc;
+
+	rc = h_get_ppp(&ppp_data);
+	if (rc)
+		return;
+
+	seq_printf(m, "partition_entitled_capacity=%lld\n",
+		   ppp_data.entitlement);
+	seq_printf(m, "group=%d\n", ppp_data.group_num);
+	seq_printf(m, "system_active_processors=%d\n",
+		   ppp_data.active_system_procs);
+
+	/* pool related entries are appropriate for shared configs */
+	if (lppaca_shared_proc()) {
+		unsigned long pool_idle_time, pool_procs;
+
+		seq_printf(m, "pool=%d\n", ppp_data.pool_num);
+
+		/* report pool_capacity in percentage */
+		seq_printf(m, "pool_capacity=%d\n",
+			   ppp_data.active_procs_in_pool * 100);
+
+		/* only report pool figures that the hcall actually produced */
+		if (h_pic(&pool_idle_time, &pool_procs) == H_SUCCESS) {
+			seq_printf(m, "pool_idle_time=%ld\n", pool_idle_time);
+			seq_printf(m, "pool_num_procs=%ld\n", pool_procs);
+		}
+	}
+
+	seq_printf(m, "unallocated_capacity_weight=%d\n",
+		   ppp_data.unallocated_weight);
+	seq_printf(m, "capacity_weight=%d\n", ppp_data.weight);
+	seq_printf(m, "capped=%d\n", ppp_data.capped);
+	seq_printf(m, "unallocated_capacity=%lld\n",
+		   ppp_data.unallocated_entitlement);
+
+	/* The last bits of information returned from h_get_ppp are only
+	 * valid if the ibm,partition-performance-parameters-level
+	 * property is >= 1.
+	 */
+	root = of_find_node_by_path("/");
+	if (root) {
+		perf_level = of_get_property(root,
+				"ibm,partition-performance-parameters-level",
+					     NULL);
+		if (perf_level && (be32_to_cpup(perf_level) >= 1)) {
+			seq_printf(m,
+			    "physical_procs_allocated_to_virtualization=%d\n",
+				   ppp_data.phys_platform_procs);
+			seq_printf(m, "max_proc_capacity_available=%d\n",
+				   ppp_data.max_proc_cap_avail);
+			seq_printf(m, "entitled_proc_capacity_available=%d\n",
+				   ppp_data.entitled_proc_cap_avail);
+		}
+
+		of_node_put(root);
+	}
+}
+
+/**
+ * parse_mpp_data
+ * Print the values obtained from h_get_mpp as key=value lines.  Nothing is
+ * printed when the call fails; mapped_entitled_memory and
+ * entitled_memory_pool_size are left out when their value is -1.
+ */
+static void parse_mpp_data(struct seq_file *m)
+{
+	struct hvcall_mpp_data mpp;
+
+	if (h_get_mpp(&mpp) != 0)
+		return;
+
+	seq_printf(m, "entitled_memory=%ld\n", mpp.entitled_mem);
+	if (mpp.mapped_mem != -1)
+		seq_printf(m, "mapped_entitled_memory=%ld\n", mpp.mapped_mem);
+
+	seq_printf(m, "entitled_memory_group_number=%d\n", mpp.group_num);
+	seq_printf(m, "entitled_memory_pool_number=%d\n", mpp.pool_num);
+
+	seq_printf(m, "entitled_memory_weight=%d\n", mpp.mem_weight);
+	seq_printf(m, "unallocated_entitled_memory_weight=%d\n",
+		   mpp.unallocated_mem_weight);
+	seq_printf(m, "unallocated_io_mapping_entitlement=%ld\n",
+		   mpp.unallocated_entitlement);
+
+	if (mpp.pool_size != -1)
+		seq_printf(m, "entitled_memory_pool_size=%ld bytes\n",
+			   mpp.pool_size);
+
+	seq_printf(m, "entitled_memory_loan_request=%ld\n", mpp.loan_request);
+	seq_printf(m, "backing_memory=%ld bytes\n", mpp.backing_mem);
+}
+
+/**
+ * parse_mpp_x_data
+ * Print the values obtained from h_get_mpp_x.  Requires FW_FEATURE_XCMO;
+ * the three pool values are printed only when non-zero.
+ */
+static void parse_mpp_x_data(struct seq_file *m)
+{
+	struct hvcall_mpp_x_data x;
+
+	if (!firmware_has_feature(FW_FEATURE_XCMO) || h_get_mpp_x(&x))
+		return;
+
+	seq_printf(m, "coalesced_bytes=%ld\n", x.coalesced_bytes);
+
+	if (x.pool_coalesced_bytes != 0)
+		seq_printf(m, "pool_coalesced_bytes=%ld\n",
+			   x.pool_coalesced_bytes);
+	if (x.pool_purr_cycles != 0)
+		seq_printf(m, "coalesce_pool_purr=%ld\n", x.pool_purr_cycles);
+	if (x.pool_spurr_cycles != 0)
+		seq_printf(m, "coalesce_pool_spurr=%ld\n", x.pool_spurr_cycles);
+}
+
+/*
+ * Read the LPAR name using the RTAS ibm,get-system-parameter call.
+ *
+ * The name read through this call is updated if changes are made by the end
+ * user on the hypervisor side.
+ *
+ * Some hypervisors (like Qemu) may not provide this value. In that case a
+ * non-zero value is returned.  -ENOMEM is returned if the parameter buffer
+ * cannot be allocated.
+ */
+static int read_rtas_lpar_name(struct seq_file *m)
+{
+	struct papr_sysparm_buf *parm = papr_sysparm_buf_alloc();
+	int rc;
+
+	if (parm == NULL)
+		return -ENOMEM;
+
+	rc = papr_sysparm_get(PAPR_SYSPARM_LPAR_NAME, parm);
+	if (rc == 0)
+		seq_printf(m, "partition_name=%s\n", parm->val);
+
+	papr_sysparm_buf_free(parm);
+
+	return rc;
+}
+
+/*
+ * Read the LPAR name from the "ibm,partition-name" property of the device
+ * tree root.
+ *
+ * The value read in the DT is not updated if the end-user is touching the LPAR
+ * name on the hypervisor side.
+ *
+ * Returns 0 after printing the name, -ENOENT if the property cannot be read.
+ */
+static int read_dt_lpar_name(struct seq_file *m)
+{
+	const char *lpar_name;
+	int rc = of_property_read_string(of_root, "ibm,partition-name",
+					 &lpar_name);
+
+	if (rc != 0)
+		return -ENOENT;
+
+	seq_printf(m, "partition_name=%s\n", lpar_name);
+
+	return 0;
+}
+
+/*
+ * Print the partition name, preferring RTAS and falling back to the device
+ * tree; log once if neither source works.
+ */
+static void read_lpar_name(struct seq_file *m)
+{
+	if (read_rtas_lpar_name(m) == 0)
+		return;
+
+	if (read_dt_lpar_name(m) == 0)
+		return;
+
+	pr_err_once("Error can't get the LPAR name");
+}
+
+/* Size of the scratch buffer one keyword=value entry is assembled in. */
+#define SPLPAR_MAXLENGTH (1026 * sizeof(char))
+
+/*
+ * parse_system_parameter_string()
+ * Retrieve the potential_processors, max_entitled_capacity and friends
+ * through the get-system-parameter rtas call.  The comma-separated entries
+ * are printed one per line, with the keywords "MaxEntCap" and "MaxPlatProcs"
+ * replaced by "partition_max_entitled_capacity" and
+ * "system_potential_processors".
+ *
+ * Nothing is printed if a buffer cannot be allocated or the parameter cannot
+ * be read.  Parsing stops early if a single entry would not fit in the
+ * scratch buffer.
+ */
+static void parse_system_parameter_string(struct seq_file *m)
+{
+	struct papr_sysparm_buf *buf;
+	const char *local_buffer;
+	char *workbuffer;
+	int splpar_strlen;
+	int idx, w_idx;
+
+	buf = papr_sysparm_buf_alloc();
+	if (!buf)
+		return;
+
+	if (papr_sysparm_get(PAPR_SYSPARM_SHARED_PROC_LPAR_ATTRS, buf))
+		goto out_free;
+
+	workbuffer = kzalloc(SPLPAR_MAXLENGTH, GFP_KERNEL);
+	if (!workbuffer)
+		goto out_free;
+
+	splpar_strlen = be16_to_cpu(buf->len);
+	local_buffer = buf->val;
+
+	w_idx = 0;
+	idx = 0;
+	while ((*local_buffer) && (idx < splpar_strlen)) {
+		/* keep one byte free for the NUL written below */
+		if (w_idx >= (int)SPLPAR_MAXLENGTH - 1)
+			break;
+
+		workbuffer[w_idx++] = local_buffer[idx++];
+		if ((local_buffer[idx] == ',')
+		    || (local_buffer[idx] == '\0')) {
+			workbuffer[w_idx] = '\0';
+			if (w_idx) {
+				/* avoid the empty string */
+				seq_printf(m, "%s\n", workbuffer);
+			}
+			memset(workbuffer, 0, SPLPAR_MAXLENGTH);
+			idx++;	/* skip the comma */
+			w_idx = 0;
+		} else if (local_buffer[idx] == '=') {
+			/*
+			 * The keyword is complete; swap in the name this
+			 * file reports.  Both replacements are far shorter
+			 * than the scratch buffer.
+			 */
+			if (0 == strcmp(workbuffer, "MaxEntCap")) {
+				strcpy(workbuffer,
+				       "partition_max_entitled_capacity");
+				w_idx = strlen(workbuffer);
+			}
+			if (0 == strcmp(workbuffer, "MaxPlatProcs")) {
+				strcpy(workbuffer,
+				       "system_potential_processors");
+				w_idx = strlen(workbuffer);
+			}
+		}
+	}
+	kfree(workbuffer);
+out_free:
+	papr_sysparm_buf_free(buf);
+}
+
+/*
+ * Count the device tree nodes of type "cpu" and return the total.
+ * These are the virtual processors; threads are not included.
+ */
+static int lparcfg_count_active_processors(void)
+{
+	struct device_node *cpu_node;
+	int nr_cpus = 0;
+
+	for_each_node_by_type(cpu_node, "cpu") {
+		nr_cpus += 1;
+#ifdef LPARCFG_DEBUG
+		printk(KERN_ERR "cpus_dn %p\n", cpu_node);
+#endif
+	}
+
+	return nr_cpus;
+}
+
+/*
+ * Print cmo_enabled and, when FW_FEATURE_CMO is present, the CMO fault
+ * counters summed over all possible CPUs plus the CMO PSP and page-size
+ * settings.
+ */
+static void pseries_cmo_data(struct seq_file *m)
+{
+	unsigned long faults = 0, fault_time = 0;
+	int cpu;
+
+	seq_printf(m, "cmo_enabled=%d\n", firmware_has_feature(FW_FEATURE_CMO));
+
+	if (!firmware_has_feature(FW_FEATURE_CMO))
+		return;
+
+	for_each_possible_cpu(cpu) {
+		const struct lppaca *lp = &lppaca_of(cpu);
+
+		faults += be64_to_cpu(lp->cmo_faults);
+		fault_time += be64_to_cpu(lp->cmo_fault_time);
+	}
+
+	seq_printf(m, "cmo_faults=%lu\n", faults);
+	/* fault time is accumulated in timebase ticks; report microseconds */
+	seq_printf(m, "cmo_fault_time_usec=%lu\n",
+		   fault_time / tb_ticks_per_usec);
+	seq_printf(m, "cmo_primary_psp=%d\n", cmo_get_primary_psp());
+	seq_printf(m, "cmo_secondary_psp=%d\n", cmo_get_secondary_psp());
+	seq_printf(m, "cmo_page_size=%lu\n", cmo_get_page_size());
+}
+
+/*
+ * Print the lppaca yield_count and dispersion_count fields, each summed over
+ * all possible CPUs.
+ */
+static void splpar_dispatch_data(struct seq_file *m)
+{
+	unsigned long total_yields = 0, total_dispersions = 0;
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		const struct lppaca *lp = &lppaca_of(cpu);
+
+		total_yields += be32_to_cpu(lp->yield_count);
+		total_dispersions += be32_to_cpu(lp->dispersion_count);
+	}
+
+	seq_printf(m, "dispatches=%lu\n", total_yields);
+	seq_printf(m, "dispatch_dispersions=%lu\n", total_dispersions);
+}
+
+/*
+ * Print power_mode_data from the first H_GET_EM_PARMS result word.  Prints
+ * nothing without FW_FEATURE_LPAR or when the hcall does not succeed.
+ */
+static void parse_em_data(struct seq_file *m)
+{
+	unsigned long em_parms[PLPAR_HCALL_BUFSIZE];
+
+	if (!firmware_has_feature(FW_FEATURE_LPAR))
+		return;
+
+	if (plpar_hcall(H_GET_EM_PARMS, em_parms) != H_SUCCESS)
+		return;
+
+	seq_printf(m, "power_mode_data=%016lx\n", em_parms[0]);
+}
+
+/*
+ * Print MaxMem: n_lmbs * lmb_size from drmem_info, plus the bytes covered by
+ * hugetlb_total_pages().
+ */
+static void maxmem_data(struct seq_file *m)
+{
+	unsigned long lmb_bytes, huge_bytes;
+
+	lmb_bytes = (unsigned long)drmem_info->n_lmbs * drmem_info->lmb_size;
+	huge_bytes = hugetlb_total_pages() * PAGE_SIZE;
+
+	seq_printf(m, "MaxMem=%lu\n", lmb_bytes + huge_bytes);
+}
+
+/*
+ * Emit the pseries portion of the lparcfg output.  Always returns 0.
+ *
+ * The potential processor count comes from the fifth cell of the
+ * "ibm,lrdr-capacity" property of /rtas when present, otherwise from
+ * vdso_data->processorCount.
+ */
+static int pseries_lparcfg_data(struct seq_file *m, void *v)
+{
+	struct device_node *rtas;
+	const __be32 *capacity = NULL;
+	int potential_cpus;
+	int active_cpus;
+
+	rtas = of_find_node_by_path("/rtas");
+	if (rtas)
+		capacity = of_get_property(rtas, "ibm,lrdr-capacity", NULL);
+
+	if (capacity != NULL)
+		potential_cpus = be32_to_cpup(capacity + 4);
+	else
+		potential_cpus = vdso_data->processorCount;
+	of_node_put(rtas);
+
+	active_cpus = lparcfg_count_active_processors();
+
+	if (!firmware_has_feature(FW_FEATURE_SPLPAR)) {
+		/* non SPLPAR case: derive everything from the CPU counts */
+		seq_printf(m, "system_active_processors=%d\n", potential_cpus);
+		seq_printf(m, "system_potential_processors=%d\n",
+			   potential_cpus);
+		seq_printf(m, "partition_max_entitled_capacity=%d\n",
+			   potential_cpus * 100);
+		seq_printf(m, "partition_entitled_capacity=%d\n",
+			   active_cpus * 100);
+	} else {
+		/* this call handles the ibm,get-system-parameter contents */
+		read_lpar_name(m);
+		parse_system_parameter_string(m);
+		parse_ppp_data(m);
+		parse_mpp_data(m);
+		parse_mpp_x_data(m);
+		pseries_cmo_data(m);
+		splpar_dispatch_data(m);
+
+		seq_printf(m, "purr=%ld\n", get_purr());
+		seq_printf(m, "tbr=%ld\n", mftb());
+	}
+
+	show_gpci_data(m);
+
+	seq_printf(m, "partition_active_processors=%d\n", active_cpus);
+	seq_printf(m, "partition_potential_processors=%d\n", potential_cpus);
+	seq_printf(m, "shared_processor_mode=%d\n", lppaca_shared_proc());
+
+#ifdef CONFIG_PPC_64S_HASH_MMU
+	if (!radix_enabled())
+		seq_printf(m, "slb_size=%d\n", mmu_slb_size);
+#endif
+	parse_em_data(m);
+	maxmem_data(m);
+
+	seq_printf(m, "security_flavor=%u\n", pseries_security_flavor);
+
+	return 0;
+}
+
+/*
+ * Change either the processor entitlement or the capacity weight through
+ * H_SET_PPP; the other value is taken from the current H_GET_PPP settings.
+ * If both pointers are given, @entitlement wins and @weight is ignored.
+ *
+ * Returns a non-zero h_get_ppp() status, -EINVAL when both pointers are NULL,
+ * otherwise the H_SET_PPP status.
+ */
+static ssize_t update_ppp(u64 *entitlement, u8 *weight)
+{
+	struct hvcall_ppp_data cur;
+	u64 entitled;
+	u8 wt;
+	ssize_t rc;
+
+	/* Get our current parameters */
+	rc = h_get_ppp(&cur);
+	if (rc)
+		return rc;
+
+	if (!entitlement && !weight)
+		return -EINVAL;
+
+	entitled = entitlement ? *entitlement : cur.entitlement;
+	wt = entitlement ? cur.weight : *weight;
+
+	pr_debug("%s: current_entitled = %llu, current_weight = %u\n",
+		 __func__, cur.entitlement, cur.weight);
+
+	pr_debug("%s: new_entitled = %llu, new_weight = %u\n",
+		 __func__, entitled, wt);
+
+	return plpar_hcall_norets(H_SET_PPP, entitled, wt);
+}
+
+/**
+ * update_mpp
+ *
+ * Update the memory entitlement or weight for the partition.  The caller
+ * supplies one of the two; the other is taken from the current H_GET_MPP
+ * values, since the h_set_mpp call takes both as parameters.  If both are
+ * supplied, @entitlement is used and @weight is ignored.
+ *
+ * A new entitlement is first checked with vio_cmo_entitlement_update().
+ */
+static ssize_t update_mpp(u64 *entitlement, u8 *weight)
+{
+	struct hvcall_mpp_data cur;
+	u64 entitled;
+	u8 wt;
+	ssize_t rc;
+
+	if (entitlement) {
+		/* Check with vio to ensure the new memory entitlement
+		 * can be handled.
+		 */
+		rc = vio_cmo_entitlement_update(*entitlement);
+		if (rc)
+			return rc;
+	}
+
+	rc = h_get_mpp(&cur);
+	if (rc)
+		return rc;
+
+	if (!entitlement && !weight)
+		return -EINVAL;
+
+	entitled = entitlement ? *entitlement : cur.entitled_mem;
+	wt = entitlement ? cur.mem_weight : *weight;
+
+	pr_debug("%s: current_entitled = %lu, current_weight = %u\n",
+		 __func__, cur.entitled_mem, cur.mem_weight);
+
+	pr_debug("%s: new_entitled = %llu, new_weight = %u\n",
+		 __func__, entitled, wt);
+
+	return plpar_hcall_norets(H_SET_MPP, entitled, wt);
+}
+
+/*
+ * Interface for changing system parameters.  Format of input is
+ * "param_name=value"; the last byte written is replaced by a NUL and
+ * anything after the number is ignored.
+ *
+ * Accepted parameters:
+ *   partition_entitled_capacity, capacity_weight  - applied by update_ppp()
+ *   entitled_memory, entitled_memory_weight       - applied by update_mpp()
+ *
+ * Returns @count when the update reports H_SUCCESS or H_CONSTRAINED.
+ * Returns -EINVAL without FW_FEATURE_SPLPAR, for an empty, oversized or
+ * malformed write, for an unknown parameter, or on H_PARAMETER; -EFAULT if
+ * the user buffer cannot be copied; -EBUSY on H_BUSY; -EIO on H_HARDWARE.
+ * Any other status from the update helpers is returned unchanged.
+ */
+static ssize_t lparcfg_write(struct file *file, const char __user * buf,
+			     size_t count, loff_t * off)
+{
+	char kbuf[64];
+	char *tmp;
+	char *endp;
+	u64 new_entitled;
+	u8 new_weight;
+	ssize_t retval;
+
+	if (!firmware_has_feature(FW_FEATURE_SPLPAR))
+		return -EINVAL;
+
+	/*
+	 * A zero-length write must be rejected here: kbuf[count - 1] below
+	 * would otherwise store one byte in front of the buffer.
+	 */
+	if (count == 0 || count > sizeof(kbuf))
+		return -EINVAL;
+
+	if (copy_from_user(kbuf, buf, count))
+		return -EFAULT;
+
+	kbuf[count - 1] = '\0';
+	tmp = strchr(kbuf, '=');
+	if (!tmp)
+		return -EINVAL;
+
+	/* split into the name (kbuf) and the value (tmp) */
+	*tmp++ = '\0';
+
+	if (!strcmp(kbuf, "partition_entitled_capacity")) {
+		new_entitled = (u64) simple_strtoul(tmp, &endp, 10);
+		if (endp == tmp)
+			return -EINVAL;
+
+		retval = update_ppp(&new_entitled, NULL);
+
+		if (retval == H_SUCCESS || retval == H_CONSTRAINED) {
+			/*
+			 * The hypervisor assigns VAS resources based
+			 * on entitled capacity for shared mode.
+			 * Reconfig VAS windows based on DLPAR CPU events.
+			 */
+			if (pseries_vas_dlpar_cpu() != 0)
+				retval = H_HARDWARE;
+		}
+	} else if (!strcmp(kbuf, "capacity_weight")) {
+		new_weight = (u8) simple_strtoul(tmp, &endp, 10);
+		if (endp == tmp)
+			return -EINVAL;
+
+		retval = update_ppp(NULL, &new_weight);
+	} else if (!strcmp(kbuf, "entitled_memory")) {
+		new_entitled = (u64) simple_strtoul(tmp, &endp, 10);
+		if (endp == tmp)
+			return -EINVAL;
+
+		retval = update_mpp(&new_entitled, NULL);
+	} else if (!strcmp(kbuf, "entitled_memory_weight")) {
+		new_weight = (u8) simple_strtoul(tmp, &endp, 10);
+		if (endp == tmp)
+			return -EINVAL;
+
+		retval = update_mpp(NULL, &new_weight);
+	} else
+		return -EINVAL;
+
+	/* translate the hcall statuses this interface knows about */
+	if (retval == H_SUCCESS || retval == H_CONSTRAINED) {
+		retval = count;
+	} else if (retval == H_BUSY) {
+		retval = -EBUSY;
+	} else if (retval == H_HARDWARE) {
+		retval = -EIO;
+	} else if (retval == H_PARAMETER) {
+		retval = -EINVAL;
+	}
+
+	return retval;
+}
+
+/*
+ * seq_file show routine: print the lparcfg version banner, the serial
+ * number, system type and partition id taken from the device tree root,
+ * then the pseries-specific data.
+ *
+ * Properties that are missing are reported as an empty string, or 0 for the
+ * partition id.
+ */
+static int lparcfg_data(struct seq_file *m, void *v)
+{
+	struct device_node *root;
+	const char *sys_type = "";
+	const char *serial = "";
+	const char *str_prop;
+	const __be32 *index_prop;
+	unsigned int partition = 0;
+
+	seq_printf(m, "%s %s\n", MODULE_NAME, MODULE_VERS);
+
+	root = of_find_node_by_path("/");
+	if (root) {
+		str_prop = of_get_property(root, "model", NULL);
+		if (str_prop != NULL)
+			sys_type = str_prop;
+
+		str_prop = of_get_property(root, "system-id", NULL);
+		if (str_prop != NULL)
+			serial = str_prop;
+
+		index_prop = of_get_property(root, "ibm,partition-no", NULL);
+		if (index_prop != NULL)
+			partition = be32_to_cpup(index_prop);
+
+		of_node_put(root);
+	}
+
+	seq_printf(m, "serial_number=%s\n", serial);
+	seq_printf(m, "system_type=%s\n", sys_type);
+	seq_printf(m, "partition_id=%d\n", (int)partition);
+
+	return pseries_lparcfg_data(m, v);
+}
+
+/*
+ * open() handler: set the file up as a single-record seq_file whose show
+ * routine is lparcfg_data(). No private data is passed.
+ */
+static int lparcfg_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, lparcfg_data, NULL);
+}
+
+/*
+ * proc operations for the lparcfg file: reads go through the seq_file
+ * helpers, writes are handled by lparcfg_write().
+ */
+static const struct proc_ops lparcfg_proc_ops = {
+ .proc_read = seq_read,
+ .proc_write = lparcfg_write,
+ .proc_open = lparcfg_open,
+ .proc_release = single_release,
+ .proc_lseek = seq_lseek,
+};
+
+/*
+ * pseries device initcall: create the "powerpc/lparcfg" proc entry.  The
+ * entry is read-only (0444) unless FW_FEATURE_SPLPAR is present, in which
+ * case owner write permission is added.  Returns 0, or -EIO if the entry
+ * cannot be created.
+ */
+static int __init lparcfg_init(void)
+{
+	/* Allow writing if we have FW_FEATURE_SPLPAR */
+	const umode_t mode = firmware_has_feature(FW_FEATURE_SPLPAR) ? 0644
+								     : 0444;
+
+	if (proc_create("powerpc/lparcfg", mode, NULL, &lparcfg_proc_ops))
+		return 0;
+
+	printk(KERN_ERR "Failed to create powerpc/lparcfg\n");
+	return -EIO;
+}
+machine_device_initcall(pseries, lparcfg_init);
diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c
new file mode 100644
index 000000000..0161226d8
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/mobility.c
@@ -0,0 +1,830 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Support for Partition Mobility/Migration
+ *
+ * Copyright (C) 2010 Nathan Fontenot
+ * Copyright (C) 2010 IBM Corporation
+ */
+
+
+#define pr_fmt(fmt) "mobility: " fmt
+
+#include <linux/cpu.h>
+#include <linux/kernel.h>
+#include <linux/kobject.h>
+#include <linux/nmi.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
+#include <linux/stat.h>
+#include <linux/stop_machine.h>
+#include <linux/completion.h>
+#include <linux/device.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+#include <linux/stringify.h>
+
+#include <asm/machdep.h>
+#include <asm/nmi.h>
+#include <asm/rtas.h>
+#include "pseries.h"
+#include "vas.h" /* vas_migration_handler() */
+#include "../../kernel/cacheinfo.h"
+
+static struct kobject *mobility_kobj;
+
+struct update_props_workarea { /* fixed header at the start of the ibm,update-properties work buffer; all fields big-endian */
+ __be32 phandle; /* filled from dn->phandle by update_dt_node() */
+ __be32 state;
+ __be64 reserved;
+ __be32 nprops; /* read back by update_dt_node() as the number of property descriptors that follow */
+} __packed;
+
+#define NODE_ACTION_MASK 0xff000000 /* action part of an ibm,update-nodes list word */
+#define NODE_COUNT_MASK 0x00ffffff /* node-count part of the same word */
+
+#define DELETE_DT_NODE 0x01000000 /* action values compared against (word & NODE_ACTION_MASK) */
+#define UPDATE_DT_NODE 0x02000000
+#define ADD_DT_NODE 0x03000000
+
+#define MIGRATION_SCOPE (1) /* scope passed to pseries_devicetree_update() by post_mobility_fixup() */
+#define PRRN_SCOPE -2
+
+#ifdef CONFIG_PPC_WATCHDOG
+static unsigned int nmi_wd_lpm_factor = 200; /* handed to watchdog_hardlockup_set_timeout_pct() by pseries_migrate_partition(); 0 disables the adjustment there */
+
+#ifdef CONFIG_SYSCTL
+static struct ctl_table nmi_wd_lpm_factor_ctl_table[] = { /* exposes nmi_wd_lpm_factor as a writable sysctl */
+ {
+ .procname = "nmi_wd_lpm_factor",
+ .data = &nmi_wd_lpm_factor,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_douintvec_minmax, /* no extra1/extra2 limits are set in this entry */
+ },
+ {}
+};
+
+static int __init register_nmi_wd_lpm_factor_sysctl(void)
+{
+ register_sysctl("kernel", nmi_wd_lpm_factor_ctl_table); /* result is not checked; the initcall always reports success */
+
+ return 0;
+}
+device_initcall(register_nmi_wd_lpm_factor_sysctl);
+#endif /* CONFIG_SYSCTL */
+#endif /* CONFIG_PPC_WATCHDOG */
+
+/*
+ * Run a two-input, one-output RTAS call that uses rtas_data_buf as its
+ * work area.
+ *
+ * @token: RTAS function token to invoke
+ * @buf:   caller buffer of RTAS_DATA_BUF_SIZE bytes; copied into
+ *         rtas_data_buf before the call and refreshed from it afterwards
+ * @scope: second argument handed to the RTAS function
+ *
+ * The copy-in, the call and the copy-out all happen under
+ * rtas_data_buf_lock. Returns the status from rtas_call().
+ */
+static int mobility_rtas_call(int token, char *buf, s32 scope)
+{
+	int status;
+
+	spin_lock(&rtas_data_buf_lock);
+	memcpy(rtas_data_buf, buf, RTAS_DATA_BUF_SIZE);
+	status = rtas_call(token, 2, 1, NULL, rtas_data_buf, scope);
+	memcpy(buf, rtas_data_buf, RTAS_DATA_BUF_SIZE);
+	spin_unlock(&rtas_data_buf_lock);
+
+	return status;
+}
+
+/*
+ * Detach @dn from the device tree unless it, or its parent, is of type
+ * "ibm,platform-facilities". Always returns 0.
+ */
+static int delete_dt_node(struct device_node *dn)
+{
+	struct device_node *parent = of_get_parent(dn);
+	bool platfac = of_node_is_type(dn, "ibm,platform-facilities");
+
+	if (!platfac)
+		platfac = of_node_is_type(parent, "ibm,platform-facilities");
+	of_node_put(parent);
+
+	if (!platfac) {
+		pr_debug("removing node %pOFfp\n", dn);
+		dlpar_detach_node(dn);
+		return 0;
+	}
+
+	/*
+	 * Drivers bound to the platform-facilities hierarchy cannot cope
+	 * with node removal, and firmware always follows the remove with an
+	 * add of an equivalent node. The facility the node stands for
+	 * (e.g. RNG, encryption, compression) is not interrupted by the
+	 * migration, so changes to this part of the tree are ignored.
+	 */
+	pr_notice("ignoring remove operation for %pOFfp\n", dn);
+	return 0;
+}
+
+/*
+ * Build (or extend) a property from one value descriptor and, once the
+ * value is complete, install it on @dn.
+ *
+ * @dn:    node receiving the property
+ * @prop:  in/out: NULL to start a new property, otherwise the partially
+ *         assembled property from an earlier descriptor. Reset to NULL
+ *         once the property has been passed to of_update_property().
+ * @name:  property name (duplicated for a new property)
+ * @vd:    value descriptor: the length of @value, or its two's complement
+ *         when more of the value follows in a later call
+ * @value: the bytes contributed by this descriptor
+ *
+ * Returns 0 on success or -ENOMEM if an allocation fails.
+ */
+static int update_dt_property(struct device_node *dn, struct property **prop,
+			      const char *name, u32 vd, char *value)
+{
+	struct property *pending = *prop;
+	bool partial = false;
+
+	/* Top bit set: partial value, real length is the two's complement */
+	if (vd & 0x80000000) {
+		vd = -vd;
+		partial = true;
+	}
+
+	if (!pending) {
+		pending = kzalloc(sizeof(*pending), GFP_KERNEL);
+		if (!pending)
+			return -ENOMEM;
+
+		pending->name = kstrdup(name, GFP_KERNEL);
+		if (!pending->name)
+			goto free_prop;
+
+		pending->length = vd;
+		pending->value = kzalloc(pending->length, GFP_KERNEL);
+		if (!pending->value)
+			goto free_name;
+
+		memcpy(pending->value, value, vd);
+		*prop = pending;
+	} else {
+		/* Continuation: append this chunk to what we already hold */
+		char *merged = kzalloc(pending->length + vd, GFP_KERNEL);
+
+		if (!merged)
+			return -ENOMEM;
+
+		memcpy(merged, pending->value, pending->length);
+		memcpy(merged + pending->length, value, vd);
+
+		kfree(pending->value);
+		pending->value = merged;
+		pending->length += vd;
+	}
+
+	if (!partial) {
+		pr_debug("updating node %pOF property %s\n", dn, name);
+		of_update_property(dn, pending);
+		*prop = NULL;
+	}
+
+	return 0;
+
+free_name:
+	kfree(pending->name);
+free_prop:
+	kfree(pending);
+	return -ENOMEM;
+}
+
+static int update_dt_node(struct device_node *dn, s32 scope)
+{ /*
+ * Query ibm,update-properties for @dn and apply every property change
+ * found in the returned buffer to the device tree.
+ *
+ * @dn: node to update; its phandle is written into the work area header
+ * @scope: scope value forwarded to the RTAS call
+ *
+ * Returns -EINVAL if the RTAS function is not available, -ENOMEM if the
+ * work buffer cannot be allocated, and 0 in every other case.
+ * NOTE(review): a negative RTAS status and update_dt_property() failures
+ * are not reflected in the return value - confirm that is intended.
+ * NOTE(review): nothing here checks that prop_data stays inside
+ * RTAS_DATA_BUF_SIZE; the parsing relies on a well-formed buffer.
+ */
+ struct update_props_workarea *upwa;
+ struct property *prop = NULL; /* property being assembled across descriptors, if any */
+ int i, rc, rtas_rc;
+ char *prop_data; /* cursor into rtas_buf */
+ char *rtas_buf;
+ int update_properties_token;
+ u32 nprops;
+ u32 vd;
+
+ update_properties_token = rtas_function_token(RTAS_FN_IBM_UPDATE_PROPERTIES);
+ if (update_properties_token == RTAS_UNKNOWN_SERVICE)
+ return -EINVAL;
+
+ rtas_buf = kzalloc(RTAS_DATA_BUF_SIZE, GFP_KERNEL);
+ if (!rtas_buf)
+ return -ENOMEM;
+
+ upwa = (struct update_props_workarea *)&rtas_buf[0];
+ upwa->phandle = cpu_to_be32(dn->phandle);
+
+ do {
+ rtas_rc = mobility_rtas_call(update_properties_token, rtas_buf,
+ scope);
+ if (rtas_rc < 0) /* negative status: leave the loop without parsing the buffer */
+ break;
+
+ prop_data = rtas_buf + sizeof(*upwa); /* descriptors start right after the header */
+ nprops = be32_to_cpu(upwa->nprops);
+
+ /* On the first call to ibm,update-properties for a node the
+ * first property value descriptor contains an empty
+ * property name, the property value length encoded as u32,
+ * and the property value is the node path being updated.
+ */
+ if (*prop_data == 0) {
+ prop_data++;
+ vd = be32_to_cpu(*(__be32 *)prop_data);
+ prop_data += vd + sizeof(vd); /* skip the length word and the path bytes */
+ nprops--;
+ }
+
+ for (i = 0; i < nprops; i++) {
+ char *prop_name;
+
+ prop_name = prop_data; /* NUL-terminated name, followed by a big-endian u32 descriptor */
+ prop_data += strlen(prop_name) + 1;
+ vd = be32_to_cpu(*(__be32 *)prop_data);
+ prop_data += sizeof(vd);
+
+ switch (vd) {
+ case 0x00000000:
+ /* name only property, nothing to do */
+ break;
+
+ case 0x80000000: /* this descriptor value requests removal of the property */
+ of_remove_property(dn, of_find_property(dn,
+ prop_name, NULL));
+ prop = NULL;
+ break;
+
+ default:
+ rc = update_dt_property(dn, &prop, prop_name,
+ vd, prop_data);
+ if (rc) {
+ pr_err("updating %s property failed: %d\n",
+ prop_name, rc);
+ }
+
+ prop_data += vd; /* NOTE(review): vd is still the raw descriptor here, including when its top bit marks a partial value - confirm the cursor advance is right in that case */
+ break;
+ }
+
+ cond_resched();
+ }
+
+ cond_resched();
+ } while (rtas_rc == 1); /* repeat the call for as long as the status is 1 */
+
+ kfree(rtas_buf);
+ return 0;
+}
+
+/*
+ * Configure the connector identified by @drc_index and attach the
+ * resulting node under @parent_dn.
+ *
+ * @parent_dn: node the new node is attached to
+ * @drc_index: connector index, passed through to
+ *             dlpar_configure_connector() unchanged
+ *
+ * Returns 0 on success (including the deliberately ignored
+ * platform-facilities case), -ENOENT when no node could be configured,
+ * or the error from dlpar_attach_node().
+ */
+static int add_dt_node(struct device_node *parent_dn, __be32 drc_index)
+{
+	struct device_node *dn;
+	int rc;
+
+	dn = dlpar_configure_connector(drc_index, parent_dn);
+	if (!dn)
+		return -ENOENT;
+
+	/*
+	 * Since delete_dt_node() ignores this node type, this is the
+	 * necessary counterpart. We also know that a platform-facilities
+	 * node returned from dlpar_configure_connector() has children
+	 * attached, and dlpar_attach_node() only adds the parent, leaking
+	 * the children. So ignore these on the add side for now.
+	 */
+	if (of_node_is_type(dn, "ibm,platform-facilities")) {
+		pr_notice("ignoring add operation for %pOF\n", dn);
+		dlpar_free_cc_nodes(dn);
+		return 0;
+	}
+
+	rc = dlpar_attach_node(dn, parent_dn);
+	if (rc) {
+		/*
+		 * Return right away: the node has been handed to
+		 * dlpar_free_cc_nodes(), so it must not be formatted by the
+		 * debug message below, and nothing was actually added.
+		 */
+		dlpar_free_cc_nodes(dn);
+		return rc;
+	}
+
+	pr_debug("added node %pOFfp\n", dn);
+
+	return 0;
+}
+
+/*
+ * Fetch the list of changed nodes with ibm,update-nodes and apply each
+ * delete / update / add action to the device tree.
+ *
+ * @scope: scope value forwarded to the RTAS calls
+ *
+ * Returns 0 when the RTAS function is absent or the walk completes,
+ * -ENOMEM when the work buffer cannot be allocated, or the RTAS status
+ * that stopped the loop. Results of the per-node helpers are not
+ * propagated.
+ */
+static int pseries_devicetree_update(s32 scope)
+{
+	char *rtas_buf;
+	__be32 *data;
+	int update_nodes_token;
+	int rc;
+
+	update_nodes_token = rtas_function_token(RTAS_FN_IBM_UPDATE_NODES);
+	if (update_nodes_token == RTAS_UNKNOWN_SERVICE)
+		return 0;
+
+	rtas_buf = kzalloc(RTAS_DATA_BUF_SIZE, GFP_KERNEL);
+	if (!rtas_buf)
+		return -ENOMEM;
+
+	do {
+		rc = mobility_rtas_call(update_nodes_token, rtas_buf, scope);
+		if (rc && rc != 1)
+			break;
+
+		/* The action list begins at the fifth 32-bit word. */
+		data = (__be32 *)rtas_buf + 4;
+		while (be32_to_cpu(*data) & NODE_ACTION_MASK) {
+			int i;
+			u32 action = be32_to_cpu(*data) & NODE_ACTION_MASK;
+			u32 node_count = be32_to_cpu(*data) & NODE_COUNT_MASK;
+
+			data++;
+
+			for (i = 0; i < node_count; i++) {
+				struct device_node *np;
+				__be32 phandle = *data++;
+				__be32 drc_index;
+
+				np = of_find_node_by_phandle(be32_to_cpu(phandle));
+				if (!np) {
+					pr_warn("Failed lookup: phandle 0x%x for action 0x%x\n",
+						be32_to_cpu(phandle), action);
+					/*
+					 * An add entry carries a drc index
+					 * after the phandle. Step over it so
+					 * the next iteration does not read it
+					 * as a phandle.
+					 */
+					if (action == ADD_DT_NODE)
+						data++;
+					continue;
+				}
+
+				switch (action) {
+				case DELETE_DT_NODE:
+					delete_dt_node(np);
+					break;
+				case UPDATE_DT_NODE:
+					update_dt_node(np, scope);
+					break;
+				case ADD_DT_NODE:
+					drc_index = *data++;
+					add_dt_node(np, drc_index);
+					break;
+				}
+
+				of_node_put(np);
+				cond_resched();
+			}
+		}
+
+		cond_resched();
+	} while (rc == 1);	/* status 1: call again for the next batch */
+
+	kfree(rtas_buf);
+	return rc;
+}
+
+/*
+ * Bring kernel state in line with the platform after a migration:
+ * activate firmware, refresh the device tree (with the cacheinfo
+ * hierarchy torn down around the update), then redo the security
+ * mitigation setup and the hv-24x7 system information.
+ */
+void post_mobility_fixup(void)
+{
+	int err;
+
+	rtas_activate_firmware();
+
+	/* Keep CPUs from going online/offline while the tree is rewritten. */
+	cpus_read_lock();
+
+	/*
+	 * The destination firmware commonly replaces cache nodes, so drop
+	 * every reference the cacheinfo hierarchy holds before the update
+	 * and rebuild it afterwards.
+	 */
+	cacheinfo_teardown();
+
+	err = pseries_devicetree_update(MIGRATION_SCOPE);
+	if (err)
+		pr_err("device tree update failed: %d\n", err);
+
+	cacheinfo_rebuild();
+
+	cpus_read_unlock();
+
+	/* The L1 flush type may differ on the destination. */
+	pseries_setup_security_mitigations();
+
+	/* Refresh the system information used by hv-24x7. */
+	read_24x7_sys_info();
+}
+
+/*
+ * Query the state of a VASI session with H_VASI_STATE.
+ *
+ * @handle: session handle passed to the hypervisor
+ * @res:    receives the reported state; written only on success
+ *
+ * Returns 0 on success, -EINVAL for H_PARAMETER, -EOPNOTSUPP for
+ * H_FUNCTION, and -EIO (after logging) for any other result.
+ */
+static int poll_vasi_state(u64 handle, unsigned long *res)
+{
+	unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+	long hvrc = plpar_hcall(H_VASI_STATE, retbuf, handle);
+
+	if (hvrc == H_SUCCESS) {
+		*res = retbuf[0];
+		return 0;
+	}
+
+	if (hvrc == H_PARAMETER)
+		return -EINVAL;
+
+	if (hvrc == H_FUNCTION)
+		return -EOPNOTSUPP;
+
+	/* H_HARDWARE and anything unrecognised */
+	pr_err("unexpected H_VASI_STATE result %ld\n", hvrc);
+	return -EIO;
+}
+
+/*
+ * Poll once per second until the VASI session moves from
+ * H_VASI_ENABLED to H_VASI_SUSPENDING.
+ *
+ * Returns 0 once the session is suspending, or when H_VASI_STATE is not
+ * supported at all; -EIO for any other state; otherwise the error from
+ * poll_vasi_state().
+ */
+static int wait_for_vasi_session_suspending(u64 handle)
+{
+	unsigned long state;
+	int ret;
+
+	for (;;) {
+		ret = poll_vasi_state(handle, &state);
+		if (ret != 0)
+			break;
+
+		if (state == H_VASI_SUSPENDING)
+			break;
+
+		if (state != H_VASI_ENABLED) {
+			/* Only ENABLED -> SUSPENDING is an expected transition */
+			pr_err("unexpected H_VASI_STATE result %lu\n", state);
+			ret = -EIO;
+			break;
+		}
+
+		ssleep(1);
+	}
+
+	/*
+	 * An unavailable H_VASI_STATE is not fatal: if H_JOIN or
+	 * ibm,suspend-me are unimplemented too, that is handled later.
+	 */
+	return (ret == -EOPNOTSUPP) ? 0 : ret;
+}
+
+/*
+ * Poll every 500 ms until the VASI session moves from H_VASI_RESUMED to
+ * H_VASI_COMPLETED. Gives up (after logging) on a poll error or on any
+ * other state.
+ */
+static void wait_for_vasi_session_completed(u64 handle)
+{
+	unsigned long state = 0;
+	int ret;
+
+	pr_info("waiting for memory transfer to complete...\n");
+
+	for (;;) {
+		ret = poll_vasi_state(handle, &state);
+
+		/*
+		 * If the memory transfer has already completed and the
+		 * hypervisor has cleaned up the migration, H_PARAMETER is
+		 * returned, which poll_vasi_state() translates to -EINVAL.
+		 */
+		if (ret == -EINVAL || (ret == 0 && state == H_VASI_COMPLETED)) {
+			pr_info("memory transfer completed.\n");
+			return;
+		}
+
+		if (ret != 0) {
+			pr_err("H_VASI_STATE return error (%d)\n", ret);
+			return;
+		}
+
+		if (state != H_VASI_RESUMED) {
+			pr_err("unexpected H_VASI_STATE result %lu\n", state);
+			return;
+		}
+
+		msleep(500);
+	}
+}
+
+/* Issue H_PROD to @target_cpu, logging (rate-limited) if the hcall fails. */
+static void prod_single(unsigned int target_cpu)
+{
+	int hwid = get_hard_smp_processor_id(target_cpu);
+	long hvrc = plpar_hcall_norets(H_PROD, hwid);
+
+	if (hvrc != H_SUCCESS)
+		pr_err_ratelimited("H_PROD of CPU %u (hwid %d) error: %ld\n",
+				   target_cpu, hwid, hvrc);
+}
+
+/* H_PROD every online CPU except the one running this function. */
+static void prod_others(void)
+{
+	unsigned int cpu;
+
+	for_each_online_cpu(cpu) {
+		if (cpu == smp_processor_id())
+			continue;
+
+		prod_single(cpu);
+	}
+}
+
+/*
+ * Shrink the SLB size to SLB_MIN_SIZE and return the previous value.
+ * Without CONFIG_PPC_64S_HASH_MMU this does nothing and returns 0.
+ */
+static u16 clamp_slb_size(void)
+{
+	u16 previous = 0;
+
+#ifdef CONFIG_PPC_64S_HASH_MMU
+	previous = mmu_slb_size;
+	slb_set_size(SLB_MIN_SIZE);
+#endif
+
+	return previous;
+}
+
+/*
+ * Call ibm,suspend-me on the current CPU.
+ *
+ * Returns the result of rtas_ibm_suspend_me(); on failure the RTAS
+ * status is logged and the SLB size saved before the call is restored.
+ */
+static int do_suspend(void)
+{
+	u16 slb_before;
+	int rtas_status;
+	int err;
+
+	pr_info("calling ibm,suspend-me on CPU %i\n", smp_processor_id());
+
+	/*
+	 * The destination processor may have fewer SLB entries than the
+	 * source, so drop mmu_slb_size to a safe minimum before suspending
+	 * to avoid programming entries that do not exist there. A failed
+	 * suspend restores the old size below; after a successful one the
+	 * OF reconfig path picks up the size from the new device tree.
+	 */
+	slb_before = clamp_slb_size();
+
+	err = rtas_ibm_suspend_me(&rtas_status);
+	if (err == 0)
+		return 0;
+
+	pr_err("ibm,suspend-me error: %d\n", rtas_status);
+	slb_set_size(slb_before);
+
+	return err;
+}
+
+/**
+ * struct pseries_suspend_info - State shared between CPUs for join/suspend.
+ * @counter: Incremented by each thread once it is past H_JOIN, whether it
+ * resumed from the suspend or got an error from H_JOIN. The thread
+ * whose increment yields 1 (the first one) is the one that sets
+ * @done and prods the other threads.
+ * @done: False while join/suspend is in progress. Set to true by the
+ * waking thread once the operation is over, successful or not;
+ * joined threads check it to tell a real wakeup from a stray prod.
+ */
+struct pseries_suspend_info {
+ atomic_t counter;
+ bool done;
+};
+
+/*
+ * stop_machine() callback run on every online CPU for a partition
+ * suspend.
+ *
+ * @arg: the struct pseries_suspend_info shared by all CPUs
+ *
+ * Each CPU calls H_JOIN. The one that gets H_CONTINUE performs the
+ * suspend through do_suspend(); the others stay in H_JOIN until prodded.
+ * Whichever CPU is first to increment info->counter marks the operation
+ * done and prods the rest.
+ *
+ * Returns 0 on success, the do_suspend() result on the suspending CPU,
+ * or -EIO when H_JOIN fails.
+ */
+static int do_join(void *arg)
+{
+	struct pseries_suspend_info *info = arg;
+	atomic_t *counter = &info->counter;
+	long hvrc;
+	int ret;
+
+retry:
+	/* Must ensure MSR.EE off for H_JOIN. */
+	hard_irq_disable();
+	hvrc = plpar_hcall_norets(H_JOIN);
+
+	switch (hvrc) {
+	case H_CONTINUE:
+		/*
+		 * All other CPUs are offline or in H_JOIN. This CPU
+		 * attempts the suspend.
+		 */
+		ret = do_suspend();
+		break;
+	case H_SUCCESS:
+		/*
+		 * The suspend is complete and this cpu has received a
+		 * prod, or we've received a stray prod from unrelated
+		 * code (e.g. paravirt spinlocks) and we need to join
+		 * again.
+		 *
+		 * This barrier orders the return from H_JOIN above vs
+		 * the load of info->done. It pairs with the barrier
+		 * in the wakeup/prod path below.
+		 */
+		smp_mb();
+		if (READ_ONCE(info->done) == false) {
+			/* Message now ends in a newline like the others here. */
+			pr_info_ratelimited("premature return from H_JOIN on CPU %i, retrying\n",
+					    smp_processor_id());
+			goto retry;
+		}
+		ret = 0;
+		break;
+	case H_BAD_MODE:
+	case H_HARDWARE:
+	default:
+		ret = -EIO;
+		pr_err_ratelimited("H_JOIN error %ld on CPU %i\n",
+				   hvrc, smp_processor_id());
+		break;
+	}
+
+	/* Only the first CPU to get here wakes the others. */
+	if (atomic_inc_return(counter) == 1) {
+		pr_info("CPU %u waking all threads\n", smp_processor_id());
+		WRITE_ONCE(info->done, true);
+		/*
+		 * This barrier orders the store to info->done vs subsequent
+		 * H_PRODs to wake the other CPUs. It pairs with the barrier
+		 * in the H_SUCCESS case above.
+		 */
+		smp_mb();
+		prod_others();
+	}
+	/*
+	 * Execution may have been suspended for several seconds, so reset
+	 * the watchdogs. touch_nmi_watchdog() also touches the soft lockup
+	 * watchdog.
+	 */
+	rcu_cpu_stall_reset();
+	touch_nmi_watchdog();
+
+	return ret;
+}
+
+/*
+ * Abort reason code byte 0: the entity that aborts the migration. Only
+ * MIGRATING_PARTITION is used in this file; pseries_cancel_migration()
+ * places it in the top byte of the reason code.
+ */
+enum vasi_aborting_entity {
+ ORCHESTRATOR = 1,
+ VSP_SOURCE = 2,
+ PARTITION_FIRMWARE = 3,
+ PLATFORM_FIRMWARE = 4,
+ VSP_TARGET = 5,
+ MIGRATING_PARTITION = 6,
+};
+
+/*
+ * Ask the hypervisor to cancel the migration for @handle.
+ *
+ * The reason code carries MIGRATING_PARTITION in its top byte and the
+ * low 24 bits of |@err| below it. A failing H_VASI_SIGNAL is only logged.
+ */
+static void pseries_cancel_migration(u64 handle, int err)
+{
+	const u8 who = MIGRATING_PARTITION;
+	u32 why = abs(err) & 0xffffff;
+	u32 reason_code = (who << 24) | why;
+	long hvrc;
+
+	hvrc = plpar_hcall_norets(H_VASI_SIGNAL, handle,
+				  H_VASI_SIGNAL_CANCEL, reason_code);
+	if (hvrc != 0)
+		pr_err("H_VASI_SIGNAL error: %ld\n", hvrc);
+}
+
+/*
+ * Suspend the partition, retrying after transient failures.
+ *
+ * @handle: VASI session handle, used to check the session state between
+ *          attempts
+ *
+ * Runs do_join() on all online CPUs through stop_machine(). On failure
+ * the attempt is repeated, up to five attempts in total, with a delay
+ * that starts at 1 ms and grows tenfold each time, but only while the
+ * VASI session is still in H_VASI_SUSPENDING (or its state cannot be
+ * queried because H_VASI_STATE is unsupported).
+ *
+ * Returns 0 on success or the error from the last stop_machine() run.
+ */
+static int pseries_suspend(u64 handle)
+{
+	const unsigned int max_attempts = 5;
+	unsigned int retry_interval_ms = 1;
+	unsigned int attempt = 1;
+	int ret;
+
+	while (true) {
+		struct pseries_suspend_info info;
+		unsigned long vasi_state;
+		int vasi_err;
+
+		/* Fresh shared state for every attempt */
+		info = (struct pseries_suspend_info) {
+			.counter = ATOMIC_INIT(0),
+			.done = false,
+		};
+
+		ret = stop_machine(do_join, &info, cpu_online_mask);
+		if (ret == 0)
+			break;
+		/*
+		 * Encountered an error. If the VASI stream is still
+		 * in Suspending state, it's likely a transient
+		 * condition related to some device in the partition
+		 * and we can retry in the hope that the cause has
+		 * cleared after some delay.
+		 *
+		 * A better design would allow drivers etc to prepare
+		 * for the suspend and avoid conditions which prevent
+		 * the suspend from succeeding. For now, we have this
+		 * mitigation.
+		 */
+		pr_notice("Partition suspend attempt %u of %u error: %d\n",
+			  attempt, max_attempts, ret);
+
+		if (attempt == max_attempts)
+			break;
+
+		vasi_err = poll_vasi_state(handle, &vasi_state);
+		if (vasi_err == 0) {
+			if (vasi_state != H_VASI_SUSPENDING) {
+				pr_notice("VASI state %lu after failed suspend\n",
+					  vasi_state);
+				break;
+			}
+		} else if (vasi_err != -EOPNOTSUPP) {
+			/* Message now ends in a newline like the others here. */
+			pr_err("VASI state poll error: %d\n", vasi_err);
+			break;
+		}
+
+		pr_notice("Will retry partition suspend after %u ms\n",
+			  retry_interval_ms);
+
+		msleep(retry_interval_ms);
+		retry_interval_ms *= 10;
+		attempt++;
+	}
+
+	return ret;
+}
+
+/*
+ * Drive one partition migration for the VASI session @handle.
+ *
+ * VAS windows are closed up front and reopened on every exit path. Once
+ * the session reaches the suspending state the partition is suspended;
+ * on success the post-migration fixups run and we wait for the memory
+ * transfer to finish, on failure the migration is cancelled.
+ *
+ * Returns 0 on success or the error from the wait/suspend step.
+ */
+static int pseries_migrate_partition(u64 handle)
+{
+	unsigned int wd_factor = 0;
+	int err;
+
+#ifdef CONFIG_PPC_WATCHDOG
+	wd_factor = nmi_wd_lpm_factor;
+#endif
+	/*
+	 * When a migration starts, the hypervisor changes the VAS mappings
+	 * before the OS is notified and closes all VAS windows. Until then
+	 * NX keeps generating faults that user space cannot tell apart from
+	 * the migration event, so shorten that window by closing the VAS
+	 * windows first thing.
+	 */
+	vas_migration_handler(VAS_SUSPEND);
+
+	err = wait_for_vasi_session_suspending(handle);
+	if (err == 0) {
+		if (wd_factor)
+			watchdog_hardlockup_set_timeout_pct(wd_factor);
+
+		err = pseries_suspend(handle);
+		if (err) {
+			pseries_cancel_migration(handle, err);
+		} else {
+			post_mobility_fixup();
+			/*
+			 * Hold the caller until the memory transfer is
+			 * done, so that the user space process returns
+			 * from the syscall only afterwards and its hooks
+			 * run at the right time.
+			 */
+			wait_for_vasi_session_completed(handle);
+		}
+
+		if (wd_factor)
+			watchdog_hardlockup_set_timeout_pct(0);
+	}
+
+	vas_migration_handler(VAS_RESUME);
+
+	return err;
+}
+
+/* Non-static entry point that simply runs pseries_migrate_partition(). */
+int rtas_syscall_dispatch_ibm_suspend_me(u64 handle)
+{
+	int rc = pseries_migrate_partition(handle);
+
+	return rc;
+}
+
+/*
+ * Store handler of the write-only "migration" attribute: parse a stream
+ * id from @buf (base auto-detected) and migrate the partition with it.
+ *
+ * Returns @count on success, otherwise the parse or migration error.
+ */
+static ssize_t migration_store(const struct class *class,
+			       const struct class_attribute *attr, const char *buf,
+			       size_t count)
+{
+	u64 stream;
+	int err;
+
+	err = kstrtou64(buf, 0, &stream);
+	if (err == 0)
+		err = pseries_migrate_partition(stream);
+
+	if (err != 0)
+		return err;
+
+	return count;
+}
+
+/*
+ * Read by drmgr to find out how the kernel's migration interface behaves.
+ *
+ * Version 1: The kernel performs all PAPR requirements for migration,
+ * including firmware activation and the device tree update.
+ *
+ * The value is exported as the read-only "api_version" attribute next to
+ * the write-only "migration" attribute.
+ */
+#define MIGRATION_API_VERSION 1
+
+static CLASS_ATTR_WO(migration);
+static CLASS_ATTR_STRING(api_version, 0444, __stringify(MIGRATION_API_VERSION));
+
+/*
+ * Create the "mobility" kobject under kernel_kobj and add the
+ * "migration" and "api_version" files to it.
+ *
+ * Returns -ENOMEM if the kobject cannot be created. A file that cannot
+ * be created is only logged; the function still returns 0.
+ */
+static int __init mobility_sysfs_init(void)
+{
+	int err;
+
+	mobility_kobj = kobject_create_and_add("mobility", kernel_kobj);
+	if (mobility_kobj == NULL)
+		return -ENOMEM;
+
+	err = sysfs_create_file(mobility_kobj, &class_attr_migration.attr);
+	if (err)
+		pr_err("unable to create migration sysfs file (%d)\n", err);
+
+	err = sysfs_create_file(mobility_kobj, &class_attr_api_version.attr.attr);
+	if (err)
+		pr_err("unable to create api_version sysfs file (%d)\n", err);
+
+	return 0;
+}
+machine_device_initcall(pseries, mobility_sysfs_init);
diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c
new file mode 100644
index 000000000..423ee1d5b
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/msi.c
@@ -0,0 +1,698 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2006 Jake Moilanen <moilanen@austin.ibm.com>, IBM Corp.
+ * Copyright 2006-2007 Michael Ellerman, IBM Corp.
+ */
+
+#include <linux/crash_dump.h>
+#include <linux/device.h>
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
+#include <linux/msi.h>
+
+#include <asm/rtas.h>
+#include <asm/hw_irq.h>
+#include <asm/ppc-pci.h>
+#include <asm/machdep.h>
+#include <asm/xive.h>
+
+#include "pseries.h"
+
+static int query_token, change_token; /* RTAS tokens used by rtas_query_irq_number() and rtas_change_msi() respectively */
+
+#define RTAS_QUERY_FN 0 /* values for the 'func' argument of rtas_change_msi() */
+#define RTAS_CHANGE_FN 1 /* the "old firmware call" fallback; issued with 3 return values */
+#define RTAS_RESET_FN 2
+#define RTAS_CHANGE_MSI_FN 3 /* this and the two below are issued with 4 return values */
+#define RTAS_CHANGE_MSIX_FN 4
+#define RTAS_CHANGE_32MSI_FN 5
+
+/* RTAS Helpers */
+
+/*
+ * Ask firmware to change the number of MSIs assigned to a device.
+ *
+ * @pdn:      PCI device node; supplies the config address and PHB BUID
+ * @func:     one of the RTAS_*_FN function codes
+ * @num_irqs: number of interrupts requested (0 to disable)
+ *
+ * The call is repeated while rtas_busy_delay() reports a busy status,
+ * feeding back the sequence number firmware returned.
+ *
+ * Returns the number of interrupts firmware reports as assigned when
+ * the call succeeds, otherwise a negative value.
+ */
+static int rtas_change_msi(struct pci_dn *pdn, u32 func, u32 num_irqs)
+{
+	/*
+	 * Zero-initialised so that the values read below are defined even
+	 * if rtas_call() returns without filling the buffer.
+	 */
+	u32 addr, seq_num, rtas_ret[3] = { 0 };
+	unsigned long buid;
+	int rc;
+
+	addr = rtas_config_addr(pdn->busno, pdn->devfn, 0);
+	buid = pdn->phb->buid;
+
+	seq_num = 1;
+	do {
+		/* The explicit MSI/MSI-X/32-bit variants take one more return value */
+		if (func == RTAS_CHANGE_MSI_FN || func == RTAS_CHANGE_MSIX_FN ||
+		    func == RTAS_CHANGE_32MSI_FN)
+			rc = rtas_call(change_token, 6, 4, rtas_ret, addr,
+				       BUID_HI(buid), BUID_LO(buid),
+				       func, num_irqs, seq_num);
+		else
+			rc = rtas_call(change_token, 6, 3, rtas_ret, addr,
+				       BUID_HI(buid), BUID_LO(buid),
+				       func, num_irqs, seq_num);
+
+		seq_num = rtas_ret[1];
+	} while (rtas_busy_delay(rc));
+
+	/*
+	 * If the RTAS call succeeded, return the number of irqs allocated.
+	 * If not, make sure we return a negative error code.
+	 */
+	if (rc == 0)
+		rc = rtas_ret[0];
+	else if (rc > 0)
+		rc = -rc;
+
+	pr_debug("rtas_msi: ibm,change_msi(func=%d,num=%d), got %d rc = %d\n",
+		 func, num_irqs, rtas_ret[0], rc);
+
+	return rc;
+}
+
+/*
+ * Set the MSI count of @pdev to zero, trying the explicit firmware
+ * interface first and the old one second. Does nothing when the device
+ * has no pci_dn.
+ */
+static void rtas_disable_msi(struct pci_dev *pdev)
+{
+	struct pci_dn *pdn = pci_get_pdn(pdev);
+
+	if (!pdn)
+		return;
+
+	/* Disabling MSI through the explicit interface covers MSI-X too. */
+	if (rtas_change_msi(pdn, RTAS_CHANGE_MSI_FN, 0) == 0)
+		return;
+
+	/* That may have failed because the explicit interface is absent. */
+	if (rtas_change_msi(pdn, RTAS_CHANGE_FN, 0) != 0)
+		pr_debug("rtas_msi: Setting MSIs to 0 failed!\n");
+}
+
+/*
+ * Ask firmware for the interrupt source number of one MSI of a device.
+ *
+ * @pdn:    PCI device node; supplies the config address and PHB BUID
+ * @offset: index of the MSI being queried
+ *
+ * Returns the first value returned by firmware on success, otherwise
+ * the non-zero RTAS status.
+ */
+static int rtas_query_irq_number(struct pci_dn *pdn, int offset)
+{
+	u32 cfg_addr = rtas_config_addr(pdn->busno, pdn->devfn, 0);
+	unsigned long buid = pdn->phb->buid;
+	u32 rtas_ret[2];
+	int status;
+
+	do {
+		status = rtas_call(query_token, 4, 3, rtas_ret, cfg_addr,
+				   BUID_HI(buid), BUID_LO(buid), offset);
+	} while (rtas_busy_delay(status));
+
+	if (status != 0) {
+		pr_debug("rtas_msi: error (%d) querying source number\n", status);
+		return status;
+	}
+
+	return rtas_ret[0];
+}
+
+/*
+ * Compare a request for @nvec vectors with the count the device node
+ * advertises in @prop_name.
+ *
+ * Returns 0 when the property covers the request, -ENOENT when the
+ * property is missing, -ENOSPC when it is zero, and otherwise the
+ * (smaller) advertised count.
+ */
+static int check_req(struct pci_dev *pdev, int nvec, char *prop_name)
+{
+	struct device_node *dn = pci_device_to_OF_node(pdev);
+	const __be32 *prop = of_get_property(dn, prop_name, NULL);
+	u32 req_msi;
+
+	if (!prop) {
+		pr_debug("rtas_msi: No %s on %pOF\n", prop_name, dn);
+		return -ENOENT;
+	}
+
+	req_msi = be32_to_cpup(prop);
+	if (req_msi >= nvec)
+		return 0;
+
+	pr_debug("rtas_msi: %s requests < %d MSIs\n", prop_name, nvec);
+
+	/* Be paranoid: never hand back zero as a usable count */
+	if (req_msi == 0)
+		return -ENOSPC;
+
+	return req_msi;
+}
+
+/* check_req() against the "ibm,req#msi" property. */
+static int check_req_msi(struct pci_dev *pdev, int nvec)
+{
+	int rc = check_req(pdev, nvec, "ibm,req#msi");
+
+	return rc;
+}
+
+/* check_req() against the "ibm,req#msi-x" property. */
+static int check_req_msix(struct pci_dev *pdev, int nvec)
+{
+	int rc = check_req(pdev, nvec, "ibm,req#msi-x");
+
+	return rc;
+}
+
+/* Quota calculation */
+
+/*
+ * Walk from @node towards the root looking for the first node carrying
+ * "ibm,pe-total-#msi".
+ *
+ * On a hit, *@total receives the property value and the node is
+ * returned with the reference taken during the walk still held.
+ * Returns NULL, leaving *@total untouched, when no node has the property.
+ */
+static struct device_node *__find_pe_total_msi(struct device_node *node, int *total)
+{
+	struct device_node *cur;
+
+	for (cur = of_node_get(node); cur; cur = of_get_next_parent(cur)) {
+		const __be32 *prop = of_get_property(cur, "ibm,pe-total-#msi", NULL);
+
+		if (!prop)
+			continue;
+
+		pr_debug("rtas_msi: found prop on dn %pOF\n",
+			 cur);
+		*total = be32_to_cpup(prop);
+		return cur;
+	}
+
+	return NULL;
+}
+
+/* __find_pe_total_msi() starting from the device-tree node of @dev. */
+static struct device_node *find_pe_total_msi(struct pci_dev *dev, int *total)
+{
+	struct device_node *start = pci_device_to_OF_node(dev);
+
+	return __find_pe_total_msi(start, total);
+}
+
+/*
+ * Fallback used when no "ibm,pe-total-#msi" property was found: locate
+ * the parent node of the first device in @dev's PE and assume a total
+ * of 8 MSIs for it.
+ *
+ * Returns the parent node with a reference held and *@total set to 8,
+ * or NULL (leaving *@total untouched) when a node or the EEH device
+ * cannot be found.
+ */
+static struct device_node *find_pe_dn(struct pci_dev *dev, int *total)
+{
+	struct device_node *dn;
+	struct eeh_dev *edev;
+
+	dn = pci_device_to_OF_node(dev);
+	if (!dn)
+		return NULL;
+
+	/* Get the top level device in the PE */
+	edev = pdn_to_eeh_dev(PCI_DN(dn));
+	if (!edev)
+		return NULL;	/* no EEH device to look up: treat as "no PE" */
+	if (edev->pe)
+		edev = list_first_entry(&edev->pe->edevs, struct eeh_dev,
+					entry);
+	dn = pci_device_to_OF_node(edev->pdev);
+	if (!dn)
+		return NULL;
+
+	/* We actually want the parent */
+	dn = of_get_parent(dn);
+	if (!dn)
+		return NULL;
+
+	/* Hardcode of 8 for old firmwares */
+	*total = 8;
+	pr_debug("rtas_msi: using PE dn %pOF\n", dn);
+
+	return dn;
+}
+
+struct msi_counts { /* accumulator handed to the pci_traverse_device_nodes() callbacks by msi_quota_for_device() */
+ struct device_node *requestor; /* node of the device asking for MSIs */
+ int num_devices; /* nodes under the PE whose class is not PCI bridge */
+ int request; /* number of MSIs the requestor asks for */
+ int quota; /* total / num_devices, later raised by a share of the spare */
+ int spare; /* sum of (quota - need) over nodes needing less than quota, plus the division remainder */
+ int over_quota; /* number of nodes needing more than quota */
+};
+
+/*
+ * Device-node traversal callback: bump counts->num_devices for every
+ * node whose "class-code" is not a PCI-to-PCI bridge. A node without
+ * the property is counted. Always returns NULL.
+ */
+static void *count_non_bridge_devices(struct device_node *dn, void *data)
+{
+	struct msi_counts *counts = data;
+	const __be32 *prop;
+	u32 class = 0;
+
+	pr_debug("rtas_msi: counting %pOF\n", dn);
+
+	prop = of_get_property(dn, "class-code", NULL);
+	if (prop)
+		class = be32_to_cpup(prop);
+
+	if ((class >> 8) != PCI_CLASS_BRIDGE_PCI)
+		counts->num_devices++;
+
+	return NULL;
+}
+
+/*
+ * Device-node traversal callback: work out how many MSIs @dn needs and
+ * record whether that leaves spare quota or exceeds it. Always returns
+ * NULL.
+ */
+static void *count_spare_msis(struct device_node *dn, void *data)
+{
+	struct msi_counts *counts = data;
+	int need = 0;
+
+	if (dn == counts->requestor) {
+		need = counts->request;
+	} else {
+		const __be32 *prop;
+
+		/*
+		 * There is no way to know whether a driver will go for MSI
+		 * or MSI-X, so assume the larger of the two requirements.
+		 */
+		prop = of_get_property(dn, "ibm,req#msi", NULL);
+		if (prop)
+			need = be32_to_cpup(prop);
+
+		prop = of_get_property(dn, "ibm,req#msi-x", NULL);
+		if (prop)
+			need = max(need, (int)be32_to_cpup(prop));
+	}
+
+	if (need < counts->quota)
+		counts->spare += counts->quota - need;
+	else if (need > counts->quota)
+		counts->over_quota++;
+
+	return NULL;
+}
+
+/*
+ * Work out how many of the @request MSIs @dev may have, given the total
+ * available to its PE and the needs of the other devices under it.
+ *
+ * Returns @request unchanged when it fits the even per-device share, or
+ * when the PE or its devices cannot be determined; otherwise @request
+ * clamped to the adjusted quota.
+ */
+static int msi_quota_for_device(struct pci_dev *dev, int request)
+{
+	struct msi_counts counts;
+	struct device_node *pe;
+	int total;
+
+	pr_debug("rtas_msi: calc quota for %s, request %d\n", pci_name(dev),
+		 request);
+
+	pe = find_pe_total_msi(dev, &total);
+	if (!pe)
+		pe = find_pe_dn(dev, &total);
+
+	if (!pe) {
+		pr_err("rtas_msi: couldn't find PE for %s\n", pci_name(dev));
+		goto done;
+	}
+
+	pr_debug("rtas_msi: found PE %pOF\n", pe);
+
+	memset(&counts, 0, sizeof(counts));
+
+	/* First pass: how many devices share this PE? */
+	pci_traverse_device_nodes(pe, count_non_bridge_devices, &counts);
+
+	if (!counts.num_devices) {
+		pr_err("rtas_msi: found 0 devices under PE for %s\n",
+		       pci_name(dev));
+		goto done;
+	}
+
+	counts.quota = total / counts.num_devices;
+	if (request <= counts.quota)
+		goto done;
+
+	/* Second pass: the request exceeds an even share, look for slack. */
+	counts.requestor = pci_device_to_OF_node(dev);
+	counts.request = request;
+	pci_traverse_device_nodes(pe, count_spare_msis, &counts);
+
+	/* Whatever the even split left over is spare as well. */
+	counts.spare += total % counts.num_devices;
+
+	/* Share the spare among everyone who wants more than the quota. */
+	if (counts.over_quota)
+		counts.quota += counts.spare / counts.over_quota;
+
+	/* Finally clamp the request to the (possibly raised) quota. */
+	request = min(counts.quota, request);
+
+	pr_debug("rtas_msi: request clamped to quota %d\n", request);
+done:
+	of_node_put(pe);
+
+	return request;
+}
+
+/*
+ * Rewrite the MSI address registers of @pdev so that the message address
+ * fits in 32 bits: the low word is rebuilt from the current high word
+ * and the high word is cleared.
+ */
+static void rtas_hack_32bit_msi_gen2(struct pci_dev *pdev)
+{
+	const int hi_reg = pdev->msi_cap + PCI_MSI_ADDRESS_HI;
+	const int lo_reg = pdev->msi_cap + PCI_MSI_ADDRESS_LO;
+	u32 hi, lo;
+
+	/*
+	 * This path should only be taken on IODA1 configurations: MSIs are
+	 * managed through RTAS, firmware lacks the 32 bit MSI call, and the
+	 * device sits in a PCIe Gen2 slot.
+	 */
+	dev_info(&pdev->dev,
+		 "rtas_msi: No 32 bit MSI firmware support, forcing 32 bit MSI\n");
+
+	pci_read_config_dword(pdev, hi_reg, &hi);
+	lo = 0xffff0000 | ((hi >> (48 - 32)) << 4);
+	pci_write_config_dword(pdev, lo_reg, lo);
+	pci_write_config_dword(pdev, hi_reg, 0);
+}
+
+static int rtas_prepare_msi_irqs(struct pci_dev *pdev, int nvec_in, int type,
+ msi_alloc_info_t *arg)
+{ /*
+ * Reserve @nvec_in MSI or MSI-X vectors for @pdev with firmware.
+ *
+ * @pdev: device the vectors are for
+ * @nvec_in: number of vectors wanted
+ * @type: PCI_CAP_ID_MSIX for MSI-X; any other value takes the MSI checks
+ * @arg: not used by this function
+ *
+ * Returns 0 when firmware granted exactly the number asked for. A
+ * positive return is a smaller count taken from the device property, the
+ * quota, or the firmware result; a negative return is an error.
+ */
+ struct pci_dn *pdn;
+ int quota, rc;
+ int nvec = nvec_in; /* count actually requested from firmware; may be rounded up for MSI-X */
+ int use_32bit_msi_hack = 0;
+
+ if (type == PCI_CAP_ID_MSIX)
+ rc = check_req_msix(pdev, nvec);
+ else
+ rc = check_req_msi(pdev, nvec);
+
+ if (rc) /* negative error, or the smaller count the device advertises */
+ return rc;
+
+ quota = msi_quota_for_device(pdev, nvec);
+
+ if (quota && quota < nvec) /* tell the caller how many it may ask for instead */
+ return quota;
+
+ /*
+ * Firmware currently refuse any non power of two allocation
+ * so we round up if the quota will allow it.
+ */
+ if (type == PCI_CAP_ID_MSIX) {
+ int m = roundup_pow_of_two(nvec);
+ quota = msi_quota_for_device(pdev, m);
+
+ if (quota >= m)
+ nvec = m;
+ }
+
+ pdn = pci_get_pdn(pdev); /* NOTE(review): used below without a NULL check, unlike rtas_disable_msi() - confirm it cannot be NULL here */
+
+ /*
+ * Try the new more explicit firmware interface, if that fails fall
+ * back to the old interface. The old interface is known to never
+ * return MSI-Xs.
+ */
+again:
+ if (type == PCI_CAP_ID_MSI) {
+ if (pdev->no_64bit_msi) {
+ rc = rtas_change_msi(pdn, RTAS_CHANGE_32MSI_FN, nvec);
+ if (rc < 0) {
+ /*
+ * We only want to run the 32 bit MSI hack below if
+ * the max bus speed is Gen2 speed
+ */
+ if (pdev->bus->max_bus_speed != PCIE_SPEED_5_0GT)
+ return rc;
+
+ use_32bit_msi_hack = 1;
+ }
+ } else
+ rc = -1; /* forces the RTAS_CHANGE_MSI_FN attempt below */
+
+ if (rc < 0)
+ rc = rtas_change_msi(pdn, RTAS_CHANGE_MSI_FN, nvec);
+
+ if (rc < 0) {
+ pr_debug("rtas_msi: trying the old firmware call.\n");
+ rc = rtas_change_msi(pdn, RTAS_CHANGE_FN, nvec);
+ }
+
+ if (use_32bit_msi_hack && rc > 0)
+ rtas_hack_32bit_msi_gen2(pdev);
+ } else
+ rc = rtas_change_msi(pdn, RTAS_CHANGE_MSIX_FN, nvec);
+
+ if (rc != nvec) {
+ if (nvec != nvec_in) { /* the rounded-up request was not met: retry once with the original count */
+ nvec = nvec_in;
+ goto again;
+ }
+ pr_debug("rtas_msi: rtas_change_msi() failed\n");
+ return rc;
+ }
+
+ return 0;
+}
+
+/*
+ * msi_prepare callback: pick MSI-X or MSI from the device's
+ * msix_enabled flag and reserve @nvec vectors with firmware.
+ */
+static int pseries_msi_ops_prepare(struct irq_domain *domain, struct device *dev,
+				   int nvec, msi_alloc_info_t *arg)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	int type;
+
+	if (pdev->msix_enabled)
+		type = PCI_CAP_ID_MSIX;
+	else
+		type = PCI_CAP_ID_MSI;
+
+	return rtas_prepare_msi_irqs(pdev, nvec, type, arg);
+}
+
+/*
+ * ->msi_free() runs before irq_domain_free_irqs_top(), while the handler
+ * data is still around, which makes it the place to release the XIVE
+ * controller data for @irq.
+ */
+static void pseries_msi_ops_msi_free(struct irq_domain *domain,
+				     struct msi_domain_info *info,
+				     unsigned int irq)
+{
+	if (!xive_enabled())
+		return;
+
+	xive_irq_free_data(irq);
+}
+
+/*
+ * RTAS cannot disable MSIs one at a time, only all of them together, so
+ * this runs once every IRQ has been freed. Warns once and does nothing
+ * for a non-PCI device.
+ */
+static void pseries_msi_post_free(struct irq_domain *domain, struct device *dev)
+{
+	if (!WARN_ON_ONCE(!dev_is_pci(dev)))
+		rtas_disable_msi(to_pci_dev(dev));
+}
+
+static struct msi_domain_ops pseries_pci_msi_domain_ops = { /* installed through pseries_msi_domain_info.ops */
+ .msi_prepare = pseries_msi_ops_prepare, /* reserves the vectors with firmware */
+ .msi_free = pseries_msi_ops_msi_free, /* frees XIVE data for one irq */
+ .msi_post_free = pseries_msi_post_free, /* disables MSIs for the whole device */
+};
+
+/* irq_shutdown: forward to the parent chip's irq_shutdown, if it has one. */
+static void pseries_msi_shutdown(struct irq_data *d)
+{
+	struct irq_data *parent = d->parent_data;
+
+	if (parent->chip->irq_shutdown)
+		parent->chip->irq_shutdown(parent);
+}
+
+/* irq_mask: mask at the PCI MSI level first, then in the parent chip. */
+static void pseries_msi_mask(struct irq_data *d)
+{
+	pci_msi_mask_irq(d);
+
+	irq_chip_mask_parent(d);
+}
+
+/* irq_unmask: unmask at the PCI MSI level first, then in the parent chip. */
+static void pseries_msi_unmask(struct irq_data *d)
+{
+	pci_msi_unmask_irq(d);
+
+	irq_chip_unmask_parent(d);
+}
+
+/*
+ * irq_write_msi_msg: remember @msg in the MSI descriptor without
+ * writing it to the device.
+ */
+static void pseries_msi_write_msg(struct irq_data *data, struct msi_msg *msg)
+{
+	struct msi_desc *desc = irq_data_get_msi_desc(data);
+
+	/*
+	 * The MSI-X vector table is deliberately left alone. Updating it is
+	 * not strictly needed because the underlying hypervisor (PowerVM or
+	 * QEMU/KVM) initialises the table. Worse, once an MSI-X vector entry
+	 * has been cleared, any later activation fails - which happens with
+	 * drivers (e.g. IPR) that deactivate an IRQ they used to test MSI
+	 * support.
+	 */
+	desc->msg = *msg;
+}
+
+static struct irq_chip pseries_pci_msi_irq_chip = { /* chip installed through pseries_msi_domain_info.chip */
+ .name = "pSeries-PCI-MSI",
+ .irq_shutdown = pseries_msi_shutdown,
+ .irq_mask = pseries_msi_mask, /* masks the PCI MSI and the parent */
+ .irq_unmask = pseries_msi_unmask, /* unmasks the PCI MSI and the parent */
+ .irq_eoi = irq_chip_eoi_parent,
+ .irq_write_msi_msg = pseries_msi_write_msg, /* only caches the message in the descriptor */
+};
+
+
+/*
+ * MSI_FLAG_MSIX_CONTIGUOUS is set because firmware offers no way to
+ * request a discontiguous or non-zero based range of MSI-X entries.
+ * Core code will reject such setup attempts.
+ */
+static struct msi_domain_info pseries_msi_domain_info = {
+ .flags = (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
+ MSI_FLAG_MULTI_PCI_MSI | MSI_FLAG_PCI_MSIX |
+ MSI_FLAG_MSIX_CONTIGUOUS),
+ .ops = &pseries_pci_msi_domain_ops,
+ .chip = &pseries_pci_msi_irq_chip,
+};
+
+static void pseries_msi_compose_msg(struct irq_data *data, struct msi_msg *msg)
+{
+	struct msi_desc *desc = irq_data_get_msi_desc(data);
+
+	/* The message is not built here; it is read back for this descriptor. */
+	__pci_read_msi_msg(desc, msg);
+}
+
+static struct irq_chip pseries_msi_irq_chip = { /* chip installed on each virq by pseries_irq_domain_alloc() */
+ .name = "pSeries-MSI",
+ .irq_shutdown = pseries_msi_shutdown, /* delegates to the parent chip's irq_shutdown, if any */
+ .irq_mask = irq_chip_mask_parent,
+ .irq_unmask = irq_chip_unmask_parent,
+ .irq_eoi = irq_chip_eoi_parent,
+ .irq_set_affinity = irq_chip_set_affinity_parent,
+ .irq_compose_msi_msg = pseries_msi_compose_msg, /* reads the message back via __pci_read_msi_msg() */
+};
+
+/*
+ * Allocate one interrupt in the parent domain for @hwirq, described as a
+ * two-cell fwspec: the hardware IRQ number and a rising-edge trigger type.
+ *
+ * Returns 0 on success or the error reported by the parent allocation.
+ */
+static int pseries_irq_parent_domain_alloc(struct irq_domain *domain, unsigned int virq,
+					   irq_hw_number_t hwirq)
+{
+	struct irq_fwspec fwspec = {
+		.fwnode = domain->parent->fwnode,
+		.param_count = 2,
+		.param = { hwirq, IRQ_TYPE_EDGE_RISING },
+	};
+
+	return irq_domain_alloc_irqs_parent(domain, virq, 1, &fwspec);
+}
+
+/*
+ * irq_domain_ops.alloc callback for the per-PHB device domain.
+ *
+ * @domain:  domain whose host_data is the owning struct pci_controller
+ * @virq:    first Linux IRQ number of the range to populate
+ * @nr_irqs: number of consecutive IRQs to populate
+ * @arg:     msi_alloc_info_t carrying the MSI descriptor
+ *
+ * Queries RTAS for the hardware IRQ number backing the descriptor's MSI
+ * index, then for each of the @nr_irqs consecutive interrupts allocates the
+ * parent interrupt and installs pseries_msi_irq_chip.
+ *
+ * Returns 0 on success, the negative value returned by
+ * rtas_query_irq_number(), or the error from the parent allocation.
+ */
+static int pseries_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
+				    unsigned int nr_irqs, void *arg)
+{
+	struct pci_controller *phb = domain->host_data;
+	msi_alloc_info_t *info = arg;
+	struct msi_desc *desc = info->desc;
+	struct pci_dev *pdev = msi_desc_to_pci_dev(desc);
+	int hwirq;
+	int i, ret;
+
+	hwirq = rtas_query_irq_number(pci_get_pdn(pdev), desc->msi_index);
+	if (hwirq < 0) {
+		dev_err(&pdev->dev, "Failed to query HW IRQ: %d\n", hwirq);
+		return hwirq;
+	}
+
+	dev_dbg(&pdev->dev, "%s bridge %pOF %d/%x #%d\n", __func__,
+		phb->dn, virq, hwirq, nr_irqs);
+
+	for (i = 0; i < nr_irqs; i++) {
+		ret = pseries_irq_parent_domain_alloc(domain, virq + i, hwirq + i);
+		if (ret)
+			goto out;
+
+		irq_domain_set_hwirq_and_chip(domain, virq + i, hwirq + i,
+					      &pseries_msi_irq_chip, domain->host_data);
+	}
+
+	return 0;
+
+out:
+	/* TODO: handle RTAS cleanup in ->msi_finish() ? */
+	/*
+	 * Exactly 'i' parent interrupts (indices 0 .. i-1) were allocated
+	 * before the failure. Freeing 'i - 1' would leak one of them and,
+	 * when the very first allocation fails (i == 0), wrap around to a
+	 * huge unsigned count.
+	 */
+	irq_domain_free_irqs_parent(domain, virq, i);
+	return ret;
+}
+
+/*
+ * irq_domain_ops.free callback: only emits a debug trace, no resource is
+ * released here.
+ */
+static void pseries_irq_domain_free(struct irq_domain *domain, unsigned int virq,
+				    unsigned int nr_irqs)
+{
+	struct pci_controller *phb =
+		irq_data_get_irq_chip_data(irq_domain_get_irq_data(domain, virq));
+
+	pr_debug("%s bridge %pOF %d #%d\n", __func__, phb->dn, virq, nr_irqs);
+
+	/* XIVE domain data is cleared through ->msi_free() */
+}
+
+static const struct irq_domain_ops pseries_irq_domain_ops = { /* ops of the per-PHB device domain created in __pseries_msi_allocate_domains() */
+ .alloc = pseries_irq_domain_alloc, /* queries the hwirq from RTAS and allocates parent IRQs */
+ .free = pseries_irq_domain_free, /* debug trace only */
+};
+
+/*
+ * Create the IRQ domains of a PHB: a named fwnode, the device domain
+ * stacked on the default host domain, and the PCI MSI domain on top of it.
+ *
+ * @phb:   controller whose fwnode, dev_domain and msi_domain fields are set
+ * @count: size passed to irq_domain_create_hierarchy()
+ *
+ * Returns 0 on success, -ENOMEM if any of the three objects cannot be
+ * created. On failure everything created so far is torn down in reverse
+ * order of creation (domain before the fwnode it was built on, matching
+ * pseries_msi_free_domains()) and the corresponding @phb fields are reset
+ * to NULL so that a later pseries_msi_free_domains() does not free them a
+ * second time.
+ */
+static int __pseries_msi_allocate_domains(struct pci_controller *phb,
+					  unsigned int count)
+{
+	struct irq_domain *parent = irq_get_default_host();
+
+	phb->fwnode = irq_domain_alloc_named_id_fwnode("pSeries-MSI",
+						       phb->global_number);
+	if (!phb->fwnode)
+		return -ENOMEM;
+
+	phb->dev_domain = irq_domain_create_hierarchy(parent, 0, count,
+						      phb->fwnode,
+						      &pseries_irq_domain_ops, phb);
+	if (!phb->dev_domain) {
+		pr_err("PCI: failed to create IRQ domain bridge %pOF (domain %d)\n",
+		       phb->dn, phb->global_number);
+		goto err_free_fwnode;
+	}
+
+	phb->msi_domain = pci_msi_create_irq_domain(of_node_to_fwnode(phb->dn),
+						    &pseries_msi_domain_info,
+						    phb->dev_domain);
+	if (!phb->msi_domain) {
+		pr_err("PCI: failed to create MSI IRQ domain bridge %pOF (domain %d)\n",
+		       phb->dn, phb->global_number);
+		goto err_remove_dev_domain;
+	}
+
+	return 0;
+
+err_remove_dev_domain:
+	irq_domain_remove(phb->dev_domain);
+	phb->dev_domain = NULL;
+err_free_fwnode:
+	irq_domain_free_fwnode(phb->fwnode);
+	phb->fwnode = NULL;
+	return -ENOMEM;
+}
+
+/*
+ * Look up the total number of MSIs of the PHB and create its IRQ domains
+ * with that size. Returns -ENOSPC when the count cannot be found,
+ * otherwise the result of __pseries_msi_allocate_domains().
+ */
+int pseries_msi_allocate_domains(struct pci_controller *phb)
+{
+	int nr_msis;
+
+	if (__find_pe_total_msi(phb->dn, &nr_msis))
+		return __pseries_msi_allocate_domains(phb, nr_msis);
+
+	pr_err("PCI: failed to find MSIs for bridge %pOF (domain %d)\n",
+	       phb->dn, phb->global_number);
+	return -ENOSPC;
+}
+
+void pseries_msi_free_domains(struct pci_controller *phb)
+{
+ if (phb->msi_domain) /* teardown runs in reverse order of creation: MSI domain first ... */
+ irq_domain_remove(phb->msi_domain);
+ if (phb->dev_domain) /* ... then the device domain it was stacked on ... */
+ irq_domain_remove(phb->dev_domain);
+ if (phb->fwnode) /* ... and finally the fwnode the device domain was created with */
+ irq_domain_free_fwnode(phb->fwnode);
+}
+
+static void rtas_msi_pci_irq_fixup(struct pci_dev *pdev)
+{
+ /* No LSI (pdev->irq is 0) -> leave MSIs (if any) configured */
+ if (!pdev->irq) {
+ dev_dbg(&pdev->dev, "rtas_msi: no LSI, nothing to do.\n");
+ return;
+ }
+
+ /*
+  * No MSI -> MSIs can't have been assigned by fw, leave LSI.
+  * NOTE(review): presumably check_req_msi()/check_req_msix() return
+  * non-zero when the request cannot be satisfied - confirm.
+  */
+ if (check_req_msi(pdev, 1) && check_req_msix(pdev, 1)) {
+ dev_dbg(&pdev->dev, "rtas_msi: no req#msi/x, nothing to do.\n");
+ return;
+ }
+
+ dev_dbg(&pdev->dev, "rtas_msi: disabling existing MSI.\n");
+ rtas_disable_msi(pdev); /* device has an LSI and passed a req#msi/x check: turn off any configured MSIs */
+}
+
+/*
+ * Look up the two RTAS tokens used for MSI handling and, when both are
+ * available, install the PCI IRQ fixup hook. Returns 0 on success, -1 when
+ * either token is unknown.
+ */
+static int rtas_msi_init(void)
+{
+	bool have_tokens;
+
+	query_token = rtas_function_token(RTAS_FN_IBM_QUERY_INTERRUPT_SOURCE_NUMBER);
+	change_token = rtas_function_token(RTAS_FN_IBM_CHANGE_MSI);
+
+	have_tokens = query_token != RTAS_UNKNOWN_SERVICE &&
+		      change_token != RTAS_UNKNOWN_SERVICE;
+	if (!have_tokens) {
+		pr_debug("rtas_msi: no RTAS tokens, no MSI support.\n");
+		return -1;
+	}
+
+	pr_debug("rtas_msi: Registering RTAS MSI callbacks.\n");
+
+	/* Nobody else is expected to have claimed the hook already. */
+	WARN_ON(ppc_md.pci_irq_fixup);
+	ppc_md.pci_irq_fixup = rtas_msi_pci_irq_fixup;
+
+	return 0;
+}
+machine_arch_initcall(pseries, rtas_msi_init);
diff --git a/arch/powerpc/platforms/pseries/nvram.c b/arch/powerpc/platforms/pseries/nvram.c
new file mode 100644
index 000000000..8130c3796
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/nvram.c
@@ -0,0 +1,241 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * c 2001 PPC 64 Team, IBM Corp
+ *
+ * /dev/nvram driver for PPC64
+ */
+
+
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+#include <linux/ctype.h>
+#include <linux/uaccess.h>
+#include <linux/of.h>
+#include <asm/nvram.h>
+#include <asm/rtas.h>
+#include <asm/machdep.h>
+
+/* Max bytes to read/write in one go; also the size of the bounce buffer handed to RTAS */
+#define NVRW_CNT 0x20
+
+static unsigned int nvram_size; /* NVRAM size in bytes, read from the "#bytes" property in pSeries_nvram_init() */
+static int nvram_fetch, nvram_store; /* RTAS function tokens for the fetch and store calls */
+static char nvram_buf[NVRW_CNT]; /* assume this is in the first 4GB */
+static DEFINE_SPINLOCK(nvram_lock); /* held around every RTAS transfer through nvram_buf */
+
+/* See clobbering_unread_rtas_event() */
+#define NVRAM_RTAS_READ_TIMEOUT 5 /* seconds */
+static time64_t last_unread_rtas_event; /* timestamp; reset to 0 by nvram_clear_error_log() */
+
+#ifdef CONFIG_PSTORE
+time64_t last_rtas_event; /* timestamp of the last successful nvram_write_error_log() */
+#endif
+
+/*
+ * Read up to @count bytes of NVRAM starting at *@index into @buf.
+ *
+ * The request is clamped to the end of NVRAM and transferred in chunks of
+ * at most NVRW_CNT bytes through nvram_buf, under nvram_lock.
+ *
+ * Returns the number of bytes read (0 when *@index is at or past the end),
+ * -ENODEV when NVRAM or the fetch service is unavailable, or -EIO when an
+ * RTAS call fails or transfers a different length than requested. *@index
+ * is advanced only on success.
+ */
+static ssize_t pSeries_nvram_read(char *buf, size_t count, loff_t *index)
+{
+	unsigned long irq_flags;
+	unsigned long chunk;
+	unsigned int offset;
+	char *dst = buf;
+	int done;
+
+	if (nvram_size == 0 || nvram_fetch == RTAS_UNKNOWN_SERVICE)
+		return -ENODEV;
+
+	if (*index >= nvram_size)
+		return 0;
+
+	offset = *index;
+	if (offset + count > nvram_size)
+		count = nvram_size - offset;
+
+	spin_lock_irqsave(&nvram_lock, irq_flags);
+
+	while (count != 0) {
+		chunk = count > NVRW_CNT ? NVRW_CNT : count;
+
+		if (rtas_call(nvram_fetch, 3, 2, &done, offset, __pa(nvram_buf),
+			      chunk) != 0 || chunk != done) {
+			spin_unlock_irqrestore(&nvram_lock, irq_flags);
+			return -EIO;
+		}
+
+		memcpy(dst, nvram_buf, chunk);
+
+		dst += chunk;
+		offset += chunk;
+		count -= chunk;
+	}
+
+	spin_unlock_irqrestore(&nvram_lock, irq_flags);
+
+	*index = offset;
+	return dst - buf;
+}
+
+/*
+ * Write up to @count bytes from @buf to NVRAM starting at *@index.
+ *
+ * The request is clamped to the end of NVRAM and transferred in chunks of
+ * at most NVRW_CNT bytes through nvram_buf, under nvram_lock.
+ *
+ * Returns the number of bytes written (0 when *@index is at or past the
+ * end), -ENODEV when NVRAM or the store service is unavailable, or -EIO
+ * when an RTAS call fails or transfers a different length than requested.
+ * *@index is advanced only on success.
+ */
+static ssize_t pSeries_nvram_write(char *buf, size_t count, loff_t *index)
+{
+	unsigned long irq_flags;
+	unsigned long chunk;
+	unsigned int offset;
+	const char *src = buf;
+	int done;
+
+	if (nvram_size == 0 || nvram_store == RTAS_UNKNOWN_SERVICE)
+		return -ENODEV;
+
+	if (*index >= nvram_size)
+		return 0;
+
+	offset = *index;
+	if (offset + count > nvram_size)
+		count = nvram_size - offset;
+
+	spin_lock_irqsave(&nvram_lock, irq_flags);
+
+	while (count != 0) {
+		chunk = count > NVRW_CNT ? NVRW_CNT : count;
+
+		memcpy(nvram_buf, src, chunk);
+
+		if (rtas_call(nvram_store, 3, 2, &done, offset, __pa(nvram_buf),
+			      chunk) != 0 || chunk != done) {
+			spin_unlock_irqrestore(&nvram_lock, irq_flags);
+			return -EIO;
+		}
+
+		src += chunk;
+		offset += chunk;
+		count -= chunk;
+	}
+
+	spin_unlock_irqrestore(&nvram_lock, irq_flags);
+
+	*index = offset;
+	return src - buf;
+}
+
+/*
+ * Report the NVRAM size in bytes, or -ENODEV when the size is 0.
+ *
+ * The two results are returned from separate statements on purpose: in a
+ * single conditional expression mixing the unsigned int size with the int
+ * error code, the error code is converted to unsigned int first and would
+ * reach the (wider) ssize_t return type as a large positive value instead
+ * of a negative errno.
+ */
+static ssize_t pSeries_nvram_get_size(void)
+{
+	if (nvram_size == 0)
+		return -ENODEV;
+
+	return nvram_size;
+}
+
+/* nvram_write_error_log
+ *
+ * Error logs are buffered into nvram so that the failure information is
+ * still available to decode later. On a successful write the time of the
+ * event is recorded; on failure the error code is returned unchanged.
+ */
+int nvram_write_error_log(char * buff, int length,
+			  unsigned int err_type, unsigned int error_log_cnt)
+{
+	int rc;
+
+	rc = nvram_write_os_partition(&rtas_log_partition, buff, length,
+				      err_type, error_log_cnt);
+	if (rc)
+		return rc;
+
+	last_unread_rtas_event = ktime_get_real_seconds();
+#ifdef CONFIG_PSTORE
+	last_rtas_event = ktime_get_real_seconds();
+#endif
+
+	return rc;
+}
+
+/* nvram_read_error_log
+ *
+ * Reads nvram for error log for at most 'length'
+ *
+ * Thin wrapper: forwards all arguments to nvram_read_partition() for the
+ * rtas_log_partition and returns its result unchanged.
+ */
+int nvram_read_error_log(char *buff, int length,
+ unsigned int *err_type, unsigned int *error_log_cnt)
+{
+ return nvram_read_partition(&rtas_log_partition, buff, length,
+ err_type, error_log_cnt);
+}
+
+/* Nothing is actually zeroed here: the event_logged word at the start of
+ * the RTAS log partition is set to mark the event as safely in syslog.
+ *
+ * Returns 0 on success, -1 when the partition has no index, otherwise the
+ * (non-positive) result of the failed nvram write.
+ */
+int nvram_clear_error_log(void)
+{
+	int logged_flag = ERR_FLAG_ALREADY_LOGGED;
+	loff_t pos;
+	int rc;
+
+	if (rtas_log_partition.index == -1)
+		return -1;
+
+	pos = rtas_log_partition.index;
+
+	rc = ppc_md.nvram_write((char *)&logged_flag, sizeof(int), &pos);
+	if (rc > 0) {
+		last_unread_rtas_event = 0;
+		return 0;
+	}
+
+	printk(KERN_ERR "nvram_clear_error_log: Failed nvram_write (%d)\n", rc);
+	return rc;
+}
+
+/*
+ * Would logging an oops/panic report overwrite an RTAS event that
+ * rtas_errd hasn't had a chance to read and process yet? That can only
+ * happen when the oops log shares the ibm,rtas-log partition. Return 1 if
+ * so, else 0.
+ *
+ * An RTAS event that rtas_errd has not read within
+ * NVRAM_RTAS_READ_TIMEOUT seconds is assumed to never be read.
+ */
+int clobbering_unread_rtas_event(void)
+{
+	/* Separate partitions: nothing can be clobbered. */
+	if (oops_log_partition.index != rtas_log_partition.index)
+		return 0;
+
+	/* No event pending. */
+	if (!last_unread_rtas_event)
+		return 0;
+
+	return ktime_get_real_seconds() - last_unread_rtas_event <=
+	       NVRAM_RTAS_READ_TIMEOUT;
+}
+
+/*
+ * Scan nvram for partitions, set up the RTAS log partition, then set up
+ * the oops partition, telling it whether the RTAS log partition came up.
+ * Always returns 0.
+ */
+static int __init pseries_nvram_init_log_partitions(void)
+{
+	int rtas_log_rc;
+
+	nvram_scan_partitions();
+
+	rtas_log_rc = nvram_init_os_partition(&rtas_log_partition);
+	nvram_init_oops_partition(!rtas_log_rc);
+
+	return 0;
+}
+machine_arch_initcall(pseries, pseries_nvram_init_log_partitions);
+
+/*
+ * Discover the NVRAM device node, record its size and the RTAS fetch/store
+ * tokens, and install the pSeries NVRAM accessors in ppc_md.
+ *
+ * Returns 0 on success, -ENODEV when there is no "nvram" node, or -EIO
+ * when its "#bytes" property is missing or not one cell long.
+ */
+int __init pSeries_nvram_init(void)
+{
+	struct device_node *np;
+	const __be32 *size_prop;
+	unsigned int prop_len;
+
+	np = of_find_node_by_type(NULL, "nvram");
+	if (!np)
+		return -ENODEV;
+
+	size_prop = of_get_property(np, "#bytes", &prop_len);
+	if (!size_prop || prop_len != sizeof(unsigned int)) {
+		of_node_put(np);
+		return -EIO;
+	}
+
+	nvram_size = be32_to_cpup(size_prop);
+
+	nvram_fetch = rtas_function_token(RTAS_FN_NVRAM_FETCH);
+	nvram_store = rtas_function_token(RTAS_FN_NVRAM_STORE);
+	printk(KERN_INFO "PPC64 nvram contains %d bytes\n", nvram_size);
+	of_node_put(np);
+
+	ppc_md.nvram_read = pSeries_nvram_read;
+	ppc_md.nvram_write = pSeries_nvram_write;
+	ppc_md.nvram_size = pSeries_nvram_get_size;
+
+	return 0;
+}
+
diff --git a/arch/powerpc/platforms/pseries/of_helpers.c b/arch/powerpc/platforms/pseries/of_helpers.c
new file mode 100644
index 000000000..23241c71e
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/of_helpers.c
@@ -0,0 +1,97 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/string.h>
+#include <linux/err.h>
+#include <linux/slab.h>
+#include <linux/of.h>
+#include <asm/prom.h>
+
+#include "of_helpers.h"
+
+/**
+ * pseries_of_derive_parent - basically like dirname(1)
+ * @path: the full_name of a node to be added to the tree
+ *
+ * Returns the node which should be the parent of the node
+ * described by path. E.g., for path = "/foo/bar", returns
+ * the node with full_name = "/foo".
+ *
+ * Returns ERR_PTR(-EINVAL) if @path is "/" or no parent node is found,
+ * and ERR_PTR(-ENOMEM) if the parent path cannot be allocated.
+ */
+struct device_node *pseries_of_derive_parent(const char *path)
+{
+	struct device_node *parent;
+	const char *parent_path = "/";
+	char *allocated = NULL;
+	const char *tail;
+
+	/* reject if path is "/" */
+	if (!strcmp(path, "/"))
+		return ERR_PTR(-EINVAL);
+
+	/* We do not want the trailing '/' character */
+	tail = kbasename(path) - 1;
+
+	if (tail > path) {
+		allocated = kstrndup(path, tail - path, GFP_KERNEL);
+		if (!allocated)
+			return ERR_PTR(-ENOMEM);
+		parent_path = allocated;
+	}
+	parent = of_find_node_by_path(parent_path);
+
+	/*
+	 * Free by ownership, not by comparing against "/": an allocated
+	 * copy can itself be "/" (e.g. for "//foo") and must still be
+	 * released. kfree(NULL) is a no-op for the literal case.
+	 */
+	kfree(allocated);
+	return parent ? parent : ERR_PTR(-EINVAL);
+}
+
+
+/* Helper routines to convert between drc_index and cpu numbers */
+
+int of_read_drc_info_cell(struct property **prop, const __be32 **curval,
+ struct of_drc_info *data)
+{
+ const char *p = (char *)(*curval);
+ const __be32 *p2;
+
+ if (!data)
+ return -EINVAL;
+
+ /*
+  * Parse one drc-info entry starting at *curval into @data: two strings
+  * followed by five 32-bit cells. Any failure to advance within *prop
+  * returns -EINVAL.
+  */
+
+ /* Get drc-type:encode-string (points into the property, not copied) */
+ data->drc_type = (char *)p;
+ p = of_prop_next_string(*prop, p);
+ if (!p)
+ return -EINVAL;
+
+ /* Get drc-name-prefix:encode-string (points into the property, not copied) */
+ data->drc_name_prefix = (char *)p;
+ p = of_prop_next_string(*prop, p);
+ if (!p)
+ return -EINVAL;
+
+ /* Get drc-index-start:encode-int; read directly from the cell after the two strings */
+ p2 = (const __be32 *)p;
+ data->drc_index_start = be32_to_cpu(*p2);
+
+ /* Get drc-name-suffix-start:encode-int */
+ p2 = of_prop_next_u32(*prop, p2, &data->drc_name_suffix_start);
+ if (!p2)
+ return -EINVAL;
+
+ /* Get number-sequential-elements:encode-int */
+ p2 = of_prop_next_u32(*prop, p2, &data->num_sequential_elems);
+ if (!p2)
+ return -EINVAL;
+
+ /* Get sequential-increment:encode-int */
+ p2 = of_prop_next_u32(*prop, p2, &data->sequential_inc);
+ if (!p2)
+ return -EINVAL;
+
+ /* Get drc-power-domain:encode-int */
+ p2 = of_prop_next_u32(*prop, p2, &data->drc_power_domain);
+ if (!p2)
+ return -EINVAL;
+
+ /* Should now know end of current entry; hand the caller the cell after it */
+ (*curval) = (void *)(++p2);
+ data->last_drc_index = data->drc_index_start +
+ ((data->num_sequential_elems - 1) * data->sequential_inc);
+
+ return 0;
+}
+EXPORT_SYMBOL(of_read_drc_info_cell);
diff --git a/arch/powerpc/platforms/pseries/of_helpers.h b/arch/powerpc/platforms/pseries/of_helpers.h
new file mode 100644
index 000000000..decad6553
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/of_helpers.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PSERIES_OF_HELPERS_H
+#define _PSERIES_OF_HELPERS_H
+
+#include <linux/of.h>
+
+struct device_node *pseries_of_derive_parent(const char *path);
+
+#endif /* _PSERIES_OF_HELPERS_H */
diff --git a/arch/powerpc/platforms/pseries/papr-sysparm.c b/arch/powerpc/platforms/pseries/papr-sysparm.c
new file mode 100644
index 000000000..fedc61599
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/papr-sysparm.c
@@ -0,0 +1,151 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#define pr_fmt(fmt) "papr-sysparm: " fmt
+
+#include <linux/bug.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/printk.h>
+#include <linux/slab.h>
+#include <asm/rtas.h>
+#include <asm/papr-sysparm.h>
+#include <asm/rtas-work-area.h>
+
+/*
+ * Allocate a zero-filled system parameter buffer. Returns NULL when the
+ * allocation fails; release with papr_sysparm_buf_free().
+ */
+struct papr_sysparm_buf *papr_sysparm_buf_alloc(void)
+{
+	return kzalloc(sizeof(struct papr_sysparm_buf), GFP_KERNEL);
+}
+
+void papr_sysparm_buf_free(struct papr_sysparm_buf *buf)
+{
+ kfree(buf); /* counterpart of papr_sysparm_buf_alloc(), which allocates with kzalloc() */
+}
+
+/**
+ * papr_sysparm_get() - Retrieve the value of a PAPR system parameter.
+ * @param: PAPR system parameter token as described in
+ * 7.3.16 "System Parameters Option".
+ * @buf: A &struct papr_sysparm_buf as returned from papr_sysparm_buf_alloc().
+ *
+ * Place the result of querying the specified parameter, if available,
+ * in @buf. The result includes a be16 length header followed by the
+ * value, which may be a string or binary data. See &struct papr_sysparm_buf.
+ *
+ * Since there is at least one parameter (60, OS Service Entitlement
+ * Status) where the results depend on the incoming contents of the
+ * work area, the caller-supplied buffer is copied unmodified into the
+ * work area before calling ibm,get-system-parameter.
+ *
+ * A defined parameter may not be implemented on a given system, and
+ * some implemented parameters may not be available to all partitions
+ * on a system. A parameter's disposition may change at any time due
+ * to system configuration changes or partition migration.
+ *
+ * Context: This function may sleep.
+ *
+ * Return: 0 on success, -errno otherwise. @buf is unmodified on error.
+ */
+
+int papr_sysparm_get(papr_sysparm_t param, struct papr_sysparm_buf *buf)
+{
+	const s32 token = rtas_function_token(RTAS_FN_IBM_GET_SYSTEM_PARAMETER);
+	struct rtas_work_area *area;
+	s32 status;
+	int err;
+
+	might_sleep();
+
+	if (WARN_ON(!buf))
+		return -EFAULT;
+
+	if (token == RTAS_UNKNOWN_SERVICE)
+		return -ENOENT;
+
+	area = rtas_work_area_alloc(sizeof(*buf));
+
+	/* Seed the work area with the caller's buffer contents. */
+	memcpy(rtas_work_area_raw_buf(area), buf, sizeof(*buf));
+
+	/* Repeat the call for as long as rtas_busy_delay() asks for a retry. */
+	for (;;) {
+		status = rtas_call(token, 3, 1, NULL, param.token,
+				   rtas_work_area_phys(area),
+				   rtas_work_area_size(area));
+		if (!rtas_busy_delay(status))
+			break;
+	}
+
+	switch (status) {
+	case 0:
+		/* Only a successful call is allowed to modify @buf. */
+		memcpy(buf, rtas_work_area_raw_buf(area), sizeof(*buf));
+		err = 0;
+		break;
+	case -3: /* parameter not implemented */
+		err = -EOPNOTSUPP;
+		break;
+	case -9002: /* this partition not authorized to retrieve this parameter */
+		err = -EPERM;
+		break;
+	case -9999: /* "parameter error" e.g. the buffer is too small */
+		err = -EINVAL;
+		break;
+	case -1: /* Hardware/platform error */
+		err = -EIO;
+		break;
+	default:
+		pr_err("unexpected ibm,get-system-parameter result %d\n", status);
+		err = -EIO;
+		break;
+	}
+
+	rtas_work_area_free(area);
+
+	return err;
+}
+
+/*
+ * papr_sysparm_set() - Update the value of a PAPR system parameter.
+ * @param: PAPR system parameter token.
+ * @buf: Buffer holding the new value; copied into an RTAS work area.
+ *
+ * Context: This function may sleep.
+ *
+ * Return: 0 on success, -EFAULT for a NULL @buf, -ENOENT when the RTAS
+ * function is unavailable, otherwise an errno derived from the firmware
+ * status (-EOPNOTSUPP, -EPERM, -EINVAL or -EIO).
+ */
+int papr_sysparm_set(papr_sysparm_t param, const struct papr_sysparm_buf *buf)
+{
+	const s32 token = rtas_function_token(RTAS_FN_IBM_SET_SYSTEM_PARAMETER);
+	struct rtas_work_area *area;
+	s32 status;
+	int err;
+
+	might_sleep();
+
+	if (WARN_ON(!buf))
+		return -EFAULT;
+
+	if (token == RTAS_UNKNOWN_SERVICE)
+		return -ENOENT;
+
+	area = rtas_work_area_alloc(sizeof(*buf));
+
+	memcpy(rtas_work_area_raw_buf(area), buf, sizeof(*buf));
+
+	/* Repeat the call for as long as rtas_busy_delay() asks for a retry. */
+	for (;;) {
+		status = rtas_call(token, 2, 1, NULL, param.token,
+				   rtas_work_area_phys(area));
+		if (!rtas_busy_delay(status))
+			break;
+	}
+
+	switch (status) {
+	case 0:
+		err = 0;
+		break;
+	case -3: /* parameter not supported */
+		err = -EOPNOTSUPP;
+		break;
+	case -9002: /* this partition not authorized to modify this parameter */
+		err = -EPERM;
+		break;
+	case -9999: /* "parameter error" e.g. invalid input data */
+		err = -EINVAL;
+		break;
+	case -1: /* Hardware/platform error */
+		err = -EIO;
+		break;
+	default:
+		pr_err("unexpected ibm,set-system-parameter result %d\n", status);
+		err = -EIO;
+		break;
+	}
+
+	rtas_work_area_free(area);
+
+	return err;
+}
diff --git a/arch/powerpc/platforms/pseries/papr_platform_attributes.c b/arch/powerpc/platforms/pseries/papr_platform_attributes.c
new file mode 100644
index 000000000..526c621b0
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/papr_platform_attributes.c
@@ -0,0 +1,362 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Platform energy and frequency attributes driver
+ *
+ * This driver creates a sys file at /sys/firmware/papr/ which encapsulates a
+ * directory structure containing files in keyword - value pairs that specify
+ * energy and frequency configuration of the system.
+ *
+ * The format of exposing the sysfs information is as follows:
+ * /sys/firmware/papr/energy_scale_info/
+ * |-- <id>/
+ * |-- desc
+ * |-- value
+ * |-- value_desc (if exists)
+ * |-- <id>/
+ * |-- desc
+ * |-- value
+ * |-- value_desc (if exists)
+ *
+ * Copyright 2022 IBM Corp.
+ */
+
+#include <asm/hvcall.h>
+#include <asm/machdep.h>
+#include <asm/firmware.h>
+
+#include "pseries.h"
+
+/*
+ * Flag attributes to fetch either all or one attribute from the HCALL
+ * flag = BE(0) => fetch all attributes with firstAttributeId = 0
+ * flag = BE(1) => fetch a single attribute with firstAttributeId = id
+ */
+#define ESI_FLAGS_ALL 0
+#define ESI_FLAGS_SINGLE (1ull << 63)
+
+#define KOBJ_MAX_ATTRS 3
+
+#define ESI_HDR_SIZE sizeof(struct h_energy_scale_info_hdr)
+#define ESI_ATTR_SIZE sizeof(struct energy_scale_attribute)
+#define CURR_MAX_ESI_ATTRS 8
+
+struct energy_scale_attribute { /* one attribute record in the H_GET_ENERGY_SCALE_INFO output buffer */
+ __be64 id; /* attribute id, big-endian; its decimal form names the sysfs group */
+ __be64 val; /* numeric value, big-endian; exported through the "value" file */
+ u8 desc[64]; /* description text; exported through the "desc" file */
+ u8 value_desc[64]; /* text form of the value; "value_desc" file is omitted when this is empty */
+} __packed;
+
+struct h_energy_scale_info_hdr { /* header at the start of the H_GET_ENERGY_SCALE_INFO output buffer */
+ __be64 num_attrs; /* number of attribute records, big-endian */
+ __be64 array_offset; /* byte offset from the buffer start to the first record, big-endian */
+ u8 data_header_version; /* not read by the code in this file */
+} __packed;
+
+struct papr_attr { /* one sysfs file; the show callbacks recover it via container_of() on kobj_attr */
+ u64 id; /* attribute id passed to papr_get_attr() on every read */
+ struct kobj_attribute kobj_attr;
+};
+
+struct papr_group { /* one sysfs directory per energy scale attribute */
+ struct attribute_group pg; /* group registered under esi_kobj; name and attrs are allocated in papr_init() */
+ struct papr_attr pgattrs[KOBJ_MAX_ATTRS]; /* backing storage for the files, indexed like ops_info[] */
+};
+
+static struct papr_group *papr_groups;
+/* /sys/firmware/papr */
+static struct kobject *papr_kobj;
+/* /sys/firmware/papr/energy_scale_info */
+static struct kobject *esi_kobj;
+
+/*
+ * Fetch a single energy scale attribute from the hypervisor.
+ *
+ * @id:  attribute id to query
+ * @esi: filled with the first attribute record of the reply on success
+ *
+ * Energy modes can change dynamically hence making a new hcall each time the
+ * information needs to be retrieved
+ *
+ * Returns 0 on success, -ENOMEM if the reply buffer cannot be (re)allocated,
+ * -EIO if the hcall fails or the reply does not fit in the buffer.
+ */
+static int papr_get_attr(u64 id, struct energy_scale_attribute *esi)
+{
+	int esi_buf_size = ESI_HDR_SIZE + (CURR_MAX_ESI_ATTRS * ESI_ATTR_SIZE);
+	int ret, max_esi_attrs = CURR_MAX_ESI_ATTRS;
+	struct energy_scale_attribute *curr_esi;
+	struct h_energy_scale_info_hdr *hdr;
+	char *buf;
+
+	buf = kmalloc(esi_buf_size, GFP_KERNEL);
+	if (buf == NULL)
+		return -ENOMEM;
+
+retry:
+	ret = plpar_hcall_norets(H_GET_ENERGY_SCALE_INFO, ESI_FLAGS_SINGLE,
+				 id, virt_to_phys(buf),
+				 esi_buf_size);
+
+	/*
+	 * If the hcall fails with not enough memory for either the
+	 * header or data, attempt to allocate more
+	 */
+	if (ret == H_PARTIAL || ret == H_P4) {
+		char *temp_buf;
+
+		/*
+		 * Make room for four more attribute records. The size must
+		 * be computed from the record size (ESI_ATTR_SIZE), otherwise
+		 * the "bigger" buffer ends up smaller than the initial one.
+		 */
+		max_esi_attrs += 4;
+		esi_buf_size = ESI_HDR_SIZE + (ESI_ATTR_SIZE * max_esi_attrs);
+
+		temp_buf = krealloc(buf, esi_buf_size, GFP_KERNEL);
+		if (!temp_buf) {
+			/* krealloc() leaves the old buffer allocated on failure */
+			ret = -ENOMEM;
+			goto out_buf;
+		}
+		buf = temp_buf;
+
+		goto retry;
+	}
+
+	if (ret != H_SUCCESS) {
+		pr_warn("hcall failed: H_GET_ENERGY_SCALE_INFO\n");
+		ret = -EIO;
+		goto out_buf;
+	}
+
+	hdr = (struct h_energy_scale_info_hdr *) buf;
+
+	/* Reject a reply whose attribute array would run past the buffer */
+	if (esi_buf_size <
+	    be64_to_cpu(hdr->array_offset) + (be64_to_cpu(hdr->num_attrs)
+	    * sizeof(struct energy_scale_attribute))) {
+		ret = -EIO;
+		goto out_buf;
+	}
+
+	curr_esi = (struct energy_scale_attribute *)
+		(buf + be64_to_cpu(hdr->array_offset));
+
+	*esi = *curr_esi;
+
+out_buf:
+	kfree(buf);
+
+	return ret;
+}
+
+/*
+ * sysfs show callback for "desc": query the attribute afresh and print its
+ * description text.
+ */
+static ssize_t desc_show(struct kobject *kobj,
+			 struct kobj_attribute *kobj_attr,
+			 char *buf)
+{
+	struct energy_scale_attribute attr;
+	struct papr_attr *pattr;
+	int rc;
+
+	pattr = container_of(kobj_attr, struct papr_attr, kobj_attr);
+
+	rc = papr_get_attr(pattr->id, &attr);
+	if (rc != 0)
+		return rc;
+
+	return sysfs_emit(buf, "%s\n", attr.desc);
+}
+
+/*
+ * sysfs show callback for "value": query the attribute afresh and print
+ * its numeric value in CPU byte order.
+ */
+static ssize_t val_show(struct kobject *kobj,
+			struct kobj_attribute *kobj_attr,
+			char *buf)
+{
+	struct energy_scale_attribute attr;
+	struct papr_attr *pattr;
+	int rc;
+
+	pattr = container_of(kobj_attr, struct papr_attr, kobj_attr);
+
+	rc = papr_get_attr(pattr->id, &attr);
+	if (rc != 0)
+		return rc;
+
+	return sysfs_emit(buf, "%llu\n", be64_to_cpu(attr.val));
+}
+
+/*
+ * sysfs show callback for "value_desc": query the attribute afresh and
+ * print the string form of its value.
+ */
+static ssize_t val_desc_show(struct kobject *kobj,
+			     struct kobj_attribute *kobj_attr,
+			     char *buf)
+{
+	struct energy_scale_attribute attr;
+	struct papr_attr *pattr;
+	int rc;
+
+	pattr = container_of(kobj_attr, struct papr_attr, kobj_attr);
+
+	rc = papr_get_attr(pattr->id, &attr);
+	if (rc != 0)
+		return rc;
+
+	return sysfs_emit(buf, "%s\n", attr.value_desc);
+}
+
+static struct papr_ops_info { /* file name / show callback pairs used by add_attr() */
+ const char *attr_name;
+ ssize_t (*show)(struct kobject *kobj, struct kobj_attribute *kobj_attr,
+ char *buf);
+} ops_info[KOBJ_MAX_ATTRS] = {
+ { "desc", desc_show },
+ { "value", val_show },
+ { "value_desc", val_desc_show }, /* add_attr_group() skips this entry when show_val_desc is false */
+};
+
+/*
+ * Initialise @attr as a read-only (0444) sysfs file for attribute @id,
+ * taking its name and show callback from ops_info[@index].
+ */
+static void add_attr(u64 id, int index, struct papr_attr *attr)
+{
+	const struct papr_ops_info *ops = &ops_info[index];
+	struct kobj_attribute *kattr = &attr->kobj_attr;
+
+	attr->id = id;
+	sysfs_attr_init(&kattr->attr);
+	kattr->attr.name = ops->attr_name;
+	kattr->attr.mode = 0444;
+	kattr->show = ops->show;
+}
+
+/*
+ * Populate the files of group @pg for attribute @id and register the group
+ * under esi_kobj. The "value_desc" file is left out unless @show_val_desc
+ * is set. Returns the result of sysfs_create_group().
+ */
+static int add_attr_group(u64 id, struct papr_group *pg, bool show_val_desc)
+{
+	int slot;
+
+	for (slot = 0; slot < KOBJ_MAX_ATTRS; slot++) {
+		if (show_val_desc ||
+		    strcmp(ops_info[slot].attr_name, "value_desc")) {
+			add_attr(id, slot, &pg->pgattrs[slot]);
+			pg->pg.attrs[slot] = &pg->pgattrs[slot].kobj_attr.attr;
+		}
+	}
+
+	return sysfs_create_group(esi_kobj, &pg->pg);
+}
+
+
+/*
+ * Query all energy scale attributes once and build the
+ * /sys/firmware/papr/energy_scale_info/<id>/ hierarchy from the reply.
+ *
+ * Returns 0 on success, -ENXIO when the firmware features are missing,
+ * -EIO when the hcall fails or its reply does not fit in the buffer,
+ * -ENOMEM on allocation or kobject creation failure, or the error
+ * returned while registering an attribute group.
+ */
+static int __init papr_init(void)
+{
+	int esi_buf_size = ESI_HDR_SIZE + (CURR_MAX_ESI_ATTRS * ESI_ATTR_SIZE);
+	int ret, idx, i, max_esi_attrs = CURR_MAX_ESI_ATTRS;
+	struct h_energy_scale_info_hdr *esi_hdr;
+	struct energy_scale_attribute *esi_attrs;
+	uint64_t num_attrs;
+	char *esi_buf;
+
+	if (!firmware_has_feature(FW_FEATURE_LPAR) ||
+	    !firmware_has_feature(FW_FEATURE_ENERGY_SCALE_INFO)) {
+		return -ENXIO;
+	}
+
+	esi_buf = kmalloc(esi_buf_size, GFP_KERNEL);
+	if (esi_buf == NULL)
+		return -ENOMEM;
+	/*
+	 * hcall(
+	 * uint64 H_GET_ENERGY_SCALE_INFO,  // Get energy scale info
+	 * uint64 flags,            // Per the flag request
+	 * uint64 firstAttributeId, // The attribute id
+	 * uint64 bufferAddress,    // Guest physical address of the output buffer
+	 * uint64 bufferSize);      // The size in bytes of the output buffer
+	 */
+retry:
+
+	ret = plpar_hcall_norets(H_GET_ENERGY_SCALE_INFO, ESI_FLAGS_ALL, 0,
+				 virt_to_phys(esi_buf), esi_buf_size);
+
+	/*
+	 * If the hcall fails with not enough memory for either the
+	 * header or data, attempt to allocate more
+	 */
+	if (ret == H_PARTIAL || ret == H_P4) {
+		char *temp_esi_buf;
+
+		/*
+		 * Make room for four more attribute records. The size must
+		 * be computed from the record size (ESI_ATTR_SIZE), otherwise
+		 * the "bigger" buffer ends up smaller than the initial one.
+		 */
+		max_esi_attrs += 4;
+		esi_buf_size = ESI_HDR_SIZE + (ESI_ATTR_SIZE * max_esi_attrs);
+
+		temp_esi_buf = krealloc(esi_buf, esi_buf_size, GFP_KERNEL);
+		if (!temp_esi_buf) {
+			/* krealloc() leaves the old buffer allocated on failure */
+			ret = -ENOMEM;
+			goto out_free_esi_buf;
+		}
+		esi_buf = temp_esi_buf;
+
+		goto retry;
+	}
+
+	if (ret != H_SUCCESS) {
+		pr_warn("hcall failed: H_GET_ENERGY_SCALE_INFO, ret: %d\n", ret);
+		ret = -EIO;
+		goto out_free_esi_buf;
+	}
+
+	esi_hdr = (struct h_energy_scale_info_hdr *) esi_buf;
+	num_attrs = be64_to_cpu(esi_hdr->num_attrs);
+	esi_attrs = (struct energy_scale_attribute *)
+		    (esi_buf + be64_to_cpu(esi_hdr->array_offset));
+
+	/* Reject a reply whose attribute array would run past the buffer */
+	if (esi_buf_size <
+	    be64_to_cpu(esi_hdr->array_offset) +
+	    (num_attrs * sizeof(struct energy_scale_attribute))) {
+		ret = -EIO;
+		goto out_free_esi_buf;
+	}
+
+	/* Every failure from here up to group registration is an allocation failure */
+	ret = -ENOMEM;
+
+	papr_groups = kcalloc(num_attrs, sizeof(*papr_groups), GFP_KERNEL);
+	if (!papr_groups)
+		goto out_free_esi_buf;
+
+	papr_kobj = kobject_create_and_add("papr", firmware_kobj);
+	if (!papr_kobj) {
+		pr_warn("kobject_create_and_add papr failed\n");
+		goto out_papr_groups;
+	}
+
+	esi_kobj = kobject_create_and_add("energy_scale_info", papr_kobj);
+	if (!esi_kobj) {
+		pr_warn("kobject_create_and_add energy_scale_info failed\n");
+		goto out_kobj;
+	}
+
+	/* Allocate the groups before registering */
+	for (idx = 0; idx < num_attrs; idx++) {
+		papr_groups[idx].pg.attrs = kcalloc(KOBJ_MAX_ATTRS + 1,
+					    sizeof(*papr_groups[idx].pg.attrs),
+					    GFP_KERNEL);
+		if (!papr_groups[idx].pg.attrs)
+			goto out_pgattrs;
+
+		papr_groups[idx].pg.name = kasprintf(GFP_KERNEL, "%lld",
+					     be64_to_cpu(esi_attrs[idx].id));
+		if (papr_groups[idx].pg.name == NULL)
+			goto out_pgattrs;
+	}
+
+	for (idx = 0; idx < num_attrs; idx++) {
+		bool show_val_desc = true;
+
+		/* Do not add the value desc attr if it does not exist */
+		if (strnlen(esi_attrs[idx].value_desc,
+			    sizeof(esi_attrs[idx].value_desc)) == 0)
+			show_val_desc = false;
+
+		ret = add_attr_group(be64_to_cpu(esi_attrs[idx].id),
+				     &papr_groups[idx],
+				     show_val_desc);
+		if (ret) {
+			pr_warn("Failed to create papr attribute group %s\n",
+				papr_groups[idx].pg.name);
+			goto out_pgattrs;
+		}
+	}
+
+	kfree(esi_buf);
+	return 0;
+out_pgattrs:
+	/*
+	 * papr_groups came zeroed from kcalloc(), so slots that were never
+	 * populated hold NULL pointers and kfree() ignores them. Walking
+	 * the whole array also releases the attrs of the slot whose name
+	 * allocation failed.
+	 */
+	for (i = 0; i < num_attrs; i++) {
+		kfree(papr_groups[i].pg.attrs);
+		kfree(papr_groups[i].pg.name);
+	}
+	kobject_put(esi_kobj);
+out_kobj:
+	kobject_put(papr_kobj);
+out_papr_groups:
+	kfree(papr_groups);
+out_free_esi_buf:
+	kfree(esi_buf);
+
+	return ret;
+}
+
+machine_device_initcall(pseries, papr_init);
diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c
new file mode 100644
index 000000000..1a53e048c
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/papr_scm.c
@@ -0,0 +1,1581 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define pr_fmt(fmt) "papr-scm: " fmt
+
+#include <linux/of.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/ioport.h>
+#include <linux/slab.h>
+#include <linux/ndctl.h>
+#include <linux/sched.h>
+#include <linux/libnvdimm.h>
+#include <linux/platform_device.h>
+#include <linux/delay.h>
+#include <linux/seq_buf.h>
+#include <linux/nd.h>
+
+#include <asm/plpar_wrappers.h>
+#include <asm/papr_pdsm.h>
+#include <asm/mce.h>
+#include <asm/unaligned.h>
+#include <linux/perf_event.h>
+
+/*
+ * Target-address argument passed to H_SCM_BIND_MEM by drc_pmem_bind().
+ * NOTE(review): presumably asks the hypervisor to pick the address - confirm.
+ */
+#define BIND_ANY_ADDR (~0ul)
+
+/* Bitmask of the ND_CMD_* dimm commands that is_cmd_valid() accepts */
+#define PAPR_SCM_DIMM_CMD_MASK \
+ ((1ul << ND_CMD_GET_CONFIG_SIZE) | \
+ (1ul << ND_CMD_GET_CONFIG_DATA) | \
+ (1ul << ND_CMD_SET_CONFIG_DATA) | \
+ (1ul << ND_CMD_CALL))
+
+/*
+ * DIMM health bitmap indicators.
+ * Bit 'n' below is counted from the most significant bit of the 64-bit
+ * bitmap, hence the (63 - n) shifts.
+ */
+/* SCM device is unable to persist memory contents */
+#define PAPR_PMEM_UNARMED (1ULL << (63 - 0))
+/* SCM device failed to persist memory contents */
+#define PAPR_PMEM_SHUTDOWN_DIRTY (1ULL << (63 - 1))
+/* SCM device contents are persisted from previous IPL */
+#define PAPR_PMEM_SHUTDOWN_CLEAN (1ULL << (63 - 2))
+/* SCM device contents are not persisted from previous IPL */
+#define PAPR_PMEM_EMPTY (1ULL << (63 - 3))
+/* SCM device memory life remaining is critically low */
+#define PAPR_PMEM_HEALTH_CRITICAL (1ULL << (63 - 4))
+/* SCM device will be guarded off next IPL due to failure */
+#define PAPR_PMEM_HEALTH_FATAL (1ULL << (63 - 5))
+/* SCM contents cannot persist due to current platform health status */
+#define PAPR_PMEM_HEALTH_UNHEALTHY (1ULL << (63 - 6))
+/* SCM device is unable to persist memory contents in certain conditions */
+#define PAPR_PMEM_HEALTH_NON_CRITICAL (1ULL << (63 - 7))
+/* SCM device is encrypted */
+#define PAPR_PMEM_ENCRYPTED (1ULL << (63 - 8))
+/* SCM device has been scrubbed and locked */
+#define PAPR_PMEM_SCRUBBED_AND_LOCKED (1ULL << (63 - 9))
+
+/* Bits status indicators for health bitmap indicating unarmed dimm */
+#define PAPR_PMEM_UNARMED_MASK (PAPR_PMEM_UNARMED | \
+ PAPR_PMEM_HEALTH_UNHEALTHY)
+
+/* Bits status indicators for health bitmap indicating unflushed dimm */
+#define PAPR_PMEM_BAD_SHUTDOWN_MASK (PAPR_PMEM_SHUTDOWN_DIRTY)
+
+/* Bits status indicators for health bitmap indicating unrestored dimm */
+#define PAPR_PMEM_BAD_RESTORE_MASK (PAPR_PMEM_EMPTY)
+
+/* Bit status indicators for smart event notification */
+#define PAPR_PMEM_SMART_EVENT_MASK (PAPR_PMEM_HEALTH_CRITICAL | \
+ PAPR_PMEM_HEALTH_FATAL | \
+ PAPR_PMEM_HEALTH_UNHEALTHY)
+
+/*
+ * 8-character eye catcher ("SCMSTATS") and version number that
+ * drc_pmem_query_stats() writes into the request header.
+ */
+#define PAPR_SCM_PERF_STATS_EYECATCHER __stringify(SCMSTATS)
+#define PAPR_SCM_PERF_STATS_VERSION 0x1
+
+/* Struct holding a single performance metric */
+struct papr_scm_perf_stat {
+ /* 8-byte statistic id; filled with memcpy() and printed with "%.8s" */
+ u8 stat_id[8];
+ /* Statistic value, big-endian (readers convert with be64_to_cpu()) */
+ __be64 stat_val;
+} __packed;
+
+/* Struct exchanged between kernel and PHYP for fetching drc perf stats */
+struct papr_scm_perf_stats {
+ /* Set to PAPR_SCM_PERF_STATS_EYECATCHER by drc_pmem_query_stats() */
+ u8 eye_catcher[8];
+ /* Should be PAPR_SCM_PERF_STATS_VERSION */
+ __be32 stats_version;
+ /* Number of stats following */
+ __be32 num_statistics;
+ /* zero or more performance metrics */
+ struct papr_scm_perf_stat scm_statistic[];
+} __packed;
+
+/* private struct associated with each region */
+struct papr_scm_priv {
+ struct platform_device *pdev;
+ struct device_node *dn;
+ /* DRC index identifying this device in the H_SCM_* hcalls */
+ uint32_t drc_index;
+ /* Number of blocks requested from H_SCM_BIND_MEM */
+ uint64_t blocks;
+ /* Size of one block; used to verify that the bound range is contiguous */
+ uint64_t block_size;
+ /* Size of the config/metadata area; bounds metadata reads and writes */
+ int metadata_size;
+ bool is_volatile;
+ bool hcall_flush_required;
+
+ /* Address the DRC is bound to, as returned by the bind/query hcalls */
+ uint64_t bound_addr;
+
+ struct nvdimm_bus_descriptor bus_desc;
+ struct nvdimm_bus *bus;
+ struct nvdimm *nvdimm;
+ struct resource res;
+ struct nd_region *region;
+ struct nd_interleave_set nd_set;
+ /* NOTE(review): presumably the link into papr_nd_regions - confirm */
+ struct list_head region_list;
+
+ /* Protect dimm health data from concurrent read/writes */
+ struct mutex health_mutex;
+
+ /* Last time the health information of the dimm was updated */
+ unsigned long lasthealth_jiffies;
+
+ /* Health information for the dimm */
+ u64 health_bitmap;
+
+ /* Holds the last known dirty shutdown counter value */
+ u64 dirty_shutdown_counter;
+
+ /*
+ * length of the stat buffer as expected by phyp; zero is treated as
+ * "performance stats not available" throughout this file
+ */
+ size_t stat_buffer_len;
+
+ /* The bits which needs to be overridden */
+ u64 health_bitmap_inject_mask;
+};
+
+/*
+ * Issue H_SCM_FLUSH for the DRC backing 'nd_region', re-issuing the hcall
+ * with the returned continuation token for as long as the hypervisor reports
+ * a busy status. 'bio' is not used.
+ *
+ * Returns 0 on success or -EIO if the hcall fails.
+ */
+static int papr_scm_pmem_flush(struct nd_region *nd_region,
+			       struct bio *bio __maybe_unused)
+{
+	struct papr_scm_priv *p = nd_region_provider_data(nd_region);
+	unsigned long ret_buf[PLPAR_HCALL_BUFSIZE];
+	unsigned long token = 0;
+	long rc;
+
+	dev_dbg(&p->pdev->dev, "flush drc 0x%x", p->drc_index);
+
+	for (;;) {
+		rc = plpar_hcall(H_SCM_FLUSH, ret_buf, p->drc_index, token);
+		token = ret_buf[0];
+
+		if (H_IS_LONG_BUSY(rc))
+			/* Hypervisor asked us to back off for a while */
+			msleep(get_longbusy_msecs(rc));
+		else if (rc == H_BUSY)
+			cond_resched();
+		else
+			break;
+	}
+
+	if (!rc) {
+		dev_dbg(&p->pdev->dev, "flush drc 0x%x complete", p->drc_index);
+		return 0;
+	}
+
+	dev_err(&p->pdev->dev, "flush error: %ld", rc);
+	return -EIO;
+}
+
+/*
+ * NOTE(review): presumably the list of all papr_scm regions (linked through
+ * papr_scm_priv.region_list) and the mutex guarding it; their users are not
+ * visible in this part of the file - confirm.
+ */
+static LIST_HEAD(papr_nd_regions);
+static DEFINE_MUTEX(papr_ndr_lock);
+
+/*
+ * Bind all p->blocks blocks of the DRC with H_SCM_BIND_MEM and record the
+ * resulting address in p->bound_addr.
+ *
+ * Returns 0 on success, otherwise the non-zero status of the hcall.
+ */
+static int drc_pmem_bind(struct papr_scm_priv *p)
+{
+	unsigned long ret[PLPAR_HCALL_BUFSIZE];
+	uint64_t first_addr = 0;
+	uint64_t token = 0;
+	int64_t rc;
+
+	/*
+	 * When the hypervisor cannot map all the requested memory in a single
+	 * hcall it returns H_BUSY and we call again with the token until
+	 * we get H_SUCCESS. Aborting the retry loop before getting H_SUCCESS
+	 * leaves the system in an undefined state, so we wait.
+	 */
+	do {
+		rc = plpar_hcall(H_SCM_BIND_MEM, ret, p->drc_index, 0,
+				 p->blocks, BIND_ANY_ADDR, token);
+		token = ret[0];
+
+		/* Keep the first non-zero address reported by the hcall */
+		if (first_addr == 0)
+			first_addr = ret[1];
+
+		cond_resched();
+	} while (rc == H_BUSY);
+
+	if (rc)
+		return rc;
+
+	p->bound_addr = first_addr;
+	dev_dbg(&p->pdev->dev, "bound drc 0x%x to 0x%lx\n",
+		p->drc_index, (unsigned long)first_addr);
+	return 0;
+}
+
+/*
+ * Unbind every SCM resource associated with the DRC using H_SCM_UNBIND_ALL.
+ * A failure is only logged; nothing is returned to the caller.
+ */
+static void drc_pmem_unbind(struct papr_scm_priv *p)
+{
+	unsigned long ret[PLPAR_HCALL_BUFSIZE];
+	uint64_t token = 0;
+	int64_t rc;
+
+	dev_dbg(&p->pdev->dev, "unbind drc 0x%x\n", p->drc_index);
+
+	/* NB: unbind has the same retry requirements as drc_pmem_bind() */
+	for (;;) {
+		/* Unbind of all SCM resources associated with drcIndex */
+		rc = plpar_hcall(H_SCM_UNBIND_ALL, ret, H_UNBIND_SCOPE_DRC,
+				 p->drc_index, token);
+		token = ret[0];
+
+		if (H_IS_LONG_BUSY(rc))
+			/* Stalled for some time: sleep as long as requested */
+			msleep(get_longbusy_msecs(rc));
+		else if (rc == H_BUSY)
+			cond_resched();
+		else
+			break;
+	}
+
+	if (!rc)
+		dev_dbg(&p->pdev->dev, "unbind drc 0x%x complete\n",
+			p->drc_index);
+	else
+		dev_err(&p->pdev->dev, "unbind error: %lld\n", rc);
+}
+
+/*
+ * Reuse an existing binding of the DRC when there is one: query the address
+ * of the first and of the last block and accept the binding only if the two
+ * are exactly (blocks - 1) * block_size apart. If either query fails or the
+ * range does not match, unbind the DRC and bind it afresh.
+ *
+ * Returns 0 when the existing binding is usable, otherwise the result of
+ * drc_pmem_bind().
+ */
+static int drc_pmem_query_n_bind(struct papr_scm_priv *p)
+{
+	unsigned long ret[PLPAR_HCALL_BUFSIZE];
+	unsigned long first_blk_addr;
+	unsigned long last_blk_addr;
+	int64_t rc;
+
+	rc = plpar_hcall(H_SCM_QUERY_BLOCK_MEM_BINDING, ret,
+			 p->drc_index, 0);
+	if (rc)
+		goto rebind;
+	first_blk_addr = ret[0];
+
+	/* Make sure the full region is bound. */
+	rc = plpar_hcall(H_SCM_QUERY_BLOCK_MEM_BINDING, ret,
+			 p->drc_index, p->blocks - 1);
+	if (rc)
+		goto rebind;
+	last_blk_addr = ret[0];
+
+	if ((last_blk_addr - first_blk_addr) !=
+	    ((p->blocks - 1) * p->block_size))
+		goto rebind;
+
+	p->bound_addr = first_blk_addr;
+	dev_dbg(&p->pdev->dev, "bound drc 0x%x to 0x%lx\n", p->drc_index,
+		first_blk_addr);
+	return 0;
+
+rebind:
+	dev_info(&p->pdev->dev,
+		 "Failed to query, trying an unbind followed by bind");
+	drc_pmem_unbind(p);
+	return drc_pmem_bind(p);
+}
+
+/*
+ * Ask PHYP for the performance statistics of the dimm described by 'p'.
+ *
+ * The arguments select one of three modes:
+ * - buff_stats == NULL: nothing is copied; the return value is the size in
+ *   bytes of the buffer needed to hold all supported statistics.
+ * - buff_stats != NULL and num_stats == 0: all known statistics are written
+ *   to 'buff_stats', which is expected to be p->stat_buffer_len bytes long.
+ * - buff_stats != NULL and num_stats > 0: the 'num_stats' statistics
+ *   requested in 'buff_stats' are fetched into it.
+ *
+ * Returns 0 (or the buffer size in the first mode) on success and a negative
+ * errno when the hcall fails.
+ */
+static ssize_t drc_pmem_query_stats(struct papr_scm_priv *p,
+				    struct papr_scm_perf_stats *buff_stats,
+				    unsigned int num_stats)
+{
+	unsigned long ret[PLPAR_HCALL_BUFSIZE];
+	/* Without an out buffer the size stays zero */
+	size_t size = 0;
+	s64 rc;
+
+	if (buff_stats) {
+		/* Fill in the request header */
+		memcpy(buff_stats->eye_catcher,
+		       PAPR_SCM_PERF_STATS_EYECATCHER, 8);
+		buff_stats->stats_version =
+			cpu_to_be32(PAPR_SCM_PERF_STATS_VERSION);
+		buff_stats->num_statistics =
+			cpu_to_be32(num_stats);
+
+		/*
+		 * Size the buffer from num_stats when given, otherwise fall
+		 * back to the prefetched maximum buffer length.
+		 */
+		size = num_stats ?
+			sizeof(struct papr_scm_perf_stats) +
+			num_stats * sizeof(struct papr_scm_perf_stat) :
+			p->stat_buffer_len;
+	}
+
+	/* Do the HCALL asking PHYP for info */
+	rc = plpar_hcall(H_SCM_PERFORMANCE_STATS, ret, p->drc_index,
+			 buff_stats ? virt_to_phys(buff_stats) : 0,
+			 size);
+
+	switch (rc) {
+	case H_SUCCESS:
+		break;
+	case H_PARTIAL:
+		/* The error was due to an unknown stat-id */
+		dev_err(&p->pdev->dev,
+			"Unknown performance stats, Err:0x%016lX\n", ret[0]);
+		return -ENOENT;
+	case H_AUTHORITY:
+		dev_info(&p->pdev->dev,
+			 "Permission denied while accessing performance stats");
+		return -EPERM;
+	case H_UNSUPPORTED:
+		dev_dbg(&p->pdev->dev, "Performance stats unsupported\n");
+		return -EOPNOTSUPP;
+	default:
+		dev_err(&p->pdev->dev,
+			"Failed to query performance stats, Err:%lld\n", rc);
+		return -EIO;
+	}
+
+	if (!size) {
+		/* Only the stat buffer size was requested */
+		dev_dbg(&p->pdev->dev,
+			"Performance stats size %ld\n", ret[0]);
+		return ret[0];
+	}
+
+	/* Successfully fetched the requested stats from phyp */
+	dev_dbg(&p->pdev->dev,
+		"Performance stats returned %d stats\n",
+		be32_to_cpu(buff_stats->num_statistics));
+	return 0;
+}
+
+#ifdef CONFIG_PERF_EVENTS
+/* Get the nvdimm_pmu that embeds the given 'pmu' member */
+#define to_nvdimm_pmu(_pmu) container_of(_pmu, struct nvdimm_pmu, pmu)
+
+/*
+ * Maps a perf event code (event->attr.config) to the 8-byte statistic id
+ * that papr_scm_pmu_get_value() copies into the stats request. Index 0 has
+ * no entry and is rejected as an invalid event code.
+ */
+static const char * const nvdimm_events_map[] = {
+ [1] = "CtlResCt",
+ [2] = "CtlResTm",
+ [3] = "PonSecs ",
+ [4] = "MemLife ",
+ [5] = "CritRscU",
+ [6] = "HostLCnt",
+ [7] = "HostSCnt",
+ [8] = "HostSDur",
+ [9] = "HostLDur",
+ [10] = "MedRCnt ",
+ [11] = "MedWCnt ",
+ [12] = "MedRDur ",
+ [13] = "MedWDur ",
+ [14] = "CchRHCnt",
+ [15] = "CchWHCnt",
+ [16] = "FastWCnt",
+};
+
+/*
+ * Fetch the current value of the statistic selected by event->attr.config
+ * for the dimm whose papr_scm_priv is the driver data of 'dev'.
+ *
+ * On success stores the value in '*count' and returns 0. Returns -EINVAL for
+ * an invalid event code or missing driver data, -ENOMEM if the request
+ * buffer cannot be allocated, or the error from drc_pmem_query_stats().
+ */
+static int papr_scm_pmu_get_value(struct perf_event *event, struct device *dev, u64 *count)
+{
+	struct papr_scm_priv *p = dev_get_drvdata(dev);
+	struct papr_scm_perf_stats *stats;
+	struct papr_scm_perf_stat *stat;
+	u64 code = event->attr.config;
+	int rc;
+
+	/* Invalid eventcode or no private data to work with */
+	if (code == 0 || code >= ARRAY_SIZE(nvdimm_events_map) || !p)
+		return -EINVAL;
+
+	/* Request buffer: header plus room for a single performance stat */
+	stats = kzalloc(sizeof(struct papr_scm_perf_stats) +
+			sizeof(struct papr_scm_perf_stat), GFP_KERNEL);
+	if (!stats)
+		return -ENOMEM;
+
+	stat = &stats->scm_statistic[0];
+	memcpy(&stat->stat_id, nvdimm_events_map[code],
+	       sizeof(stat->stat_id));
+	stat->stat_val = 0;
+
+	rc = drc_pmem_query_stats(p, stats, 1);
+	if (rc >= 0) {
+		*count = be64_to_cpu(stat->stat_val);
+		rc = 0;
+	}
+
+	kfree(stats);
+	return rc;
+}
+
+/*
+ * perf 'event_init' callback: decide whether 'event' can be handled by this
+ * PMU.
+ *
+ * Returns 0 if the event is acceptable, -ENOENT if it belongs to another
+ * PMU type, -EOPNOTSUPP for sampling or branch-stack events and -EINVAL for
+ * a missing PMU / private data or an event code outside nvdimm_events_map.
+ */
+static int papr_scm_pmu_event_init(struct perf_event *event)
+{
+	struct nvdimm_pmu *nd_pmu = to_nvdimm_pmu(event->pmu);
+	struct papr_scm_priv *p;
+
+	if (!nd_pmu)
+		return -EINVAL;
+
+	/* test the event attr type for PMU enumeration */
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	/* it does not support event sampling mode */
+	if (is_sampling_event(event))
+		return -EOPNOTSUPP;
+
+	/* no branch sampling */
+	if (has_branch_stack(event))
+		return -EOPNOTSUPP;
+
+	/* Same accessor as papr_scm_pmu_get_value() uses for the device */
+	p = dev_get_drvdata(nd_pmu->dev);
+	if (!p)
+		return -EINVAL;
+
+	/*
+	 * Invalid eventcode: bound the code by the event table itself so this
+	 * check cannot drift from the one in papr_scm_pmu_get_value().
+	 */
+	if (event->attr.config == 0 ||
+	    event->attr.config >= ARRAY_SIZE(nvdimm_events_map))
+		return -EINVAL;
+
+	return 0;
+}
+
+/*
+ * perf 'add' callback. When PERF_EF_START is set in 'flags', the current
+ * counter value is read and stored as the event's previous count so that
+ * later reads can compute a delta.
+ *
+ * Returns 0 on success, -EINVAL without a PMU, or the error from reading the
+ * counter.
+ */
+static int papr_scm_pmu_add(struct perf_event *event, int flags)
+{
+	struct nvdimm_pmu *nd_pmu = to_nvdimm_pmu(event->pmu);
+	u64 start_count;
+	int rc;
+
+	if (!nd_pmu)
+		return -EINVAL;
+
+	/* Nothing to record unless the event is started right away */
+	if (!(flags & PERF_EF_START))
+		return 0;
+
+	rc = papr_scm_pmu_get_value(event, nd_pmu->dev, &start_count);
+	if (rc)
+		return rc;
+
+	local64_set(&event->hw.prev_count, start_count);
+	return 0;
+}
+
+/*
+ * perf 'read' callback: fetch the current counter value, remember it as the
+ * new previous count and add the difference to the event count. The event
+ * is left untouched if there is no PMU or the value cannot be read.
+ */
+static void papr_scm_pmu_read(struct perf_event *event)
+{
+	struct nvdimm_pmu *nd_pmu = to_nvdimm_pmu(event->pmu);
+	u64 latest, previous;
+
+	if (!nd_pmu || papr_scm_pmu_get_value(event, nd_pmu->dev, &latest))
+		return;
+
+	previous = local64_xchg(&event->hw.prev_count, latest);
+	local64_add(latest - previous, &event->count);
+}
+
+/*
+ * perf 'del' callback: do a final papr_scm_pmu_read() so the event count
+ * includes the latest counter value. 'flags' is not used.
+ */
+static void papr_scm_pmu_del(struct perf_event *event, int flags)
+{
+ papr_scm_pmu_read(event);
+}
+
+/*
+ * Allocate an nvdimm_pmu for the dimm, set up its callbacks and cpumask and
+ * register it. On success the pmu is stored in pdev->archdata.priv; any
+ * failure is only logged and the allocation is released.
+ */
+static void papr_scm_pmu_register(struct papr_scm_priv *p)
+{
+	struct nvdimm_pmu *nd_pmu;
+	struct pmu *pmu;
+	int rc, nodeid;
+
+	nd_pmu = kzalloc(sizeof(*nd_pmu), GFP_KERNEL);
+	if (!nd_pmu) {
+		rc = -ENOMEM;
+		goto report_failure;
+	}
+
+	/* Without a stat buffer there are no statistics to expose */
+	if (!p->stat_buffer_len) {
+		rc = -ENOENT;
+		goto free_pmu;
+	}
+
+	pmu = &nd_pmu->pmu;
+	pmu->name = nvdimm_name(p->nvdimm);
+	pmu->task_ctx_nr = perf_invalid_context;
+	pmu->capabilities = PERF_PMU_CAP_NO_INTERRUPT |
+			    PERF_PMU_CAP_NO_EXCLUDE;
+
+	pmu->event_init = papr_scm_pmu_event_init;
+	pmu->add = papr_scm_pmu_add;
+	pmu->del = papr_scm_pmu_del;
+	pmu->read = papr_scm_pmu_read;
+
+	/* Use the cpus of the online node the device maps to */
+	nodeid = numa_map_to_online_node(dev_to_node(&p->pdev->dev));
+	nd_pmu->arch_cpumask = *cpumask_of_node(nodeid);
+
+	rc = register_nvdimm_pmu(nd_pmu, p->pdev);
+	if (rc)
+		goto free_pmu;
+
+	/*
+	 * Set archdata.priv value to nvdimm_pmu structure, to handle the
+	 * unregistering of pmu device.
+	 */
+	p->pdev->archdata.priv = nd_pmu;
+	return;
+
+free_pmu:
+	kfree(nd_pmu);
+report_failure:
+	dev_info(&p->pdev->dev, "nvdimm pmu didn't register rc=%d\n", rc);
+}
+
+#else
+/* No-op stub used when CONFIG_PERF_EVENTS is not enabled */
+static void papr_scm_pmu_register(struct papr_scm_priv *p) { }
+#endif
+
+/*
+ * Issue the H_SCM_HEALTH hcall and store the resulting health bitmap, with
+ * the inject mask applied, in papr_scm_priv. An unimplemented hcall
+ * (H_FUNCTION) is treated as an empty bitmap.
+ *
+ * Returns 0 on success or -ENXIO if the hcall fails.
+ */
+static int __drc_pmem_query_health(struct papr_scm_priv *p)
+{
+	unsigned long ret[PLPAR_HCALL_BUFSIZE];
+	u64 bitmap = 0;
+	long rc;
+
+	/* issue the hcall */
+	rc = plpar_hcall(H_SCM_HEALTH, ret, p->drc_index);
+
+	switch (rc) {
+	case H_SUCCESS:
+		/* Only the bits flagged in the returned mask are meaningful */
+		bitmap = ret[0] & ret[1];
+		break;
+	case H_FUNCTION:
+		dev_info_once(&p->pdev->dev,
+			      "Hcall H_SCM_HEALTH not implemented, assuming empty health bitmap");
+		break;
+	default:
+		dev_err(&p->pdev->dev,
+			"Failed to query health information, Err:%ld\n", rc);
+		return -ENXIO;
+	}
+
+	/*
+	 * Allow injecting specific health bits via inject mask: every bit set
+	 * in the mask is forced on (a zero mask leaves the bitmap unchanged).
+	 */
+	bitmap |= p->health_bitmap_inject_mask;
+
+	p->lasthealth_jiffies = jiffies;
+	WRITE_ONCE(p->health_bitmap, bitmap);
+	dev_dbg(&p->pdev->dev,
+		"Queried dimm health info. Bitmap:0x%016lx Mask:0x%016lx\n",
+		ret[0], ret[1]);
+
+	return 0;
+}
+
+/* Min interval in seconds for assuming stable dimm health */
+#define MIN_HEALTH_QUERY_INTERVAL 60
+
+/*
+ * Make sure the cached dimm health is at most MIN_HEALTH_QUERY_INTERVAL
+ * seconds old, querying the hypervisor again only when it is older.
+ *
+ * Returns 0 on success, the error from mutex_lock_interruptible() if the
+ * wait for health_mutex is interrupted, or the error from
+ * __drc_pmem_query_health().
+ */
+static int drc_pmem_query_health(struct papr_scm_priv *p)
+{
+	unsigned long valid_until;
+	int rc;
+
+	/* Protect concurrent modifications to papr_scm_priv */
+	rc = mutex_lock_interruptible(&p->health_mutex);
+	if (rc)
+		return rc;
+
+	/* Jiffies value up to which the cached health data is trusted */
+	valid_until = p->lasthealth_jiffies +
+		      msecs_to_jiffies(MIN_HEALTH_QUERY_INTERVAL * 1000);
+
+	/* Fetch new health info only if the cached data has expired */
+	if (time_after(jiffies, valid_until))
+		rc = __drc_pmem_query_health(p);
+
+	mutex_unlock(&p->health_mutex);
+	return rc;
+}
+
+/*
+ * Read hdr->in_length bytes of dimm metadata, starting at hdr->in_offset,
+ * into hdr->out_buf using H_SCM_READ_METADATA.
+ *
+ * The data is fetched in chunks of 8, 4, 2 or 1 bytes, always taking the
+ * largest chunk that still fits in the remaining length.
+ *
+ * Returns 0 on success, -EINVAL if the requested range lies outside the
+ * metadata area or the hcall rejects a parameter, and -ENODEV if the hcall
+ * reports H_PARAMETER (bad DRC index).
+ */
+static int papr_scm_meta_get(struct papr_scm_priv *p,
+			     struct nd_cmd_get_config_data_hdr *hdr)
+{
+	unsigned long data[PLPAR_HCALL_BUFSIZE];
+	unsigned long offset, data_offset;
+	int len, read;
+	int64_t ret;
+
+	/*
+	 * Do the range check in 64 bits. Added in their own (narrower) type,
+	 * in_offset + in_length could wrap around and let a request far
+	 * outside the metadata area pass; it also guarantees that in_length
+	 * fits the signed 'len' counter used below.
+	 */
+	if ((u64)hdr->in_offset + hdr->in_length > p->metadata_size)
+		return -EINVAL;
+
+	for (len = hdr->in_length; len; len -= read) {
+
+		/* Position inside out_buf and inside the metadata area */
+		data_offset = hdr->in_length - len;
+		offset = hdr->in_offset + data_offset;
+
+		if (len >= 8)
+			read = 8;
+		else if (len >= 4)
+			read = 4;
+		else if (len >= 2)
+			read = 2;
+		else
+			read = 1;
+
+		ret = plpar_hcall(H_SCM_READ_METADATA, data, p->drc_index,
+				  offset, read);
+
+		if (ret == H_PARAMETER) /* bad DRC index */
+			return -ENODEV;
+		if (ret)
+			return -EINVAL; /* other invalid parameter */
+
+		/* Store the chunk using the width that was just read */
+		switch (read) {
+		case 8:
+			*(uint64_t *)(hdr->out_buf + data_offset) = be64_to_cpu(data[0]);
+			break;
+		case 4:
+			*(uint32_t *)(hdr->out_buf + data_offset) = be32_to_cpu(data[0] & 0xffffffff);
+			break;
+
+		case 2:
+			*(uint16_t *)(hdr->out_buf + data_offset) = be16_to_cpu(data[0] & 0xffff);
+			break;
+
+		case 1:
+			*(uint8_t *)(hdr->out_buf + data_offset) = (data[0] & 0xff);
+			break;
+		}
+	}
+	return 0;
+}
+
+/*
+ * Write hdr->in_length bytes from hdr->in_buf to the dimm metadata area,
+ * starting at hdr->in_offset, using H_SCM_WRITE_METADATA.
+ *
+ * The data is written in chunks of 8, 4, 2 or 1 bytes, always taking the
+ * largest chunk that still fits in the remaining length.
+ *
+ * Returns 0 on success, -EINVAL if the requested range lies outside the
+ * metadata area or the hcall rejects a parameter, and -ENODEV if the hcall
+ * reports H_PARAMETER (bad DRC index).
+ */
+static int papr_scm_meta_set(struct papr_scm_priv *p,
+			     struct nd_cmd_set_config_hdr *hdr)
+{
+	unsigned long offset, data_offset;
+	int len, wrote;
+	unsigned long data;
+	__be64 data_be;
+	int64_t ret;
+
+	/*
+	 * Do the range check in 64 bits. Added in their own (narrower) type,
+	 * in_offset + in_length could wrap around and let a request far
+	 * outside the metadata area pass; it also guarantees that in_length
+	 * fits the signed 'len' counter used below.
+	 */
+	if ((u64)hdr->in_offset + hdr->in_length > p->metadata_size)
+		return -EINVAL;
+
+	for (len = hdr->in_length; len; len -= wrote) {
+
+		/* Position inside in_buf and inside the metadata area */
+		data_offset = hdr->in_length - len;
+		offset = hdr->in_offset + data_offset;
+
+		if (len >= 8) {
+			data = *(uint64_t *)(hdr->in_buf + data_offset);
+			data_be = cpu_to_be64(data);
+			wrote = 8;
+		} else if (len >= 4) {
+			data = *(uint32_t *)(hdr->in_buf + data_offset);
+			data &= 0xffffffff;
+			data_be = cpu_to_be32(data);
+			wrote = 4;
+		} else if (len >= 2) {
+			data = *(uint16_t *)(hdr->in_buf + data_offset);
+			data &= 0xffff;
+			data_be = cpu_to_be16(data);
+			wrote = 2;
+		} else {
+			data_be = *(uint8_t *)(hdr->in_buf + data_offset);
+			data_be &= 0xff;
+			wrote = 1;
+		}
+
+		ret = plpar_hcall_norets(H_SCM_WRITE_METADATA, p->drc_index,
+					 offset, data_be, wrote);
+		if (ret == H_PARAMETER) /* bad DRC index */
+			return -ENODEV;
+		if (ret)
+			return -EINVAL; /* other invalid parameter */
+	}
+
+	return 0;
+}
+
+/*
+ * Sanity check the arguments of a dimm-control request.
+ *
+ * The command must target a dimm and be part of PAPR_SCM_DIMM_CMD_MASK. For
+ * ND_CMD_CALL the envelope is checked as well: buffer size, PAPR family,
+ * PDSM command number and room for the returned PDSM header. Validation of
+ * PDSM payloads happens later in papr_scm_service_pdsm.
+ *
+ * Returns 0 if the request may be processed further, -EINVAL otherwise.
+ */
+static int is_cmd_valid(struct nvdimm *nvdimm, unsigned int cmd, void *buf,
+			unsigned int buf_len)
+{
+	unsigned long supported = PAPR_SCM_DIMM_CMD_MASK;
+	struct nd_cmd_pkg *pkg;
+	struct papr_scm_priv *p;
+	enum papr_pdsm pdsm;
+
+	/* Only dimm-specific calls are supported atm */
+	if (!nvdimm)
+		return -EINVAL;
+
+	/* get the provider data from struct nvdimm */
+	p = nvdimm_provider_data(nvdimm);
+
+	if (!test_bit(cmd, &supported)) {
+		dev_dbg(&p->pdev->dev, "Unsupported cmd=%u\n", cmd);
+		return -EINVAL;
+	}
+
+	/* Only CMD_CALL carries a pdsm request that needs more checks */
+	if (cmd != ND_CMD_CALL)
+		return 0;
+
+	/* Verify the envelope and envelope size */
+	if (!buf ||
+	    buf_len < (sizeof(struct nd_cmd_pkg) + ND_PDSM_HDR_SIZE)) {
+		dev_dbg(&p->pdev->dev, "Invalid pkg size=%u\n",
+			buf_len);
+		return -EINVAL;
+	}
+
+	/* Verify that the nd_cmd_pkg.nd_family is correct */
+	pkg = (struct nd_cmd_pkg *)buf;
+	if (pkg->nd_family != NVDIMM_FAMILY_PAPR) {
+		dev_dbg(&p->pdev->dev, "Invalid pkg family=0x%llx\n",
+			pkg->nd_family);
+		return -EINVAL;
+	}
+
+	/* Verify if the pdsm command is valid */
+	pdsm = (enum papr_pdsm)pkg->nd_command;
+	if (pdsm <= PAPR_PDSM_MIN || pdsm >= PAPR_PDSM_MAX) {
+		dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Invalid PDSM\n",
+			pdsm);
+		return -EINVAL;
+	}
+
+	/* Have enough space to hold returned 'nd_pkg_pdsm' header */
+	if (pkg->nd_size_out < ND_PDSM_HDR_SIZE) {
+		dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Invalid payload\n",
+			pdsm);
+		return -EINVAL;
+	}
+
+	/* Let the command be further processed */
+	return 0;
+}
+
+/*
+ * Fetch the "MemLife " statistic of the dimm and report it as the fuel
+ * gauge in the health payload, flagging the field as valid.
+ *
+ * Returns the size of struct nd_papr_pdsm_health on success, 0 when
+ * performance stats are not supported (payload left untouched), -ENOMEM if
+ * the request buffer cannot be allocated, or the error from
+ * drc_pmem_query_stats().
+ */
+static int papr_pdsm_fuel_gauge(struct papr_scm_priv *p,
+				union nd_pdsm_payload *payload)
+{
+	struct papr_scm_perf_stats *stats;
+	struct papr_scm_perf_stat *stat;
+	u64 gauge;
+	int rc;
+
+	/* Silently fail if fetching performance metrics isn't supported */
+	if (!p->stat_buffer_len)
+		return 0;
+
+	/* Request buffer: header plus room for a single performance stat */
+	stats = kzalloc(sizeof(struct papr_scm_perf_stats) +
+			sizeof(struct papr_scm_perf_stat), GFP_KERNEL);
+	if (!stats)
+		return -ENOMEM;
+
+	stat = &stats->scm_statistic[0];
+	memcpy(&stat->stat_id, "MemLife ", sizeof(stat->stat_id));
+	stat->stat_val = 0;
+
+	/* Fetch the fuel gauge and populate it in payload */
+	rc = drc_pmem_query_stats(p, stats, 1);
+	if (rc < 0) {
+		dev_dbg(&p->pdev->dev, "Err(%d) fetching fuel gauge\n", rc);
+	} else {
+		gauge = be64_to_cpu(stat->stat_val);
+		dev_dbg(&p->pdev->dev,
+			"Fetched fuel-gauge %llu", gauge);
+		payload->health.extension_flags |=
+			PDSM_DIMM_HEALTH_RUN_GAUGE_VALID;
+		payload->health.dimm_fuel_gauge = gauge;
+
+		rc = sizeof(struct nd_papr_pdsm_health);
+	}
+
+	kfree(stats);
+	return rc;
+}
+
+/*
+ * Add the dirty-shutdown-counter value to the pdsm.
+ * Copies p->dirty_shutdown_counter into the health payload, marks the field
+ * valid via PDSM_DIMM_DSC_VALID and returns the size of
+ * struct nd_papr_pdsm_health.
+ */
+static int papr_pdsm_dsc(struct papr_scm_priv *p,
+ union nd_pdsm_payload *payload)
+{
+ payload->health.extension_flags |= PDSM_DIMM_DSC_VALID;
+ payload->health.dimm_dsc = p->dirty_shutdown_counter;
+
+ return sizeof(struct nd_papr_pdsm_health);
+}
+
+/*
+ * Fetch fresh DIMM health info from the hypervisor and populate the health
+ * payload with it, followed by the fuel gauge and dirty-shutdown counter.
+ *
+ * Returns the size of struct nd_papr_pdsm_health on success, or a negative
+ * error if taking health_mutex is interrupted or the health query fails.
+ */
+static int papr_pdsm_health(struct papr_scm_priv *p,
+			    union nd_pdsm_payload *payload)
+{
+	u64 bitmap;
+	int health;
+	int rc;
+
+	/* Ensure dimm health mutex is taken preventing concurrent access */
+	rc = mutex_lock_interruptible(&p->health_mutex);
+	if (rc)
+		return rc;
+
+	/* Always fetch up-to-date dimm health data ignoring cached values */
+	rc = __drc_pmem_query_health(p);
+	if (rc) {
+		mutex_unlock(&p->health_mutex);
+		return rc;
+	}
+
+	bitmap = p->health_bitmap;
+
+	/* Derive the overall health, the most severe condition winning */
+	if (bitmap & PAPR_PMEM_HEALTH_FATAL)
+		health = PAPR_PDSM_DIMM_FATAL;
+	else if (bitmap & PAPR_PMEM_HEALTH_CRITICAL)
+		health = PAPR_PDSM_DIMM_CRITICAL;
+	else if (bitmap & PAPR_PMEM_HEALTH_UNHEALTHY)
+		health = PAPR_PDSM_DIMM_UNHEALTHY;
+	else
+		health = PAPR_PDSM_DIMM_HEALTHY;
+
+	/* update health struct with various flags derived from health bitmap */
+	payload->health = (struct nd_papr_pdsm_health) {
+		.extension_flags = 0,
+		.dimm_unarmed = !!(bitmap & PAPR_PMEM_UNARMED_MASK),
+		.dimm_bad_shutdown = !!(bitmap & PAPR_PMEM_BAD_SHUTDOWN_MASK),
+		.dimm_bad_restore = !!(bitmap & PAPR_PMEM_BAD_RESTORE_MASK),
+		.dimm_scrubbed = !!(bitmap & PAPR_PMEM_SCRUBBED_AND_LOCKED),
+		.dimm_locked = !!(bitmap & PAPR_PMEM_SCRUBBED_AND_LOCKED),
+		.dimm_encrypted = !!(bitmap & PAPR_PMEM_ENCRYPTED),
+		.dimm_health = health,
+	};
+
+	/* struct populated hence can release the mutex now */
+	mutex_unlock(&p->health_mutex);
+
+	/* Populate the fuel gauge meter in the payload */
+	papr_pdsm_fuel_gauge(p, payload);
+	/* Populate the dirty-shutdown-counter field */
+	papr_pdsm_dsc(p, payload);
+
+	return sizeof(struct nd_papr_pdsm_health);
+}
+
+/*
+ * Service a smart-inject PDSM: update p->health_bitmap_inject_mask according
+ * to the flags in the request and invalidate the cached health bitmap so
+ * the change shows up on the next health query.
+ *
+ * For every supported flag present in payload->smart_inject.flags the
+ * matching health bit is added to the inject mask when its '*_enable' field
+ * is set and removed from it otherwise. On return 'flags' holds the subset
+ * of requested flags that were handled.
+ *
+ * Returns the number of bytes written to the payload, i.e. the size of
+ * struct nd_papr_pdsm_smart_inject, or a negative error if taking
+ * health_mutex is interrupted.
+ */
+static int papr_pdsm_smart_inject(struct papr_scm_priv *p,
+				  union nd_pdsm_payload *payload)
+{
+	int rc;
+	u32 supported_flags = 0;
+	u64 inject_mask = 0, clear_mask = 0;
+	u64 mask;
+
+	/* Check for individual smart error flags and update inject/clear masks */
+	if (payload->smart_inject.flags & PDSM_SMART_INJECT_HEALTH_FATAL) {
+		supported_flags |= PDSM_SMART_INJECT_HEALTH_FATAL;
+		if (payload->smart_inject.fatal_enable)
+			inject_mask |= PAPR_PMEM_HEALTH_FATAL;
+		else
+			clear_mask |= PAPR_PMEM_HEALTH_FATAL;
+	}
+
+	if (payload->smart_inject.flags & PDSM_SMART_INJECT_BAD_SHUTDOWN) {
+		supported_flags |= PDSM_SMART_INJECT_BAD_SHUTDOWN;
+		if (payload->smart_inject.unsafe_shutdown_enable)
+			inject_mask |= PAPR_PMEM_SHUTDOWN_DIRTY;
+		else
+			clear_mask |= PAPR_PMEM_SHUTDOWN_DIRTY;
+	}
+
+	dev_dbg(&p->pdev->dev, "[Smart-inject] inject_mask=%#llx clear_mask=%#llx\n",
+		inject_mask, clear_mask);
+
+	/* Prevent concurrent access to dimm health bitmap related members */
+	rc = mutex_lock_interruptible(&p->health_mutex);
+	if (rc)
+		return rc;
+
+	/* Use inject/clear masks to set health_bitmap_inject_mask */
+	mask = READ_ONCE(p->health_bitmap_inject_mask);
+	mask = (mask & ~clear_mask) | inject_mask;
+	WRITE_ONCE(p->health_bitmap_inject_mask, mask);
+
+	/* Invalidate cached health bitmap */
+	p->lasthealth_jiffies = 0;
+
+	mutex_unlock(&p->health_mutex);
+
+	/* Return the supported flags back to userspace */
+	payload->smart_inject.flags = supported_flags;
+
+	/*
+	 * Report the size of the smart-inject payload that was written, not
+	 * that of the unrelated health payload: the caller adds this value to
+	 * the header size to form nd_fw_size, which must agree with the
+	 * size_out declared for PAPR_PDSM_SMART_INJECT.
+	 */
+	return sizeof(struct nd_papr_pdsm_smart_inject);
+}
+
+/*
+ * 'struct pdsm_cmd_desc'
+ * Identifies supported PDSMs' expected length of in/out payloads
+ * and pdsm service function.
+ *
+ * size_in : Size of input payload if any in the PDSM request.
+ * size_out : Size of output payload if any in the PDSM request.
+ * service : Service function for the PDSM request. Return semantics:
+ * rc < 0 : Error servicing PDSM and rc indicates the error.
+ * rc >=0 : Serviced successfully and 'rc' indicate number of
+ * bytes written to payload.
+ *
+ * Both sizes exclude the PDSM header: papr_scm_service_pdsm() adds
+ * ND_PDSM_HDR_SIZE before comparing them with the package sizes, and a size
+ * of zero disables the corresponding check.
+ */
+struct pdsm_cmd_desc {
+ u32 size_in;
+ u32 size_out;
+ int (*service)(struct papr_scm_priv *dimm,
+ union nd_pdsm_payload *payload);
+};
+
+/*
+ * Holds all supported PDSMs' command descriptors, indexed by enum papr_pdsm.
+ * The PAPR_PDSM_MIN and PAPR_PDSM_MAX slots are placeholders without a
+ * service function.
+ */
+static const struct pdsm_cmd_desc __pdsm_cmd_descriptors[] = {
+ [PAPR_PDSM_MIN] = {
+ .size_in = 0,
+ .size_out = 0,
+ .service = NULL,
+ },
+ /* New PDSM command descriptors to be added below */
+
+ /* Health query: no input, returns a struct nd_papr_pdsm_health */
+ [PAPR_PDSM_HEALTH] = {
+ .size_in = 0,
+ .size_out = sizeof(struct nd_papr_pdsm_health),
+ .service = papr_pdsm_health,
+ },
+
+ /* Smart inject: struct nd_papr_pdsm_smart_inject in both directions */
+ [PAPR_PDSM_SMART_INJECT] = {
+ .size_in = sizeof(struct nd_papr_pdsm_smart_inject),
+ .size_out = sizeof(struct nd_papr_pdsm_smart_inject),
+ .service = papr_pdsm_smart_inject,
+ },
+ /* Empty */
+ [PAPR_PDSM_MAX] = {
+ .size_in = 0,
+ .size_out = 0,
+ .service = NULL,
+ },
+};
+
+/*
+ * Given a valid pdsm cmd return its command descriptor else return NULL.
+ *
+ * 'cmd' is valid when it is a usable index into __pdsm_cmd_descriptors.
+ * Both bounds must hold, hence '&&': with '||' the test is true for every
+ * value and an out-of-range 'cmd' would index past the end of the table.
+ */
+static inline const struct pdsm_cmd_desc *pdsm_cmd_desc(enum papr_pdsm cmd)
+{
+	if (cmd >= 0 && cmd < ARRAY_SIZE(__pdsm_cmd_descriptors))
+		return &__pdsm_cmd_descriptors[cmd];
+
+	return NULL;
+}
+
+/*
+ * For a given pdsm request call an appropriate service function.
+ *
+ * The package is validated against the command descriptor first (known
+ * command, zeroed reserved fields, matching in/out sizes). The service
+ * function's result is then stored in the package: cmd_status receives 0 or
+ * the negative error and nd_fw_size the number of bytes produced, including
+ * the PDSM header.
+ *
+ * Returns -EINVAL if validation fails, otherwise the cmd_status stored in
+ * the package (0 on success, -ENOENT for a command without a service
+ * function, or the service function's error).
+ */
+static int papr_scm_service_pdsm(struct papr_scm_priv *p,
+				 struct nd_cmd_pkg *pkg)
+{
+	/* Get the PDSM header and PDSM command */
+	struct nd_pkg_pdsm *pdsm_pkg = (struct nd_pkg_pdsm *)pkg->nd_payload;
+	enum papr_pdsm pdsm = (enum papr_pdsm)pkg->nd_command;
+	const struct pdsm_cmd_desc *pdsc;
+	int rc;
+
+	/* Fetch corresponding pdsm descriptor for validation and servicing */
+	pdsc = pdsm_cmd_desc(pdsm);
+
+	/*
+	 * Validate pdsm descriptor: the lookup is documented to return NULL
+	 * for an unknown command, so never dereference it unchecked.
+	 */
+	if (!pdsc) {
+		dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Invalid PDSM\n", pdsm);
+		return -EINVAL;
+	}
+
+	/* Ensure that reserved fields are 0 */
+	if (pdsm_pkg->reserved[0] || pdsm_pkg->reserved[1]) {
+		dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Invalid reserved field\n",
+			pdsm);
+		return -EINVAL;
+	}
+
+	/* If pdsm expects some input, then ensure that the size_in matches */
+	if (pdsc->size_in &&
+	    pkg->nd_size_in != (pdsc->size_in + ND_PDSM_HDR_SIZE)) {
+		dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Mismatched size_in=%d\n",
+			pdsm, pkg->nd_size_in);
+		return -EINVAL;
+	}
+
+	/* If pdsm wants to return data, then ensure that size_out matches */
+	if (pdsc->size_out &&
+	    pkg->nd_size_out != (pdsc->size_out + ND_PDSM_HDR_SIZE)) {
+		dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Mismatched size_out=%d\n",
+			pdsm, pkg->nd_size_out);
+		return -EINVAL;
+	}
+
+	/* Service the pdsm */
+	if (pdsc->service) {
+		dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Servicing..\n", pdsm);
+
+		rc = pdsc->service(p, &pdsm_pkg->payload);
+
+		if (rc < 0) {
+			/* error encountered while servicing pdsm */
+			pdsm_pkg->cmd_status = rc;
+			pkg->nd_fw_size = ND_PDSM_HDR_SIZE;
+		} else {
+			/* pdsm serviced and 'rc' bytes written to payload */
+			pdsm_pkg->cmd_status = 0;
+			pkg->nd_fw_size = ND_PDSM_HDR_SIZE + rc;
+		}
+	} else {
+		dev_dbg(&p->pdev->dev, "PDSM[0x%x]: Unsupported PDSM request\n",
+			pdsm);
+		pdsm_pkg->cmd_status = -ENOENT;
+		pkg->nd_fw_size = ND_PDSM_HDR_SIZE;
+	}
+
+	return pdsm_pkg->cmd_status;
+}
+
+/*
+ * Dimm-control entry point: validate the request with is_cmd_valid() and
+ * dispatch it to the matching handler.
+ *
+ * The handler's result is stored in '*cmd_rc' when the caller supplied that
+ * pointer. The function itself returns 0 once a command has been
+ * dispatched, or a negative error if the request is invalid or unknown.
+ */
+static int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc,
+			  struct nvdimm *nvdimm, unsigned int cmd, void *buf,
+			  unsigned int buf_len, int *cmd_rc)
+{
+	struct nd_cmd_get_config_size *size_hdr;
+	struct papr_scm_priv *p;
+	int rc;
+
+	rc = is_cmd_valid(nvdimm, cmd, buf, buf_len);
+	if (rc) {
+		pr_debug("Invalid cmd=0x%x. Err=%d\n", cmd, rc);
+		return rc;
+	}
+
+	/* Use a local variable in case cmd_rc pointer is NULL */
+	if (!cmd_rc)
+		cmd_rc = &rc;
+
+	p = nvdimm_provider_data(nvdimm);
+
+	switch (cmd) {
+	case ND_CMD_GET_CONFIG_SIZE:
+		size_hdr = buf;
+
+		size_hdr->config_size = p->metadata_size;
+		size_hdr->max_xfer = 8;
+		size_hdr->status = 0;
+		*cmd_rc = 0;
+		break;
+
+	case ND_CMD_GET_CONFIG_DATA:
+		*cmd_rc = papr_scm_meta_get(p, buf);
+		break;
+
+	case ND_CMD_SET_CONFIG_DATA:
+		*cmd_rc = papr_scm_meta_set(p, buf);
+		break;
+
+	case ND_CMD_CALL:
+		*cmd_rc = papr_scm_service_pdsm(p, buf);
+		break;
+
+	default:
+		dev_dbg(&p->pdev->dev, "Unknown command = %d\n", cmd);
+		return -EINVAL;
+	}
+
+	dev_dbg(&p->pdev->dev, "returned with cmd_rc = %d\n", *cmd_rc);
+
+	return 0;
+}
+
+/*
+ * sysfs show callback for 'health_bitmap_inject': print the current health
+ * bitmap inject mask of the dimm in hexadecimal.
+ *
+ * Returns the number of bytes written to 'buf'.
+ */
+static ssize_t health_bitmap_inject_show(struct device *dev,
+					 struct device_attribute *attr,
+					 char *buf)
+{
+	struct nvdimm *dimm = to_nvdimm(dev);
+	struct papr_scm_priv *p = nvdimm_provider_data(dimm);
+
+	/* sysfs_emit() instead of sprintf(), as in dirty_shutdown_show() */
+	return sysfs_emit(buf, "%#llx\n",
+			  READ_ONCE(p->health_bitmap_inject_mask));
+}
+
+static DEVICE_ATTR_ADMIN_RO(health_bitmap_inject);
+
+/*
+ * sysfs show callback for 'perf_stats': fetch all performance statistics of
+ * the dimm from phyp and print them one per line as "<stat-id> = <value>".
+ *
+ * Returns the number of bytes written to 'buf', -ENOENT if performance
+ * stats are not available, -ENOMEM if the stats buffer cannot be allocated,
+ * or the error from drc_pmem_query_stats().
+ */
+static ssize_t perf_stats_show(struct device *dev,
+			       struct device_attribute *attr, char *buf)
+{
+	struct nvdimm *dimm = to_nvdimm(dev);
+	struct papr_scm_priv *p = nvdimm_provider_data(dimm);
+	struct papr_scm_perf_stats *stats;
+	struct papr_scm_perf_stat *stat;
+	struct seq_buf s;
+	u32 nr_stats;
+	ssize_t rc;
+	int index;
+
+	if (!p->stat_buffer_len)
+		return -ENOENT;
+
+	/* Allocate the buffer for phyp where stats are written */
+	stats = kzalloc(p->stat_buffer_len, GFP_KERNEL);
+	if (!stats)
+		return -ENOMEM;
+
+	/* Ask phyp to return all dimm perf stats */
+	rc = drc_pmem_query_stats(p, stats, 0);
+	if (rc) {
+		kfree(stats);
+		return rc;
+	}
+
+	/*
+	 * Walk the returned stats and print each id with its value. The
+	 * stat_id is an 8-byte character string without terminator, hence
+	 * the "%.8s" format.
+	 */
+	seq_buf_init(&s, buf, PAGE_SIZE);
+	nr_stats = be32_to_cpu(stats->num_statistics);
+	for (index = 0; index < nr_stats; index++) {
+		stat = &stats->scm_statistic[index];
+		seq_buf_printf(&s, "%.8s = 0x%016llX\n",
+			       stat->stat_id,
+			       be64_to_cpu(stat->stat_val));
+	}
+
+	kfree(stats);
+	return (ssize_t)seq_buf_used(&s);
+}
+static DEVICE_ATTR_ADMIN_RO(perf_stats);
+
+/*
+ * sysfs show callback for 'flags': refresh the (cached) dimm health and
+ * print one space-terminated keyword for every condition set in the health
+ * bitmap, followed by a newline if anything was printed.
+ *
+ * Returns the number of bytes written to 'buf' or the error from
+ * drc_pmem_query_health().
+ */
+static ssize_t flags_show(struct device *dev,
+			  struct device_attribute *attr, char *buf)
+{
+	/* Health bits and the keyword reported for them, in output order */
+	static const struct {
+		u64 mask;
+		const char *name;
+	} flag_names[] = {
+		{ PAPR_PMEM_UNARMED_MASK,	 "not_armed " },
+		{ PAPR_PMEM_BAD_SHUTDOWN_MASK,	 "flush_fail " },
+		{ PAPR_PMEM_BAD_RESTORE_MASK,	 "restore_fail " },
+		{ PAPR_PMEM_ENCRYPTED,		 "encrypted " },
+		{ PAPR_PMEM_SMART_EVENT_MASK,	 "smart_notify " },
+		{ PAPR_PMEM_SCRUBBED_AND_LOCKED, "scrubbed locked " },
+	};
+	struct nvdimm *dimm = to_nvdimm(dev);
+	struct papr_scm_priv *p = nvdimm_provider_data(dimm);
+	struct seq_buf s;
+	u64 health;
+	size_t i;
+	int rc;
+
+	rc = drc_pmem_query_health(p);
+	if (rc)
+		return rc;
+
+	/* Copy health_bitmap locally, check masks & update out buffer */
+	health = READ_ONCE(p->health_bitmap);
+
+	seq_buf_init(&s, buf, PAGE_SIZE);
+	for (i = 0; i < ARRAY_SIZE(flag_names); i++) {
+		if (health & flag_names[i].mask)
+			seq_buf_printf(&s, "%s", flag_names[i].name);
+	}
+
+	if (seq_buf_used(&s))
+		seq_buf_printf(&s, "\n");
+
+	return seq_buf_used(&s);
+}
+DEVICE_ATTR_RO(flags);
+
+/*
+ * sysfs show handler for the 'dirty_shutdown' attribute: prints the
+ * dirty_shutdown_counter stored in the dimm's private data as an unsigned
+ * decimal followed by a newline.
+ */
+static ssize_t dirty_shutdown_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct papr_scm_priv *priv = nvdimm_provider_data(to_nvdimm(dev));
+
+	return sysfs_emit(buf, "%llu\n", priv->dirty_shutdown_counter);
+}
+DEVICE_ATTR_RO(dirty_shutdown);
+
+/*
+ * is_visible callback for the "papr" attribute group.
+ *
+ * Hides the perf_stats attribute (returns 0) when the dimm has no stat
+ * buffer length recorded; every other attribute keeps its declared mode.
+ */
+static umode_t papr_nd_attribute_visible(struct kobject *kobj,
+					 struct attribute *attr, int n)
+{
+	struct nvdimm *nvdimm = to_nvdimm(kobj_to_dev(kobj));
+	struct papr_scm_priv *p = nvdimm_provider_data(nvdimm);
+
+	/* Without perf-stats support the perf_stats file is not exposed */
+	if (attr == &dev_attr_perf_stats.attr && !p->stat_buffer_len)
+		return 0;
+
+	return attr->mode;
+}
+
+/*
+ * papr_scm specific dimm attributes.
+ *
+ * NULL-terminated list referenced by papr_nd_attribute_group; whether
+ * perf_stats is shown for a given dimm is decided by
+ * papr_nd_attribute_visible().
+ */
+static struct attribute *papr_nd_attributes[] = {
+ &dev_attr_flags.attr,
+ &dev_attr_perf_stats.attr,
+ &dev_attr_dirty_shutdown.attr,
+ &dev_attr_health_bitmap_inject.attr,
+ NULL,
+};
+
+/*
+ * Attribute group named "papr" that bundles papr_nd_attributes and filters
+ * them through papr_nd_attribute_visible().
+ */
+static const struct attribute_group papr_nd_attribute_group = {
+ .name = "papr",
+ .is_visible = papr_nd_attribute_visible,
+ .attrs = papr_nd_attributes,
+};
+
+/* NULL-terminated group list handed to nvdimm_create() in papr_scm_nvdimm_init() */
+static const struct attribute_group *papr_nd_attr_groups[] = {
+ &papr_nd_attribute_group,
+ NULL,
+};
+
+/*
+ * Register the nvdimm bus, the dimm object and the memory region for @p.
+ *
+ * On success the region is appended to papr_nd_regions (under
+ * papr_ndr_lock) and 0 is returned.  Returns -ENOMEM when the provider
+ * name cannot be duplicated and -ENXIO when the bus, dimm or region cannot
+ * be created; on those failures the bus (if registered) is unregistered
+ * and the provider name is freed before returning.
+ */
+static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
+{
+	struct device *dev = &p->pdev->dev;
+	struct nd_mapping_desc mapping;
+	struct nd_region_desc ndr_desc;
+	unsigned long dimm_flags;
+	int target_nid, online_nid;
+
+	p->bus_desc.ndctl = papr_scm_ndctl;
+	p->bus_desc.module = THIS_MODULE;
+	p->bus_desc.of_node = p->pdev->dev.of_node;
+	p->bus_desc.provider_name = kstrdup(p->pdev->name, GFP_KERNEL);
+	if (!p->bus_desc.provider_name)
+		return -ENOMEM;
+
+	/* Set the dimm command family mask to accept PDSMs */
+	set_bit(NVDIMM_FAMILY_PAPR, &p->bus_desc.dimm_family_mask);
+
+	p->bus = nvdimm_bus_register(NULL, &p->bus_desc);
+	if (!p->bus) {
+		dev_err(dev, "Error creating nvdimm bus %pOF\n", p->dn);
+		kfree(p->bus_desc.provider_name);
+		return -ENXIO;
+	}
+
+	dimm_flags = 0;
+	set_bit(NDD_LABELING, &dimm_flags);
+
+	/*
+	 * Check if the nvdimm is unarmed. No locking needed as we are still
+	 * initializing. Ignore error encountered if any.
+	 */
+	__drc_pmem_query_health(p);
+
+	if (p->health_bitmap & PAPR_PMEM_UNARMED_MASK)
+		set_bit(NDD_UNARMED, &dimm_flags);
+
+	p->nvdimm = nvdimm_create(p->bus, p, papr_nd_attr_groups,
+				  dimm_flags, PAPR_SCM_DIMM_CMD_MASK, 0, NULL);
+	if (!p->nvdimm) {
+		dev_err(dev, "Error creating DIMM object for %pOF\n", p->dn);
+		goto err;
+	}
+
+	if (nvdimm_bus_check_dimm_count(p->bus, 1))
+		goto err;
+
+	/* now add the region */
+
+	memset(&mapping, 0, sizeof(mapping));
+	mapping.nvdimm = p->nvdimm;
+	mapping.start = 0;
+	/* XXX: blocks * block_size is not checked for overflow here */
+	mapping.size = p->blocks * p->block_size;
+
+	memset(&ndr_desc, 0, sizeof(ndr_desc));
+	target_nid = dev_to_node(&p->pdev->dev);
+	online_nid = numa_map_to_online_node(target_nid);
+	ndr_desc.numa_node = online_nid;
+	ndr_desc.target_node = target_nid;
+	ndr_desc.res = &p->res;
+	ndr_desc.of_node = p->dn;
+	ndr_desc.provider_data = p;
+	ndr_desc.mapping = &mapping;
+	ndr_desc.num_mappings = 1;
+	ndr_desc.nd_set = &p->nd_set;
+
+	if (p->hcall_flush_required) {
+		set_bit(ND_REGION_ASYNC, &ndr_desc.flags);
+		ndr_desc.flush = papr_scm_pmem_flush;
+	}
+
+	if (p->is_volatile) {
+		p->region = nvdimm_volatile_region_create(p->bus, &ndr_desc);
+	} else {
+		set_bit(ND_REGION_PERSIST_MEMCTRL, &ndr_desc.flags);
+		p->region = nvdimm_pmem_region_create(p->bus, &ndr_desc);
+	}
+	if (!p->region) {
+		dev_err(dev, "Error registering region %pR from %pOF\n",
+			ndr_desc.res, p->dn);
+		goto err;
+	}
+
+	/* Terminate the message with a newline so it is not left open in the log */
+	if (target_nid != online_nid)
+		dev_info(dev, "Region registered with target node %d and online node %d\n",
+			 target_nid, online_nid);
+
+	mutex_lock(&papr_ndr_lock);
+	list_add_tail(&p->region_list, &papr_nd_regions);
+	mutex_unlock(&papr_ndr_lock);
+
+	return 0;
+
+err:
+	nvdimm_bus_unregister(p->bus);
+	kfree(p->bus_desc.provider_name);
+	return -ENXIO;
+}
+
+/*
+ * Record the L1_CACHE_BYTES-sized range containing @phys_addr as a bad
+ * range on @bus and, if that succeeds, notify @region so it revalidates
+ * poison.  A registration failure is only logged.
+ */
+static void papr_scm_add_badblock(struct nd_region *region,
+				  struct nvdimm_bus *bus, u64 phys_addr)
+{
+	const u64 range_start = ALIGN_DOWN(phys_addr, L1_CACHE_BYTES);
+	int failed = nvdimm_bus_add_badrange(bus, range_start, L1_CACHE_BYTES);
+
+	if (failed) {
+		pr_err("Bad block registration for 0x%llx failed\n", phys_addr);
+		return;
+	}
+
+	pr_debug("Add memory range (0x%llx - 0x%llx) as bad range\n",
+		 range_start, range_start + L1_CACHE_BYTES);
+
+	nvdimm_region_notify(region, NVDIMM_REVALIDATE_POISON);
+}
+
+/*
+ * Machine check notifier callback.
+ *
+ * For an MCE_ERROR_TYPE_UE event whose physical address is provided and
+ * maps to a zone-device page, look up the registered region whose resource
+ * contains the address and record a bad block for it.
+ *
+ * Returns NOTIFY_OK when a matching region was found, NOTIFY_DONE
+ * otherwise.
+ */
+static int handle_mce_ue(struct notifier_block *nb, unsigned long val,
+			 void *data)
+{
+	struct machine_check_event *evt = data;
+	struct papr_scm_priv *p;
+	bool found = false;
+	u64 phys_addr;
+
+	if (evt->error_type != MCE_ERROR_TYPE_UE ||
+	    list_empty(&papr_nd_regions))
+		return NOTIFY_DONE;
+
+	/*
+	 * The physical address obtained here is PAGE_SIZE aligned, so get the
+	 * exact address from the effective address
+	 */
+	phys_addr = evt->u.ue_error.physical_address +
+		    (evt->u.ue_error.effective_address & ~PAGE_MASK);
+
+	if (!evt->u.ue_error.physical_address_provided)
+		return NOTIFY_DONE;
+	if (!is_zone_device_page(pfn_to_page(phys_addr >> PAGE_SHIFT)))
+		return NOTIFY_DONE;
+
+	/* mce notifier is called from a process context, so mutex is safe */
+	mutex_lock(&papr_ndr_lock);
+	list_for_each_entry(p, &papr_nd_regions, region_list) {
+		if (phys_addr < p->res.start || phys_addr > p->res.end)
+			continue;
+
+		/* First region covering the address wins */
+		papr_scm_add_badblock(p->region, p->bus, phys_addr);
+		found = true;
+		break;
+	}
+	mutex_unlock(&papr_ndr_lock);
+
+	if (found)
+		return NOTIFY_OK;
+	return NOTIFY_DONE;
+}
+
+/* Notifier block hooking handle_mce_ue(); (un)registered in papr_scm_init()/papr_scm_exit() */
+static struct notifier_block mce_ue_nb = {
+ .notifier_call = handle_mce_ue
+};
+
+/*
+ * Platform driver probe: read the required device tree properties, bind
+ * the DRC memory through the hypervisor, set up the resource describing
+ * the bound range and register the nvdimm bus/dimm/region.
+ *
+ * Returns 0 on success, -ENODEV when a required property is missing or the
+ * unit guid cannot be parsed, -ENOMEM on allocation failure, -ENXIO when
+ * binding fails, or the error returned by papr_scm_nvdimm_init().
+ */
+static int papr_scm_probe(struct platform_device *pdev)
+{
+	struct device_node *dn = pdev->dev.of_node;
+	/* metadata-size is optional: default to 0 so it is never read uninitialized */
+	u32 drc_index, metadata_size = 0;
+	u64 blocks, block_size;
+	struct papr_scm_priv *p;
+	u8 uuid_raw[UUID_SIZE];
+	const char *uuid_str;
+	ssize_t stat_size;
+	uuid_t uuid;
+	int rc;
+
+	/* check we have all the required DT properties */
+	if (of_property_read_u32(dn, "ibm,my-drc-index", &drc_index)) {
+		dev_err(&pdev->dev, "%pOF: missing drc-index!\n", dn);
+		return -ENODEV;
+	}
+
+	if (of_property_read_u64(dn, "ibm,block-size", &block_size)) {
+		dev_err(&pdev->dev, "%pOF: missing block-size!\n", dn);
+		return -ENODEV;
+	}
+
+	if (of_property_read_u64(dn, "ibm,number-of-blocks", &blocks)) {
+		dev_err(&pdev->dev, "%pOF: missing number-of-blocks!\n", dn);
+		return -ENODEV;
+	}
+
+	if (of_property_read_string(dn, "ibm,unit-guid", &uuid_str)) {
+		dev_err(&pdev->dev, "%pOF: missing unit-guid!\n", dn);
+		return -ENODEV;
+	}
+
+	/*
+	 * open firmware platform device create won't update the NUMA
+	 * distance table. For PAPR SCM devices we use numa_map_to_online_node()
+	 * to find the nearest online NUMA node and that requires correct
+	 * distance table information.
+	 */
+	update_numa_distance(dn);
+
+	p = kzalloc(sizeof(*p), GFP_KERNEL);
+	if (!p)
+		return -ENOMEM;
+
+	/* Initialize the dimm mutex */
+	mutex_init(&p->health_mutex);
+
+	/* optional DT properties */
+	of_property_read_u32(dn, "ibm,metadata-size", &metadata_size);
+
+	p->dn = dn;
+	p->drc_index = drc_index;
+	p->block_size = block_size;
+	p->blocks = blocks;
+	p->is_volatile = !of_property_read_bool(dn, "ibm,cache-flush-required");
+	p->hcall_flush_required = of_property_read_bool(dn, "ibm,hcall-flush-required");
+
+	if (of_property_read_u64(dn, "ibm,persistence-failed-count",
+				 &p->dirty_shutdown_counter))
+		p->dirty_shutdown_counter = 0;
+
+	/*
+	 * The set cookies are derived from the unit guid.  Refuse to go on
+	 * with a guid that does not parse, otherwise the cookies would be
+	 * built from an uninitialized uuid.
+	 */
+	if (uuid_parse(uuid_str, &uuid)) {
+		dev_err(&pdev->dev, "%pOF: invalid unit-guid!\n", dn);
+		rc = -ENODEV;
+		goto err;
+	}
+
+	/*
+	 * The cookie1 and cookie2 are not really little endian.
+	 * We store a raw buffer representation of the
+	 * uuid string so that we can compare this with the label
+	 * area cookie irrespective of the endian configuration
+	 * with which the kernel is built.
+	 *
+	 * Historically we stored the cookie in the below format.
+	 * for a uuid string 72511b67-0b3b-42fd-8d1d-5be3cae8bcaa
+	 * cookie1 was 0xfd423b0b671b5172
+	 * cookie2 was 0xaabce8cae35b1d8d
+	 */
+	export_uuid(uuid_raw, &uuid);
+	p->nd_set.cookie1 = get_unaligned_le64(&uuid_raw[0]);
+	p->nd_set.cookie2 = get_unaligned_le64(&uuid_raw[8]);
+
+	/* might be zero */
+	p->metadata_size = metadata_size;
+	p->pdev = pdev;
+
+	/* request the hypervisor to bind this region to somewhere in memory */
+	rc = drc_pmem_bind(p);
+
+	/* If phyp says drc memory still bound then force unbound and retry */
+	if (rc == H_OVERLAP)
+		rc = drc_pmem_query_n_bind(p);
+
+	if (rc != H_SUCCESS) {
+		dev_err(&p->pdev->dev, "bind err: %d\n", rc);
+		rc = -ENXIO;
+		goto err;
+	}
+
+	/* setup the resource for the newly bound range */
+	p->res.start = p->bound_addr;
+	p->res.end = p->bound_addr + p->blocks * p->block_size - 1;
+	p->res.name = pdev->name;
+	p->res.flags = IORESOURCE_MEM;
+
+	/* Try retrieving the stat buffer and see if its supported */
+	stat_size = drc_pmem_query_stats(p, NULL, 0);
+	if (stat_size > 0) {
+		p->stat_buffer_len = stat_size;
+		dev_dbg(&p->pdev->dev, "Max perf-stat size %lu-bytes\n",
+			p->stat_buffer_len);
+	}
+
+	rc = papr_scm_nvdimm_init(p);
+	if (rc)
+		goto err2;
+
+	platform_set_drvdata(pdev, p);
+	papr_scm_pmu_register(p);
+
+	return 0;
+
+err2:
+	drc_pmem_unbind(p);
+err:
+	kfree(p);
+	return rc;
+}
+
+/*
+ * Platform driver remove: take the device off papr_nd_regions, unregister
+ * its nvdimm bus, unbind the DRC memory, unregister the nvdimm PMU if one
+ * is recorded in pdev->archdata.priv, and free the private data.
+ * Always returns 0.
+ */
+static int papr_scm_remove(struct platform_device *pdev)
+{
+	struct papr_scm_priv *priv = platform_get_drvdata(pdev);
+
+	/* Drop the region from the list walked by the MCE notifier */
+	mutex_lock(&papr_ndr_lock);
+	list_del(&priv->region_list);
+	mutex_unlock(&papr_ndr_lock);
+
+	nvdimm_bus_unregister(priv->bus);
+	drc_pmem_unbind(priv);
+
+	if (pdev->archdata.priv != NULL) {
+		unregister_nvdimm_pmu(pdev->archdata.priv);
+	}
+	pdev->archdata.priv = NULL;
+
+	kfree(priv->bus_desc.provider_name);
+	kfree(priv);
+
+	return 0;
+}
+
+/* Device tree compatible strings handled by this driver; the empty entry ends the table */
+static const struct of_device_id papr_scm_match[] = {
+ { .compatible = "ibm,pmemory" },
+ { .compatible = "ibm,pmemory-v2" },
+ { },
+};
+
+/* Platform driver glue: probe/remove callbacks plus the OF match table above */
+static struct platform_driver papr_scm_driver = {
+ .probe = papr_scm_probe,
+ .remove = papr_scm_remove,
+ .driver = {
+ .name = "papr_scm",
+ .of_match_table = papr_scm_match,
+ },
+};
+
+/*
+ * Module init: register the platform driver and, only if that succeeded,
+ * the machine check notifier.  Returns the result of
+ * platform_driver_register().
+ */
+static int __init papr_scm_init(void)
+{
+	int err = platform_driver_register(&papr_scm_driver);
+
+	if (err)
+		return err;
+
+	mce_register_notifier(&mce_ue_nb);
+	return 0;
+}
+module_init(papr_scm_init);
+
+/*
+ * Module exit: undo papr_scm_init() in reverse order, i.e. drop the machine
+ * check notifier first and then unregister the platform driver.
+ */
+static void __exit papr_scm_exit(void)
+{
+ mce_unregister_notifier(&mce_ue_nb);
+ platform_driver_unregister(&papr_scm_driver);
+}
+module_exit(papr_scm_exit);
+
+MODULE_DEVICE_TABLE(of, papr_scm_match);
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("IBM Corporation");
diff --git a/arch/powerpc/platforms/pseries/pci.c b/arch/powerpc/platforms/pseries/pci.c
new file mode 100644
index 000000000..1772ae3d1
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/pci.c
@@ -0,0 +1,322 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2001 Dave Engebretsen, IBM Corporation
+ * Copyright (C) 2003 Anton Blanchard <anton@au.ibm.com>, IBM
+ *
+ * pSeries specific routines for PCI.
+ */
+
+#include <linux/init.h>
+#include <linux/ioport.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/string.h>
+
+#include <asm/eeh.h>
+#include <asm/pci-bridge.h>
+#include <asm/ppc-pci.h>
+#include <asm/pci.h>
+#include "pseries.h"
+
+/*
+ * The fixup below is compiled out by "#if 0".  It would prefix the device
+ * name with the "ibm,loc-code" property of the device's OF node, separated
+ * by a space, provided the location code is shorter than the name buffer.
+ */
+#if 0
+void pcibios_name_device(struct pci_dev *dev)
+{
+ struct device_node *dn;
+
+ /*
+ * Add IBM loc code (slot) as a prefix to the device names for service
+ */
+ dn = pci_device_to_OF_node(dev);
+ if (dn) {
+ const char *loc_code = of_get_property(dn, "ibm,loc-code",
+ NULL);
+ if (loc_code) {
+ int loc_len = strlen(loc_code);
+ if (loc_len < sizeof(dev->dev.name)) {
+ /* shift the existing name right to make room for "<loc> " */
+ memmove(dev->dev.name+loc_len+1, dev->dev.name,
+ sizeof(dev->dev.name)-loc_len-1);
+ memcpy(dev->dev.name, loc_code, loc_len);
+ dev->dev.name[loc_len] = ' ';
+ /* the shift may have pushed the terminator out; restore it */
+ dev->dev.name[sizeof(dev->dev.name)-1] = '\0';
+ }
+ }
+ }
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pcibios_name_device);
+#endif
+
+#ifdef CONFIG_PCI_IOV
+/* Upper bound on the number of VFs checked in pseries_pci_sriov_enable() */
+#define MAX_VFS_FOR_MAP_PE 256
+/*
+ * One per-VF record exchanged with firmware through rtas_data_buf in
+ * pseries_send_map_pe().  All fields are big endian.
+ */
+struct pe_map_bar_entry {
+ __be64 bar; /* Input: Virtual Function BAR */
+ __be16 rid; /* Input: Virtual Function Router ID */
+ __be16 pe_num; /* Output: Virtual Function PE Number */
+ __be32 reserved; /* Reserved Space */
+};
+
+/*
+ * Hand @vf_pe_array to firmware through the RTAS call identified by
+ * RTAS_FN_IBM_OPEN_SRIOV_MAP_PE_NUMBER and copy the result back.
+ *
+ * RTAS_DATA_BUF_SIZE bytes are copied in each direction regardless of
+ * @num_vfs, so @vf_pe_array must be at least that large; the length passed
+ * to firmware is @num_vfs entries.
+ *
+ * Returns -EINVAL when the RTAS service is unknown, otherwise the value
+ * returned by rtas_call() (non-zero is logged as an error).
+ */
+static int pseries_send_map_pe(struct pci_dev *pdev, u16 num_vfs,
+			       struct pe_map_bar_entry *vf_pe_array)
+{
+	unsigned long buid, addr;
+	struct pci_dn *pdn;
+	int token;
+	int rc;
+
+	token = rtas_function_token(RTAS_FN_IBM_OPEN_SRIOV_MAP_PE_NUMBER);
+	if (token == RTAS_UNKNOWN_SERVICE)
+		return -EINVAL;
+
+	pdn = pci_get_pdn(pdev);
+	addr = rtas_config_addr(pdn->busno, pdn->devfn, 0);
+	buid = pdn->phb->buid;
+
+	/* rtas_data_buf is shared, so stage in/out under its lock */
+	spin_lock(&rtas_data_buf_lock);
+	memcpy(rtas_data_buf, vf_pe_array, RTAS_DATA_BUF_SIZE);
+	rc = rtas_call(token, 5, 1, NULL, addr,
+		       BUID_HI(buid), BUID_LO(buid),
+		       rtas_data_buf,
+		       num_vfs * sizeof(struct pe_map_bar_entry));
+	memcpy(vf_pe_array, rtas_data_buf, RTAS_DATA_BUF_SIZE);
+	spin_unlock(&rtas_data_buf_lock);
+
+	if (rc != 0) {
+		dev_err(&pdev->dev,
+			"%s: Failed to associate pes PE#%lx, rc=%x\n",
+			__func__, addr, rc);
+	}
+
+	return rc;
+}
+
+/*
+ * Store the (big endian) PE number reported for VF @vf_index in the PF's
+ * pe_num_map, converted to CPU endianness, and log the association at
+ * debug level.
+ */
+static void pseries_set_pe_num(struct pci_dev *pdev, u16 vf_index, __be16 pe_num)
+{
+	struct pci_dn *pf_pdn = pci_get_pdn(pdev);
+
+	pf_pdn->pe_num_map[vf_index] = be16_to_cpu(pe_num);
+
+	dev_dbg(&pdev->dev, "VF %04x:%02x:%02x.%x associated with PE#%x\n",
+		pci_domain_nr(pdev->bus),
+		pdev->bus->number,
+		PCI_SLOT(pci_iov_virtfn_devfn(pdev, vf_index)),
+		PCI_FUNC(pci_iov_virtfn_devfn(pdev, vf_index)),
+		pf_pdn->pe_num_map[vf_index]);
+}
+
+/*
+ * Build one pe_map_bar_entry per VF, send the array to firmware with
+ * pseries_send_map_pe() and, on success, record the returned PE numbers.
+ *
+ * The array is RTAS_DATA_BUF_SIZE bytes and is indexed by VF, so the
+ * caller must keep @num_vfs within what that buffer can hold.  For each VF
+ * every IOV BAR that has a parent resource rewrites the same entry, so the
+ * entry ends up describing the last such BAR.
+ *
+ * Returns 0 on success, -ENOMEM on allocation failure, or the non-zero
+ * result of pseries_send_map_pe().
+ */
+static int pseries_associate_pes(struct pci_dev *pdev, u16 num_vfs)
+{
+	struct pe_map_bar_entry *vf_pe_array, *entry;
+	struct resource *res;
+	struct pci_dn *pdn;
+	int bar, vf, rc;
+	u64 size;
+
+	vf_pe_array = kzalloc(RTAS_DATA_BUF_SIZE, GFP_KERNEL);
+	if (!vf_pe_array)
+		return -ENOMEM;
+
+	pdn = pci_get_pdn(pdev);
+
+	/* create firmware structure to associate pes */
+	for (vf = 0; vf < num_vfs; vf++) {
+		entry = &vf_pe_array[vf];
+		pdn->pe_num_map[vf] = IODA_INVALID_PE;
+
+		for (bar = 0; bar < PCI_SRIOV_NUM_BARS; bar++) {
+			res = &pdev->resource[bar + PCI_IOV_RESOURCES];
+			if (res->parent) {
+				size = pcibios_iov_resource_alignment(pdev,
+						bar + PCI_IOV_RESOURCES);
+				entry->bar = cpu_to_be64(res->start + size * vf);
+				entry->rid = cpu_to_be16(
+					(pci_iov_virtfn_bus(pdev, vf) << 8) |
+					pci_iov_virtfn_devfn(pdev, vf));
+				entry->pe_num = cpu_to_be16(IODA_INVALID_PE);
+			}
+		}
+	}
+
+	rc = pseries_send_map_pe(pdev, num_vfs, vf_pe_array);
+
+	/* Only zero is success */
+	if (rc == 0) {
+		for (vf = 0; vf < num_vfs; vf++)
+			pseries_set_pe_num(pdev, vf, vf_pe_array[vf].pe_num);
+	}
+
+	kfree(vf_pe_array);
+	return rc;
+}
+
+/*
+ * Validate the requested VF count, allocate the per-VF PE number map and
+ * ask firmware to associate PEs with the VFs.
+ *
+ * Returns 0 on success, -EINVAL when the device node has no
+ * "ibm,number-of-configurable-vfs" property or @num_vfs exceeds either the
+ * configurable maximum or MAX_VFS_FOR_MAP_PE, -ENOMEM on allocation
+ * failure, or the non-zero result of pseries_associate_pes().
+ */
+static int pseries_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
+{
+	struct device_node *dn = pci_device_to_OF_node(pdev);
+	struct pci_dn *pdn;
+	const int *max_vfs;
+	int max_config_vfs;
+	int vf_limit;
+	int rc;
+
+	max_vfs = of_get_property(dn, "ibm,number-of-configurable-vfs", NULL);
+	if (!max_vfs)
+		return -EINVAL;
+
+	/* First integer stores max config */
+	max_config_vfs = of_read_number(&max_vfs[0], 1);
+
+	/*
+	 * Both limits apply independently.  pseries_associate_pes() indexes
+	 * a fixed RTAS_DATA_BUF_SIZE-byte array by VF, so the
+	 * MAX_VFS_FOR_MAP_PE cap must hold whatever the device tree
+	 * advertises, and the device tree maximum must hold even when it is
+	 * below that cap.  Report the smaller limit as the one exceeded.
+	 */
+	vf_limit = (max_config_vfs < MAX_VFS_FOR_MAP_PE) ?
+			max_config_vfs : MAX_VFS_FOR_MAP_PE;
+	if (num_vfs > vf_limit) {
+		dev_err(&pdev->dev,
+			"Num VFs %x > %x Configurable VFs\n",
+			num_vfs, vf_limit);
+		return -EINVAL;
+	}
+
+	pdn = pci_get_pdn(pdev);
+	pdn->pe_num_map = kmalloc_array(num_vfs,
+					sizeof(*pdn->pe_num_map),
+					GFP_KERNEL);
+	if (!pdn->pe_num_map)
+		return -ENOMEM;
+
+	rc = pseries_associate_pes(pdev, num_vfs);
+
+	/* Anything other than zero is failure */
+	if (rc) {
+		dev_err(&pdev->dev, "Failure to enable sriov: %x\n", rc);
+		kfree(pdn->pe_num_map);
+		/* Don't leave a dangling pointer for a later kfree() */
+		pdn->pe_num_map = NULL;
+	} else {
+		pci_vf_drivers_autoprobe(pdev, false);
+	}
+
+	return rc;
+}
+
+/*
+ * SR-IOV enable hook installed into ppc_md by pSeries_final_fixup():
+ * allocate the VF pci_dn data, then run pseries_pci_sriov_enable() and
+ * return its result.
+ */
+static int pseries_pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
+{
+ /* Allocate PCI data */
+ add_sriov_vf_pdns(pdev);
+ return pseries_pci_sriov_enable(pdev, num_vfs);
+}
+
+/*
+ * SR-IOV disable hook installed into ppc_md by pSeries_final_fixup():
+ * free the PE number map, release the VF pci_dn data and re-enable VF
+ * driver autoprobe.  Always returns 0.
+ */
+static int pseries_pcibios_sriov_disable(struct pci_dev *pdev)
+{
+	struct pci_dn *pdn = pci_get_pdn(pdev);
+
+	/*
+	 * Releasing pe_num_map.  Clear the pointer as well so that a
+	 * repeated disable, or a disable following a failed enable, cannot
+	 * free the same buffer twice (kfree(NULL) is a no-op).
+	 */
+	kfree(pdn->pe_num_map);
+	pdn->pe_num_map = NULL;
+
+	/* Release PCI data */
+	remove_sriov_vf_pdns(pdev);
+	pci_vf_drivers_autoprobe(pdev, true);
+	return 0;
+}
+#endif
+
+/*
+ * Claim a fixed set of legacy I/O port ranges (pic, dma, timer, dma page
+ * register) when an ISA I/O base is present; does nothing otherwise.  The
+ * results of request_region() are not checked.
+ */
+static void __init pSeries_request_regions(void)
+{
+	/* start, length and owner name of each range, claimed in this order */
+	const struct {
+		unsigned long start;
+		unsigned long len;
+		const char *name;
+	} legacy[] = {
+		{ 0x20, 0x20, "pic1" },
+		{ 0xa0, 0x20, "pic2" },
+		{ 0x00, 0x20, "dma1" },
+		{ 0x40, 0x20, "timer" },
+		{ 0x80, 0x10, "dma page reg" },
+		{ 0xc0, 0x20, "dma2" },
+	};
+	unsigned int i;
+
+	if (!isa_io_base)
+		return;
+
+	for (i = 0; i < sizeof(legacy) / sizeof(legacy[0]); i++)
+		request_region(legacy[i].start, legacy[i].len, legacy[i].name);
+}
+
+/*
+ * Final PCI fixup step: request the legacy I/O regions, call
+ * eeh_show_enabled() and, when CONFIG_PCI_IOV is set, install the pseries
+ * SR-IOV enable/disable hooks into ppc_md.
+ */
+void __init pSeries_final_fixup(void)
+{
+ pSeries_request_regions();
+
+ eeh_show_enabled();
+
+#ifdef CONFIG_PCI_IOV
+ ppc_md.pcibios_sriov_enable = pseries_pcibios_sriov_enable;
+ ppc_md.pcibios_sriov_disable = pseries_pcibios_sriov_disable;
+#endif
+}
+
+/*
+ * Header fixup for the Winbond 82c105, assumed to be the IDE controller on
+ * a p610/p615/p630.  We should probably be more careful in case someone
+ * tries to plug in a similar adapter.
+ *
+ * Sets bit 11 of config dword 0x40 (LEGIRQ) so the chip uses INTC, drops
+ * IORESOURCE_IO from the resources of bus 0 / devfn 0x81, and clears any
+ * resource that starts at 0 with a non-zero end.  Only acts on pseries
+ * machines.
+ */
+static void fixup_winbond_82c105(struct pci_dev* dev)
+{
+	struct resource *res;
+	unsigned int cfg;
+	bool second_fn;
+
+	if (!machine_is(pseries))
+		return;
+
+	printk("Using INTC for W82c105 IDE controller.\n");
+	pci_read_config_dword(dev, 0x40, &cfg);
+	/* Enable LEGIRQ to use INTC instead of ISA interrupts */
+	pci_write_config_dword(dev, 0x40, cfg | (1<<11));
+
+	second_fn = (dev->bus->number == 0 && dev->devfn == 0x81);
+
+	pci_dev_for_each_resource(dev, res) {
+		/* zap the 2nd function of the winbond chip */
+		if (second_fn && (res->flags & IORESOURCE_IO))
+			res->flags &= ~IORESOURCE_IO;
+
+		if (res->start == 0 && res->end) {
+			res->flags = 0;
+			res->end = 0;
+		}
+	}
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_WINBOND, PCI_DEVICE_ID_WINBOND_82C105,
+			 fixup_winbond_82c105);
+
+/*
+ * Translate one cell of the "ibm,pcie-link-speed-stats" property into a
+ * pci_bus_speed value.  Unrecognised values are logged at debug level and
+ * map to PCI_SPEED_UNKNOWN.
+ */
+static enum pci_bus_speed prop_to_pci_speed(u32 prop)
+{
+	enum pci_bus_speed speed = PCI_SPEED_UNKNOWN;
+
+	switch (prop) {
+	case 0x01:
+		speed = PCIE_SPEED_2_5GT;
+		break;
+	case 0x02:
+		speed = PCIE_SPEED_5_0GT;
+		break;
+	case 0x04:
+		speed = PCIE_SPEED_8_0GT;
+		break;
+	case 0x08:
+		speed = PCIE_SPEED_16_0GT;
+		break;
+	case 0x10:
+		speed = PCIE_SPEED_32_0GT;
+		break;
+	default:
+		pr_debug("Unexpected PCI link speed property value\n");
+		break;
+	}
+
+	return speed;
+}
+
+/*
+ * Prepare a host bridge: install pcibios_free_controller_deferred() as its
+ * release callback and, when an "ibm,pcie-link-speed-stats" property is
+ * found on the PHB node or one of its ancestors, fill in the bus max and
+ * current link speeds from its two cells.
+ *
+ * Always returns 0; a missing node or property just leaves the speeds
+ * untouched.
+ */
+int pseries_root_bridge_prepare(struct pci_host_bridge *bridge)
+{
+ struct device_node *dn, *pdn;
+ struct pci_bus *bus;
+ u32 pcie_link_speed_stats[2];
+ int rc;
+
+ bus = bridge->bus;
+
+ /* Rely on the pcibios_free_controller_deferred() callback. */
+ pci_set_host_bridge_release(bridge, pcibios_free_controller_deferred,
+ (void *) pci_bus_to_host(bus));
+
+ dn = pcibios_get_phb_of_node(bus);
+ if (!dn)
+ return 0;
+
+ /*
+ * Walk from the PHB node towards the root and stop at the first node
+ * that provides the two-cell property.  dn is non-NULL here, so the
+ * body runs at least once and rc is always assigned.
+ */
+ for (pdn = dn; pdn != NULL; pdn = of_get_next_parent(pdn)) {
+ rc = of_property_read_u32_array(pdn,
+ "ibm,pcie-link-speed-stats",
+ &pcie_link_speed_stats[0], 2);
+ if (!rc)
+ break;
+ }
+
+ /* pdn is the matching node, or NULL when the walk ran off the top */
+ of_node_put(pdn);
+
+ if (rc) {
+ pr_debug("no ibm,pcie-link-speed-stats property\n");
+ return 0;
+ }
+
+ /* cell 0 is used as the maximum speed, cell 1 as the current speed */
+ bus->max_bus_speed = prop_to_pci_speed(pcie_link_speed_stats[0]);
+ bus->cur_bus_speed = prop_to_pci_speed(pcie_link_speed_stats[1]);
+ return 0;
+}
diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c
new file mode 100644
index 000000000..4ba824568
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/pci_dlpar.c
@@ -0,0 +1,111 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * PCI Dynamic LPAR, PCI Hot Plug and PCI EEH recovery code
+ * for RPA-compliant PPC64 platform.
+ * Copyright (C) 2003 Linda Xie <lxie@us.ibm.com>
+ * Copyright (C) 2005 International Business Machines
+ *
+ * Updates, 2005, John Rose <johnrose@austin.ibm.com>
+ * Updates, 2005, Linas Vepstas <linas@austin.ibm.com>
+ */
+
+#include <linux/pci.h>
+#include <linux/export.h>
+#include <asm/pci-bridge.h>
+#include <asm/ppc-pci.h>
+#include <asm/firmware.h>
+#include <asm/eeh.h>
+
+#include "pseries.h"
+
+/*
+ * Set up a PHB for device node @dn at runtime: allocate the controller,
+ * configure it from RTAS and the OF ranges, install the pseries controller
+ * ops, set up MSI domains and EEH state, then scan the bus and finish
+ * adding its devices.
+ *
+ * Returns the new controller, or NULL when pcibios_alloc_controller()
+ * fails.
+ */
+struct pci_controller *init_phb_dynamic(struct device_node *dn)
+{
+ struct pci_controller *phb;
+
+ pr_debug("PCI: Initializing new hotplug PHB %pOF\n", dn);
+
+ phb = pcibios_alloc_controller(dn);
+ if (!phb)
+ return NULL;
+ rtas_setup_phb(phb);
+ pci_process_bridge_OF_ranges(phb, dn, 0);
+ phb->controller_ops = pseries_pci_controller_ops;
+
+ pci_devs_phb_init_dynamic(phb);
+
+ pseries_msi_allocate_domains(phb);
+
+ /* Create EEH devices for the PHB */
+ eeh_phb_pe_create(phb);
+
+ /* Only walk the EEH device tree when the PHB node has children */
+ if (dn->child)
+ pseries_eeh_init_edev_recursive(PCI_DN(dn));
+
+ pcibios_scan_phb(phb);
+ pcibios_finish_adding_to_bus(phb->bus);
+
+ return phb;
+}
+EXPORT_SYMBOL_GPL(init_phb_dynamic);
+
+/*
+ * RPA-specific bits for removing PHBs.
+ *
+ * Tears down a PHB whose root bus has no children and no devices: unmaps
+ * its IO space, frees its MSI domains, removes the bus, unregisters the
+ * host bridge device and releases the IO and memory resources.
+ *
+ * Returns 0 on success, -EBUSY when the bus still has children or devices,
+ * and 1 when unmapping the IO space fails.
+ * NOTE(review): the unmap failure path returns a positive value while the
+ * busy path returns a negative errno -- confirm callers treat both as
+ * failure.
+ */
+int remove_phb_dynamic(struct pci_controller *phb)
+{
+ struct pci_bus *b = phb->bus;
+ struct pci_host_bridge *host_bridge = to_pci_host_bridge(b->bridge);
+ struct resource *res;
+ int rc, i;
+
+ pr_debug("PCI: Removing PHB %04x:%02x...\n",
+ pci_domain_nr(b), b->number);
+
+ /* We cannot remove a root bus that has children */
+ if (!(list_empty(&b->children) && list_empty(&b->devices)))
+ return -EBUSY;
+
+ /* We -know- there aren't any child devices anymore at this stage
+ * and thus, we can safely unmap the IO space as it's not in use
+ */
+ res = &phb->io_resource;
+ if (res->flags & IORESOURCE_IO) {
+ rc = pcibios_unmap_io_space(b);
+ if (rc) {
+ printk(KERN_ERR "%s: failed to unmap IO on bus %s\n",
+ __func__, b->name);
+ return 1;
+ }
+ }
+
+ pseries_msi_free_domains(phb);
+
+ /* Keep a reference so phb isn't freed yet */
+ get_device(&host_bridge->dev);
+
+ /* Remove the PCI bus and unregister the bridge device from sysfs */
+ phb->bus = NULL;
+ pci_remove_bus(b);
+ host_bridge->bus = NULL;
+ device_unregister(&host_bridge->dev);
+
+ /* Now release the IO resource (res still points at phb->io_resource) */
+ if (res->flags & IORESOURCE_IO)
+ release_resource(res);
+
+ /* Release memory resources; only the first three slots are visited */
+ for (i = 0; i < 3; ++i) {
+ res = &phb->mem_resources[i];
+ if (!(res->flags & IORESOURCE_MEM))
+ continue;
+ release_resource(res);
+ }
+
+ /*
+ * The pci_controller data structure is freed by
+ * the pcibios_free_controller_deferred() callback;
+ * see pseries_root_bridge_prepare().
+ */
+ put_device(&host_bridge->dev);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(remove_phb_dynamic);
diff --git a/arch/powerpc/platforms/pseries/plpks-secvar.c b/arch/powerpc/platforms/pseries/plpks-secvar.c
new file mode 100644
index 000000000..257fd1f8b
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/plpks-secvar.c
@@ -0,0 +1,217 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+// Secure variable implementation using the PowerVM LPAR Platform KeyStore (PLPKS)
+//
+// Copyright 2022, 2023 IBM Corporation
+// Authors: Russell Currey
+// Andrew Donnellan
+// Nayna Jain
+
+#define pr_fmt(fmt) "secvar: "fmt
+
+#include <linux/printk.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/kobject.h>
+#include <linux/nls.h>
+#include <asm/machdep.h>
+#include <asm/secvar.h>
+#include <asm/plpks.h>
+
+// Config attributes for sysfs
+//
+// PLPKS_CONFIG_ATTR(name, fmt, func) defines a read-only kobj_attribute
+// attr_<name> whose show handler, <name>_show(), emits the value returned
+// by func() formatted with fmt.
+#define PLPKS_CONFIG_ATTR(name, fmt, func) \
+ static ssize_t name##_show(struct kobject *kobj, \
+ struct kobj_attribute *attr, \
+ char *buf) \
+ { \
+ return sysfs_emit(buf, fmt, func()); \
+ } \
+ static struct kobj_attribute attr_##name = __ATTR_RO(name)
+
+// One attribute per PLPKS configuration getter
+PLPKS_CONFIG_ATTR(version, "%u\n", plpks_get_version);
+PLPKS_CONFIG_ATTR(max_object_size, "%u\n", plpks_get_maxobjectsize);
+PLPKS_CONFIG_ATTR(total_size, "%u\n", plpks_get_totalsize);
+PLPKS_CONFIG_ATTR(used_space, "%u\n", plpks_get_usedspace);
+PLPKS_CONFIG_ATTR(supported_policies, "%08x\n", plpks_get_supportedpolicies);
+PLPKS_CONFIG_ATTR(signed_update_algorithms, "%016llx\n", plpks_get_signedupdatealgorithms);
+
+// NULL-terminated list of the config attributes defined above; exported
+// through plpks_secvar_ops.config_attrs
+static const struct attribute *config_attrs[] = {
+ &attr_version.attr,
+ &attr_max_object_size.attr,
+ &attr_total_size.attr,
+ &attr_used_space.attr,
+ &attr_supported_policies.attr,
+ &attr_signed_update_algorithms.attr,
+ NULL,
+};
+
+// Returns the policy flags used when writing the variable called @name:
+// "db", "dbx", "grubdb", "grubdbx" and "sbat" are world readable in
+// addition to requiring signed updates; every other name only gets
+// PLPKS_SIGNEDUPDATE.
+static u32 get_policy(const char *name)
+{
+	static const char * const world_readable[] = {
+		"db", "dbx", "grubdb", "grubdbx", "sbat",
+	};
+	size_t i;
+
+	for (i = 0; i < sizeof(world_readable) / sizeof(world_readable[0]); i++) {
+		if (strcmp(name, world_readable[i]) == 0)
+			return (PLPKS_WORLDREADABLE | PLPKS_SIGNEDUPDATE);
+	}
+
+	return PLPKS_SIGNEDUPDATE;
+}
+
+// NULL-terminated list of variable names exported through
+// plpks_secvar_ops.var_names
+static const char * const plpks_var_names[] = {
+ "PK",
+ "KEK",
+ "db",
+ "dbx",
+ "grubdb",
+ "grubdbx",
+ "sbat",
+ "moduledb",
+ "trustedcadb",
+ NULL,
+};
+
+// Read the OS-owned variable @key from the keystore.
+//
+// @key_len counts the terminating NUL of @key.  When @data is non-NULL it
+// receives the value and *@data_size gives its capacity; on success
+// *@data_size is set to the length reported by plpks_read_os_var().
+//
+// Returns 0 on success, -ENOMEM if the name buffer cannot be allocated,
+// -ENOENT unchanged, and -EIO for any other failure (which is logged).
+static int plpks_get_variable(const char *key, u64 key_len, u8 *data,
+			      u64 *data_size)
+{
+	struct plpks_var var = {0};
+	// The null terminator at the end of the string is not part of the
+	// name, so it is left out of the conversion
+	u64 name_chars = key_len - 1;
+	int rc;
+
+	var.name = kcalloc(name_chars, sizeof(wchar_t), GFP_KERNEL);
+	if (!var.name)
+		return -ENOMEM;
+
+	rc = utf8s_to_utf16s(key, name_chars, UTF16_LITTLE_ENDIAN,
+			     (wchar_t *)var.name, name_chars);
+	if (rc >= 0) {
+		// rc is the number of UTF-16 units written, two bytes each
+		var.namelen = rc * 2;
+		var.os = PLPKS_VAR_LINUX;
+		if (data) {
+			var.data = data;
+			var.datalen = *data_size;
+		}
+
+		rc = plpks_read_os_var(&var);
+		if (!rc)
+			*data_size = var.datalen;
+	}
+
+	kfree(var.name);
+
+	if (rc && rc != -ENOENT) {
+		pr_err("Failed to read variable '%s': %d\n", key, rc);
+		// Return -EIO since userspace probably doesn't care about the
+		// specific error
+		rc = -EIO;
+	}
+	return rc;
+}
+
+// Write the OS-owned variable @key through a signed update.
+//
+// @key_len counts the terminating NUL of @key.  @data starts with 8 bytes
+// of big-endian flags followed by the payload, so @data_size must exceed
+// 8 bytes.
+//
+// Returns -EINVAL when there is no payload, -ENOMEM if the name buffer
+// cannot be allocated, a negative value from the name conversion, or the
+// result of plpks_signed_update_var().
+static int plpks_set_variable(const char *key, u64 key_len, u8 *data,
+			      u64 data_size)
+{
+	struct plpks_var var = {0};
+	// The null terminator at the end of the string is not part of the
+	// name, so it is left out of the conversion
+	u64 name_chars = key_len - 1;
+	u64 flags;
+	int rc;
+
+	// Secure variables need to be prefixed with 8 bytes of flags.
+	// We only want to perform the write if we have at least one byte of data.
+	if (data_size <= sizeof(flags))
+		return -EINVAL;
+
+	var.name = kcalloc(name_chars, sizeof(wchar_t), GFP_KERNEL);
+	if (!var.name)
+		return -ENOMEM;
+
+	rc = utf8s_to_utf16s(key, name_chars, UTF16_LITTLE_ENDIAN,
+			     (wchar_t *)var.name, name_chars);
+	if (rc >= 0) {
+		var.namelen = rc * 2;
+
+		// Flags are contained in the first 8 bytes of the buffer, and
+		// are always big-endian
+		flags = be64_to_cpup((__be64 *)data);
+
+		var.data = data + sizeof(flags);
+		var.datalen = data_size - sizeof(flags);
+		var.os = PLPKS_VAR_LINUX;
+		var.policy = get_policy(key);
+
+		// Unlike in the read case, the plpks error code can be useful
+		// to userspace on write, so we return it rather than just -EIO
+		rc = plpks_signed_update_var(&var, flags);
+	}
+
+	kfree(var.name);
+	return rc;
+}
+
+// PLPKS dynamic secure boot doesn't give us a format string in the same way OPAL does.
+// Instead, report the format using the SB_VERSION variable in the keystore.
+// The string is made up by us, and takes the form "ibm,plpks-sb-v<n>" (or "ibm,plpks-sb-unknown"
+// if the SB_VERSION variable doesn't exist). Hypervisor defines the SB_VERSION variable as a
+// "1 byte unsigned integer value".
+//
+// Returns what snprintf() returns for the string written to @buf, or -EIO
+// when reading SB_VERSION fails for a reason other than -ENOENT.
+static ssize_t plpks_secvar_format(char *buf, size_t bufsize)
+{
+	struct plpks_var var = {0};
+	u8 version;
+	ssize_t ret;
+
+	// Only the signed variables have null bytes in their names, this one doesn't
+	var.name = "SB_VERSION";
+	var.namelen = strlen(var.name);
+	var.component = NULL;
+	var.data = &version;
+	var.datalen = 1;
+
+	// Unlike the other vars, SB_VERSION is owned by firmware instead of the OS
+	ret = plpks_read_fw_var(&var);
+	if (!ret)
+		return snprintf(buf, bufsize, "ibm,plpks-sb-v%hhu", version);
+
+	if (ret == -ENOENT)
+		return snprintf(buf, bufsize, "ibm,plpks-sb-unknown");
+
+	pr_err("Error %ld reading SB_VERSION from firmware\n", ret);
+	return -EIO;
+}
+
+// Reports through @max_size the largest buffer a user may write: the
+// hypervisor's max object size plus the 8 bytes of signed update flags
+// that prefix the data on write.  Always returns 0.
+static int plpks_max_size(u64 *max_size)
+{
+	u64 object_max = (u64)plpks_get_maxobjectsize();
+
+	*max_size = object_max + sizeof(u64);
+	return 0;
+}
+
+
+// secvar backend operations implemented by this file; registered by
+// plpks_secvar_init() via set_secvar_ops()
+static const struct secvar_operations plpks_secvar_ops = {
+ .get = plpks_get_variable,
+ .set = plpks_set_variable,
+ .format = plpks_secvar_format,
+ .max_size = plpks_max_size,
+ .config_attrs = config_attrs,
+ .var_names = plpks_var_names,
+};
+
+// Initcall: register the PLPKS secvar operations when the keystore is
+// available.  Returns -ENODEV otherwise, or the result of set_secvar_ops().
+static int plpks_secvar_init(void)
+{
+	if (plpks_is_available())
+		return set_secvar_ops(&plpks_secvar_ops);
+
+	return -ENODEV;
+}
+machine_device_initcall(pseries, plpks_secvar_init);
diff --git a/arch/powerpc/platforms/pseries/plpks.c b/arch/powerpc/platforms/pseries/plpks.c
new file mode 100644
index 000000000..2d40304eb
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/plpks.c
@@ -0,0 +1,711 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * POWER LPAR Platform KeyStore(PLPKS)
+ * Copyright (C) 2022 IBM Corporation
+ * Author: Nayna Jain <nayna@linux.ibm.com>
+ *
+ * Provides access to variables stored in Power LPAR Platform KeyStore(PLPKS).
+ */
+
+#define pr_fmt(fmt) "plpks: " fmt
+
+#include <linux/delay.h>
+#include <linux/errno.h>
+#include <linux/io.h>
+#include <linux/printk.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/of_fdt.h>
+#include <linux/libfdt.h>
+#include <linux/memblock.h>
+#include <asm/hvcall.h>
+#include <asm/machdep.h>
+#include <asm/plpks.h>
+#include <asm/firmware.h>
+
+// OS consumer password and its length in bytes. Filled in either by
+// plpks_gen_password() or, after a kexec, by plpks_early_init_devtree().
+static u8 *ospassword;
+static u16 ospasswordlength;
+
+// Retrieved with H_PKS_GET_CONFIG
+// (cached by _plpks_get_config() after conversion to CPU byte order)
+static u8 version;
+static u16 objoverhead;
+static u16 maxpwsize;
+static u16 maxobjsize;
+static s16 maxobjlabelsize;
+static u32 totalsize;
+static u32 usedspace;
+static u32 supportedpolicies;
+static u32 maxlargeobjectsize;
+static u64 signedupdatealgorithms;
+
+// Authentication block built by construct_auth(); its physical address is
+// passed to the PLPKS hcalls in this file. Packed, 16-byte aligned, with
+// the password length stored big-endian and the password bytes trailing.
+struct plpks_auth {
+ u8 version;
+ u8 consumer;
+ __be64 rsvd0;
+ __be32 rsvd1;
+ __be16 passwordlength;
+ u8 password[];
+} __packed __aligned(16);
+
+// Fixed-size attribute header placed in front of every object name.
+// construct_label() copies the component string into prefix and fills in
+// version, os and length.
+struct label_attr {
+ u8 prefix[8];
+ u8 version;
+ u8 os;
+ u8 length;
+ u8 reserved[5];
+};
+
+// Attribute header plus object name. size is the number of bytes passed to
+// the hypervisor: sizeof(struct label_attr) + the name length.
+struct label {
+ struct label_attr attr;
+ u8 name[PLPKS_MAX_NAME_SIZE];
+ size_t size;
+};
+
+// Translate a hypervisor call status into a Linux errno (0 for H_SUCCESS).
+// Any status without an explicit mapping becomes -EINVAL.
+static int pseries_status_to_err(int rc)
+{
+	int linux_err;
+
+	switch (rc) {
+	case H_SUCCESS:
+		linux_err = 0;
+		break;
+	case H_FUNCTION:
+		linux_err = -ENXIO;
+		break;
+	case H_NOT_FOUND:
+		linux_err = -ENOENT;
+		break;
+	case H_BUSY:
+	case H_LONG_BUSY_ORDER_1_MSEC:
+	case H_LONG_BUSY_ORDER_10_MSEC:
+	case H_LONG_BUSY_ORDER_100_MSEC:
+	case H_LONG_BUSY_ORDER_1_SEC:
+	case H_LONG_BUSY_ORDER_10_SEC:
+	case H_LONG_BUSY_ORDER_100_SEC:
+		linux_err = -EBUSY;
+		break;
+	case H_AUTHORITY:
+		linux_err = -EPERM;
+		break;
+	case H_NO_MEM:
+		linux_err = -ENOMEM;
+		break;
+	case H_RESOURCE:
+	case H_IN_USE:
+		linux_err = -EEXIST;
+		break;
+	case H_TOO_BIG:
+		linux_err = -EFBIG;
+		break;
+	case H_STATE:
+	case H_R_STATE:
+	case H_ABORTED:
+		linux_err = -EIO;
+		break;
+	case H_PARAMETER:
+	case H_P2:
+	case H_P3:
+	case H_P4:
+	case H_P5:
+	case H_P6:
+	default:
+		linux_err = -EINVAL;
+		break;
+	}
+
+	pr_debug("Converted hypervisor code %d to Linux %d\n", rc, linux_err);
+
+	return linux_err;
+}
+
+// Ask the hypervisor to generate the OS consumer password and keep a copy
+// in ospassword/ospasswordlength.
+//
+// Returns 0 when a password is already held (e.g. recovered across kexec),
+// when one was generated, or when the hypervisor reports H_IN_USE (in which
+// case no password is available and authenticated operations will fail).
+// Returns -ENOMEM on allocation failure, otherwise the translated hcall
+// status.
+static int plpks_gen_password(void)
+{
+	unsigned long retbuf[PLPAR_HCALL_BUFSIZE] = { 0 };
+	u8 *password, consumer = PLPKS_OS_OWNER;
+	int rc;
+
+	// If we booted from kexec, we could be reusing an existing password already
+	if (ospassword) {
+		pr_debug("Password of length %u already in use\n", ospasswordlength);
+		return 0;
+	}
+
+	// The password must not cross a page boundary, so we align to the next power of 2
+	password = kzalloc(roundup_pow_of_two(maxpwsize), GFP_KERNEL);
+	if (!password)
+		return -ENOMEM;
+
+	rc = plpar_hcall(H_PKS_GEN_PASSWORD, retbuf, consumer, 0,
+			 virt_to_phys(password), maxpwsize);
+
+	if (rc == H_SUCCESS) {
+		ospassword = kzalloc(maxpwsize, GFP_KERNEL);
+		if (!ospassword) {
+			// The scratch buffer holds the generated secret:
+			// zero it before handing the memory back.
+			kfree_sensitive(password);
+			return -ENOMEM;
+		}
+		memcpy(ospassword, password, maxpwsize);
+		// Publish the length only once the password is really stored,
+		// so the two globals never disagree.
+		ospasswordlength = maxpwsize;
+	} else if (rc == H_IN_USE) {
+		pr_warn("Password already set - authenticated operations will fail\n");
+		rc = H_SUCCESS;
+	}
+
+	kfree_sensitive(password);
+
+	return pseries_status_to_err(rc);
+}
+
+// Allocate and fill the authentication block for @consumer.
+//
+// Firmware and bootloader consumers get an empty (zeroed) password; every
+// other accepted consumer gets a copy of the OS password. Returns the new
+// block (caller frees it) or ERR_PTR(-EINVAL) / ERR_PTR(-ENOMEM).
+static struct plpks_auth *construct_auth(u8 consumer)
+{
+	struct plpks_auth *auth;
+	bool passwordless;
+
+	if (consumer > PLPKS_OS_OWNER)
+		return ERR_PTR(-EINVAL);
+
+	passwordless = consumer == PLPKS_FW_OWNER ||
+		       consumer == PLPKS_BOOTLOADER_OWNER;
+
+	// password[] below is sized from maxpwsize, but the stored password
+	// length may come from the kexec FDT; refuse to copy past the array.
+	if (!passwordless && ospasswordlength > maxpwsize)
+		return ERR_PTR(-EINVAL);
+
+	// The auth structure must not cross a page boundary and must be
+	// 16 byte aligned. We align to the next largest power of 2
+	auth = kzalloc(roundup_pow_of_two(struct_size(auth, password, maxpwsize)), GFP_KERNEL);
+	if (!auth)
+		return ERR_PTR(-ENOMEM);
+
+	auth->version = 1;
+	auth->consumer = consumer;
+
+	if (passwordless)
+		return auth;
+
+	// ospassword is NULL when no password could be obtained; the block
+	// then carries whatever length is recorded and zeroed password bytes.
+	if (ospassword)
+		memcpy(auth->password, ospassword, ospasswordlength);
+
+	auth->passwordlength = cpu_to_be16(ospasswordlength);
+
+	return auth;
+}
+
+/*
+ * Label is combination of label attributes + name.
+ * Label attributes are used internally by kernel and not exposed to the user.
+ */
+static struct label *construct_label(char *component, u8 varos, u8 *name,
+				     u16 namelen)
+{
+	struct label *lbl;
+	size_t prefix_len;
+
+	if (!name || namelen > PLPKS_MAX_NAME_SIZE)
+		return ERR_PTR(-EINVAL);
+
+	// A NULL component is allowed (signed updates use it); the prefix is
+	// then left zeroed.
+	prefix_len = component ? strlen(component) : 0;
+	if (prefix_len > sizeof(lbl->attr.prefix))
+		return ERR_PTR(-EINVAL);
+
+	// The label structure must not cross a page boundary, so we align to the next power of 2
+	lbl = kzalloc(roundup_pow_of_two(sizeof(*lbl)), GFP_KERNEL);
+	if (!lbl)
+		return ERR_PTR(-ENOMEM);
+
+	if (component)
+		memcpy(lbl->attr.prefix, component, prefix_len);
+	memcpy(lbl->name, name, namelen);
+
+	lbl->attr.version = PLPKS_LABEL_VERSION;
+	lbl->attr.os = varos;
+	lbl->attr.length = PLPKS_MAX_LABEL_ATTR_SIZE;
+
+	// Only the attribute header and the used part of the name are sent.
+	lbl->size = sizeof(struct label_attr) + namelen;
+
+	return lbl;
+}
+
+// Query H_PKS_GET_CONFIG and cache the returned keystore parameters in the
+// file-scope variables, converting multi-byte fields from big-endian.
+//
+// Returns 0 on success, -ENOMEM if the buffer cannot be allocated, the
+// translated hcall status if the hcall fails, or -EIO if a returned value
+// fails one of the sanity checks. Note that the cached values are written
+// before those checks run, so they are updated even when -EIO is returned.
+static int _plpks_get_config(void)
+{
+ unsigned long retbuf[PLPAR_HCALL_BUFSIZE] = { 0 };
+ // Buffer layout filled in by the hypervisor; fields are big-endian.
+ struct config {
+ u8 version;
+ u8 flags;
+ __be16 rsvd0;
+ __be16 objoverhead;
+ __be16 maxpwsize;
+ __be16 maxobjlabelsize;
+ __be16 maxobjsize;
+ __be32 totalsize;
+ __be32 usedspace;
+ __be32 supportedpolicies;
+ __be32 maxlargeobjectsize;
+ __be64 signedupdatealgorithms;
+ u8 rsvd1[476];
+ } __packed * config;
+ size_t size;
+ int rc = 0;
+
+ size = sizeof(*config);
+
+ // Config struct must not cross a page boundary. So long as the struct
+ // size is a power of 2, this should be fine as alignment is guaranteed
+ config = kzalloc(size, GFP_KERNEL);
+ if (!config) {
+ rc = -ENOMEM;
+ goto err;
+ }
+
+ rc = plpar_hcall(H_PKS_GET_CONFIG, retbuf, virt_to_phys(config), size);
+
+ if (rc != H_SUCCESS) {
+ rc = pseries_status_to_err(rc);
+ goto err;
+ }
+
+ version = config->version;
+ objoverhead = be16_to_cpu(config->objoverhead);
+ maxpwsize = be16_to_cpu(config->maxpwsize);
+ maxobjsize = be16_to_cpu(config->maxobjsize);
+ maxobjlabelsize = be16_to_cpu(config->maxobjlabelsize);
+ totalsize = be32_to_cpu(config->totalsize);
+ usedspace = be32_to_cpu(config->usedspace);
+ supportedpolicies = be32_to_cpu(config->supportedpolicies);
+ maxlargeobjectsize = be32_to_cpu(config->maxlargeobjectsize);
+ signedupdatealgorithms = be64_to_cpu(config->signedupdatealgorithms);
+
+ // Validate that the numbers we get back match the requirements of the spec
+ if (maxpwsize < 32) {
+ pr_err("Invalid Max Password Size received from hypervisor (%d < 32)\n", maxpwsize);
+ rc = -EIO;
+ goto err;
+ }
+
+ if (maxobjlabelsize < 255) {
+ pr_err("Invalid Max Object Label Size received from hypervisor (%d < 255)\n",
+ maxobjlabelsize);
+ rc = -EIO;
+ goto err;
+ }
+
+ if (totalsize < 4096) {
+ pr_err("Invalid Total Size received from hypervisor (%d < 4096)\n", totalsize);
+ rc = -EIO;
+ goto err;
+ }
+
+ if (version >= 3 && maxlargeobjectsize >= 65536 && maxobjsize != 0xFFFF) {
+ pr_err("Invalid Max Object Size (0x%x != 0xFFFF)\n", maxobjsize);
+ rc = -EIO;
+ goto err;
+ }
+
+// Shared exit for success and failure; kfree() of a NULL config is a no-op.
+err:
+ kfree(config);
+ return rc;
+}
+
+// The accessors below return the values cached by the most recent
+// _plpks_get_config() call; they do not query the hypervisor themselves.
+
+// Cached config version byte.
+u8 plpks_get_version(void)
+{
+ return version;
+}
+
+// Cached objoverhead field.
+u16 plpks_get_objoverhead(void)
+{
+ return objoverhead;
+}
+
+// Cached maximum password size.
+u16 plpks_get_maxpwsize(void)
+{
+ return maxpwsize;
+}
+
+// Cached maximum object size.
+u16 plpks_get_maxobjectsize(void)
+{
+ return maxobjsize;
+}
+
+// Cached maximum object label size (stored as s16, returned as u16).
+u16 plpks_get_maxobjectlabelsize(void)
+{
+ return maxobjlabelsize;
+}
+
+// Cached total size of the keystore.
+u32 plpks_get_totalsize(void)
+{
+ return totalsize;
+}
+
+// Return the keystore space currently in use, or 0 if the configuration
+// could not be refreshed.
+u32 plpks_get_usedspace(void)
+{
+	// Unlike other config values, usedspace regularly changes as objects
+	// are updated, so we need to refresh.
+	int rc = _plpks_get_config();
+
+	if (!rc)
+		return usedspace;
+
+	pr_err("Couldn't get config, rc: %d\n", rc);
+	return 0;
+}
+
+// Cached supportedpolicies field from the last _plpks_get_config() call.
+u32 plpks_get_supportedpolicies(void)
+{
+ return supportedpolicies;
+}
+
+// Cached maximum large-object size from the last _plpks_get_config() call.
+u32 plpks_get_maxlargeobjectsize(void)
+{
+ return maxlargeobjectsize;
+}
+
+// Cached signedupdatealgorithms field from the last _plpks_get_config() call.
+u64 plpks_get_signedupdatealgorithms(void)
+{
+ return signedupdatealgorithms;
+}
+
+// Length in bytes of the OS password currently held (0 if none was set).
+u16 plpks_get_passwordlen(void)
+{
+ return ospasswordlength;
+}
+
+// True when firmware advertises the PLPKS feature and the keystore
+// configuration can be read successfully.
+bool plpks_is_available(void)
+{
+	if (!firmware_has_feature(FW_FEATURE_PLPKS))
+		return false;
+
+	return _plpks_get_config() == 0;
+}
+
+// Poll H_PKS_CONFIRM_OBJECT_FLUSHED for @label until the hypervisor reports
+// status 1 in retbuf[0], the hcall fails, or the accumulated sleep time
+// reaches PLPKS_MAX_TIMEOUT.
+//
+// Returns 0 once flushed (this includes H_NOT_FOUND together with status 1),
+// -ETIMEDOUT if the loop ran out, or the translated hcall error. The return
+// value is already a Linux errno.
+static int plpks_confirm_object_flushed(struct label *label,
+ struct plpks_auth *auth)
+{
+ unsigned long retbuf[PLPAR_HCALL_BUFSIZE] = { 0 };
+ bool timed_out = true;
+ u64 timeout = 0;
+ u8 status;
+ int rc;
+
+ do {
+ rc = plpar_hcall(H_PKS_CONFIRM_OBJECT_FLUSHED, retbuf,
+ virt_to_phys(auth), virt_to_phys(label),
+ label->size);
+
+ status = retbuf[0];
+ // Any hcall failure ends the polling immediately.
+ if (rc) {
+ timed_out = false;
+ if (rc == H_NOT_FOUND && status == 1)
+ rc = 0;
+ break;
+ }
+
+ if (!rc && status == 1) {
+ timed_out = false;
+ break;
+ }
+
+ // Not flushed yet: sleep and account for the minimum sleep time.
+ usleep_range(PLPKS_FLUSH_SLEEP,
+ PLPKS_FLUSH_SLEEP + PLPKS_FLUSH_SLEEP_RANGE);
+ timeout = timeout + PLPKS_FLUSH_SLEEP;
+ } while (timeout < PLPKS_MAX_TIMEOUT);
+
+ if (timed_out)
+ return -ETIMEDOUT;
+
+ return pseries_status_to_err(rc);
+}
+
+// Submit a signed update for an OS-owned variable via H_PKS_SIGNED_UPDATE
+// and wait for the object to be flushed.
+//
+// @var must carry data, a non-zero datalen, the PLPKS_SIGNEDUPDATE policy
+// bit and a NULL component. Busy statuses are retried until
+// PLPKS_MAX_TIMEOUT milliseconds of delay have accumulated.
+// Returns 0 on success or a negative errno.
+int plpks_signed_update_var(struct plpks_var *var, u64 flags)
+{
+	unsigned long retbuf[PLPAR_HCALL9_BUFSIZE] = {0};
+	int hrc;
+	int rc;
+	struct label *label;
+	struct plpks_auth *auth;
+	u64 continuetoken = 0;
+	u64 timeout = 0;
+
+	if (!var->data || var->datalen <= 0 || var->namelen > PLPKS_MAX_NAME_SIZE)
+		return -EINVAL;
+
+	if (!(var->policy & PLPKS_SIGNEDUPDATE))
+		return -EINVAL;
+
+	// Signed updates need the component to be NULL.
+	if (var->component)
+		return -EINVAL;
+
+	auth = construct_auth(PLPKS_OS_OWNER);
+	if (IS_ERR(auth))
+		return PTR_ERR(auth);
+
+	label = construct_label(var->component, var->os, var->name, var->namelen);
+	if (IS_ERR(label)) {
+		rc = PTR_ERR(label);
+		goto out;
+	}
+
+	do {
+		hrc = plpar_hcall9(H_PKS_SIGNED_UPDATE, retbuf,
+				   virt_to_phys(auth), virt_to_phys(label),
+				   label->size, var->policy, flags,
+				   virt_to_phys(var->data), var->datalen,
+				   continuetoken);
+
+		// The hypervisor hands back a token to resume a busy update.
+		continuetoken = retbuf[0];
+
+		// Translate once; the raw status is still needed for the delay.
+		rc = pseries_status_to_err(hrc);
+		if (rc == -EBUSY) {
+			int delay_ms = get_longbusy_msecs(hrc);
+
+			mdelay(delay_ms);
+			timeout += delay_ms;
+		}
+	} while (rc == -EBUSY && timeout < PLPKS_MAX_TIMEOUT);
+
+	if (!rc)
+		rc = plpks_confirm_object_flushed(label, auth);
+
+	kfree(label);
+out:
+	// auth holds a copy of the OS password; zero it before freeing.
+	kfree_sensitive(auth);
+
+	return rc;
+}
+
+// Write an OS-owned, non-signed variable with H_PKS_WRITE_OBJECT and wait
+// for it to be flushed.
+//
+// Returns 0 on success, -EINVAL for invalid arguments (missing component or
+// data, zero or oversized datalen, oversized name, signed-update policy), or
+// another negative errno from the hcall or the flush confirmation.
+int plpks_write_var(struct plpks_var var)
+{
+	unsigned long retbuf[PLPAR_HCALL_BUFSIZE] = { 0 };
+	struct plpks_auth *auth;
+	struct label *label;
+	int rc;
+
+	if (!var.component || !var.data || var.datalen <= 0 ||
+	    var.namelen > PLPKS_MAX_NAME_SIZE || var.datalen > PLPKS_MAX_DATA_SIZE)
+		return -EINVAL;
+
+	if (var.policy & PLPKS_SIGNEDUPDATE)
+		return -EINVAL;
+
+	auth = construct_auth(PLPKS_OS_OWNER);
+	if (IS_ERR(auth))
+		return PTR_ERR(auth);
+
+	label = construct_label(var.component, var.os, var.name, var.namelen);
+	if (IS_ERR(label)) {
+		rc = PTR_ERR(label);
+		goto out;
+	}
+
+	rc = plpar_hcall(H_PKS_WRITE_OBJECT, retbuf, virt_to_phys(auth),
+			 virt_to_phys(label), label->size, var.policy,
+			 virt_to_phys(var.data), var.datalen);
+
+	// Translate the hcall status before the flush check: the flush helper
+	// already returns a Linux errno, which must not be translated again.
+	rc = pseries_status_to_err(rc);
+	if (!rc)
+		rc = plpks_confirm_object_flushed(label, auth);
+
+	kfree(label);
+out:
+	// auth holds a copy of the OS password; zero it before freeing.
+	kfree_sensitive(auth);
+
+	return rc;
+}
+
+// Remove an OS-owned variable with H_PKS_REMOVE_OBJECT and wait for the
+// removal to be flushed.
+//
+// Returns 0 on success, -EINVAL if the name is too long, or another
+// negative errno from label/auth construction, the hcall or the flush
+// confirmation.
+int plpks_remove_var(char *component, u8 varos, struct plpks_var_name vname)
+{
+	unsigned long retbuf[PLPAR_HCALL_BUFSIZE] = { 0 };
+	struct plpks_auth *auth;
+	struct label *label;
+	int rc;
+
+	if (vname.namelen > PLPKS_MAX_NAME_SIZE)
+		return -EINVAL;
+
+	auth = construct_auth(PLPKS_OS_OWNER);
+	if (IS_ERR(auth))
+		return PTR_ERR(auth);
+
+	label = construct_label(component, varos, vname.name, vname.namelen);
+	if (IS_ERR(label)) {
+		rc = PTR_ERR(label);
+		goto out;
+	}
+
+	rc = plpar_hcall(H_PKS_REMOVE_OBJECT, retbuf, virt_to_phys(auth),
+			 virt_to_phys(label), label->size);
+
+	// Translate the hcall status before the flush check: the flush helper
+	// already returns a Linux errno, which must not be translated again.
+	rc = pseries_status_to_err(rc);
+	if (!rc)
+		rc = plpks_confirm_object_flushed(label, auth);
+
+	kfree(label);
+out:
+	// auth holds a copy of the OS password; zero it before freeing.
+	kfree_sensitive(auth);
+
+	return rc;
+}
+
+// Read a variable on behalf of @consumer with H_PKS_READ_OBJECT.
+//
+// For the OS consumer the object is addressed by a constructed label; for
+// other consumers the raw name is passed. On success var->policy is set
+// from retbuf[1]; var->datalen is set to the returned length when no data
+// buffer was supplied or the buffer is larger than the object, and up to
+// var->datalen bytes are copied into var->data if it is non-NULL.
+// Returns 0 on success or a negative errno.
+static int plpks_read_var(u8 consumer, struct plpks_var *var)
+{
+	unsigned long retbuf[PLPAR_HCALL_BUFSIZE] = { 0 };
+	struct plpks_auth *auth;
+	struct label *label = NULL;
+	u8 *output;
+	int rc;
+
+	if (var->namelen > PLPKS_MAX_NAME_SIZE)
+		return -EINVAL;
+
+	auth = construct_auth(consumer);
+	if (IS_ERR(auth))
+		return PTR_ERR(auth);
+
+	if (consumer == PLPKS_OS_OWNER) {
+		label = construct_label(var->component, var->os, var->name,
+					var->namelen);
+		if (IS_ERR(label)) {
+			rc = PTR_ERR(label);
+			goto out_free_auth;
+		}
+	}
+
+	output = kzalloc(maxobjsize, GFP_KERNEL);
+	if (!output) {
+		rc = -ENOMEM;
+		goto out_free_label;
+	}
+
+	if (consumer == PLPKS_OS_OWNER)
+		rc = plpar_hcall(H_PKS_READ_OBJECT, retbuf, virt_to_phys(auth),
+				 virt_to_phys(label), label->size, virt_to_phys(output),
+				 maxobjsize);
+	else
+		rc = plpar_hcall(H_PKS_READ_OBJECT, retbuf, virt_to_phys(auth),
+				 virt_to_phys(var->name), var->namelen, virt_to_phys(output),
+				 maxobjsize);
+
+	if (rc != H_SUCCESS) {
+		rc = pseries_status_to_err(rc);
+		goto out_free_output;
+	}
+
+	// retbuf[0] is the object length; never report more than was returned.
+	if (!var->data || var->datalen > retbuf[0])
+		var->datalen = retbuf[0];
+
+	var->policy = retbuf[1];
+
+	if (var->data)
+		memcpy(var->data, output, var->datalen);
+
+	rc = 0;
+
+out_free_output:
+	kfree(output);
+out_free_label:
+	// label stays NULL for non-OS consumers; kfree(NULL) is a no-op.
+	kfree(label);
+out_free_auth:
+	// auth may hold a copy of the OS password; zero it before freeing.
+	kfree_sensitive(auth);
+
+	return rc;
+}
+
+// Read @var as the OS consumer; see plpks_read_var() for the result fields.
+int plpks_read_os_var(struct plpks_var *var)
+{
+ return plpks_read_var(PLPKS_OS_OWNER, var);
+}
+
+// Read @var as the firmware consumer (no password is sent).
+int plpks_read_fw_var(struct plpks_var *var)
+{
+ return plpks_read_var(PLPKS_FW_OWNER, var);
+}
+
+// Read @var as the bootloader consumer (no password is sent).
+int plpks_read_bootloader_var(struct plpks_var *var)
+{
+ return plpks_read_var(PLPKS_BOOTLOADER_OWNER, var);
+}
+
+// Store the OS password in the "ibm,plpks-pw" property of /chosen in @fdt.
+// Returns the fdt_setprop() result, or the negative libfdt offset error if
+// /chosen cannot be found.
+int plpks_populate_fdt(void *fdt)
+{
+	int node = fdt_path_offset(fdt, "/chosen");
+
+	if (node >= 0)
+		return fdt_setprop(fdt, node, "ibm,plpks-pw", ospassword, ospasswordlength);
+
+	pr_err("Can't find chosen node: %s\n",
+	       fdt_strerror(node));
+	return node;
+}
+
+// Once a password is registered with the hypervisor it cannot be cleared without
+// rebooting the LPAR, so to keep using the PLPKS across kexec boots we need to
+// recover the previous password from the FDT.
+//
+// There are a few challenges here. We don't want the password to be visible to
+// users, so we need to clear it from the FDT. This has to be done in early boot.
+// Clearing it from the FDT would make the FDT's checksum invalid, so we have to
+// manually cause the checksum to be recalculated.
+void __init plpks_early_init_devtree(void)
+{
+ void *fdt = initial_boot_params;
+ int chosen_node = fdt_path_offset(fdt, "/chosen");
+ const u8 *password;
+ int len;
+
+ if (chosen_node < 0)
+ return;
+
+ // len receives the property length, or a negative error if it is absent.
+ password = fdt_getprop(fdt, chosen_node, "ibm,plpks-pw", &len);
+ if (len <= 0) {
+ pr_debug("Couldn't find ibm,plpks-pw node.\n");
+ return;
+ }
+
+ ospassword = memblock_alloc_raw(len, SMP_CACHE_BYTES);
+ if (!ospassword) {
+ pr_err("Error allocating memory for password.\n");
+ goto out;
+ }
+
+ memcpy(ospassword, password, len);
+ ospasswordlength = (u16)len;
+
+// The property is removed from the FDT even if the copy above failed.
+out:
+ fdt_nop_property(fdt, chosen_node, "ibm,plpks-pw");
+ // Since we've cleared the password, we must update the FDT checksum
+ early_init_dt_verify(fdt);
+}
+
+// Arch initcall: read the keystore configuration and make sure an OS
+// password exists. Returns -ENODEV without the firmware feature, otherwise
+// 0 or the error from the failing step.
+static __init int pseries_plpks_init(void)
+{
+	int rc;
+
+	if (!firmware_has_feature(FW_FEATURE_PLPKS))
+		return -ENODEV;
+
+	rc = _plpks_get_config();
+	if (rc) {
+		pr_err("POWER LPAR Platform KeyStore is not supported or enabled\n");
+		return rc;
+	}
+
+	rc = plpks_gen_password();
+	if (!rc) {
+		pr_info("POWER LPAR Platform KeyStore initialized successfully\n");
+		return 0;
+	}
+
+	pr_err("Failed setting POWER LPAR Platform KeyStore Password\n");
+	return rc;
+}
+machine_arch_initcall(pseries, pseries_plpks_init);
diff --git a/arch/powerpc/platforms/pseries/pmem.c b/arch/powerpc/platforms/pseries/pmem.c
new file mode 100644
index 000000000..3c290b9ed
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/pmem.c
@@ -0,0 +1,167 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Handles hot and cold plug of persistent memory regions on pseries.
+ */
+
+#define pr_fmt(fmt) "pseries-pmem: " fmt
+
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/sched.h> /* for idle_task_exit */
+#include <linux/sched/hotplug.h>
+#include <linux/cpu.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/slab.h>
+#include <asm/rtas.h>
+#include <asm/firmware.h>
+#include <asm/machdep.h>
+#include <asm/vdso_datapage.h>
+#include <asm/plpar_wrappers.h>
+#include <asm/topology.h>
+
+#include "pseries.h"
+
+static struct device_node *pmem_node;
+
+/*
+ * Hot-add the pmem region identified by @drc_index: acquire its DRC, build
+ * a device node through configure-connector and attach it under pmem_node.
+ *
+ * Returns 0 on success, -EINVAL if acquiring the DRC or configuring the
+ * connector fails, or the error from dlpar_attach_node().
+ */
+static ssize_t pmem_drc_add_node(u32 drc_index)
+{
+ struct device_node *dn;
+ int rc;
+
+ pr_debug("Attempting to add pmem node, drc index: %x\n", drc_index);
+
+ rc = dlpar_acquire_drc(drc_index);
+ if (rc) {
+ pr_err("Failed to acquire DRC, rc: %d, drc index: %x\n",
+ rc, drc_index);
+ return -EINVAL;
+ }
+
+ dn = dlpar_configure_connector(cpu_to_be32(drc_index), pmem_node);
+ if (!dn) {
+ pr_err("configure-connector failed for drc %x\n", drc_index);
+ dlpar_release_drc(drc_index);
+ return -EINVAL;
+ }
+
+ /* NB: The of reconfig notifier creates platform device from the node */
+ rc = dlpar_attach_node(dn, pmem_node);
+ if (rc) {
+ pr_err("Failed to attach node %pOF, rc: %d, drc index: %x\n",
+ dn, rc, drc_index);
+
+ /*
+  * NOTE(review): the configured nodes are freed only when releasing
+  * the DRC fails - confirm who owns dn when the release succeeds.
+  */
+ if (dlpar_release_drc(drc_index))
+ dlpar_free_cc_nodes(dn);
+
+ return rc;
+ }
+
+ pr_info("Successfully added %pOF, drc index: %x\n", dn, drc_index);
+
+ return 0;
+}
+
+/*
+ * Hot-remove the pmem region identified by @drc_index: find the child of
+ * pmem_node whose "ibm,my-drc-index" matches, detach it and release its DRC.
+ * If the release fails the node is attached again.
+ *
+ * Returns 0 on success, -ENODEV if no child uses @drc_index, or the error
+ * from dlpar_detach_node() / dlpar_release_drc().
+ */
+static ssize_t pmem_drc_remove_node(u32 drc_index)
+{
+	struct device_node *dn;
+	uint32_t index;
+	int rc;
+
+	/* dn is NULL after the loop when no child matched */
+	for_each_child_of_node(pmem_node, dn) {
+		if (of_property_read_u32(dn, "ibm,my-drc-index", &index))
+			continue;
+		if (index == drc_index)
+			break;
+	}
+
+	if (!dn) {
+		pr_err("Attempting to remove unused DRC index %x\n", drc_index);
+		return -ENODEV;
+	}
+
+	pr_debug("Attempting to remove %pOF, drc index: %x\n", dn, drc_index);
+
+	/* NB: tears down the ibm,pmemory device as a side-effect */
+	rc = dlpar_detach_node(dn);
+	if (rc)
+		return rc;
+
+	rc = dlpar_release_drc(drc_index);
+	if (rc) {
+		/* This path handles pmem nodes, so do not report a CPU here */
+		pr_err("Failed to release drc (%x) for PMEM %pOFn, rc: %d\n",
+			drc_index, dn, rc);
+		dlpar_attach_node(dn, pmem_node);
+		return rc;
+	}
+
+	pr_info("Successfully removed PMEM with drc index: %x\n", drc_index);
+
+	return 0;
+}
+
+/*
+ * Handle a pmem hotplug error-log entry. Only DRC-index events with an ADD
+ * or REMOVE action are supported; the add/remove itself runs under the
+ * device hotplug lock.
+ *
+ * Returns 0 on success, -ENODEV if no pmem node exists, -EINVAL for an
+ * unsupported id type or action, or the add/remove error.
+ */
+int dlpar_hp_pmem(struct pseries_hp_errorlog *hp_elog)
+{
+	u32 target_drc;
+	int rc;
+
+	/* slim chance, but we might get a hotplug event while booting */
+	if (!pmem_node)
+		pmem_node = of_find_node_by_type(NULL, "ibm,persistent-memory");
+	if (!pmem_node) {
+		pr_err("Hotplug event for a pmem device, but none exists\n");
+		return -ENODEV;
+	}
+
+	if (hp_elog->id_type != PSERIES_HP_ELOG_ID_DRC_INDEX) {
+		pr_err("Unsupported hotplug event type %d\n",
+			hp_elog->id_type);
+		return -EINVAL;
+	}
+
+	target_drc = hp_elog->_drc_u.drc_index;
+
+	lock_device_hotplug();
+
+	switch (hp_elog->action) {
+	case PSERIES_HP_ELOG_ACTION_ADD:
+		rc = pmem_drc_add_node(target_drc);
+		break;
+	case PSERIES_HP_ELOG_ACTION_REMOVE:
+		rc = pmem_drc_remove_node(target_drc);
+		break;
+	default:
+		pr_err("Unsupported hotplug action (%d)\n", hp_elog->action);
+		rc = -EINVAL;
+		break;
+	}
+
+	unlock_device_hotplug();
+	return rc;
+}
+
+/* Match table passed to of_platform_bus_probe() in pseries_pmem_init(). */
+static const struct of_device_id drc_pmem_match[] = {
+ { .type = "ibm,persistent-memory", },
+ {}
+};
+
+/*
+ * Arch initcall: locate the "ibm,persistent-memory" node and probe it.
+ * Always returns 0, including when the CPU feature or the node is missing.
+ */
+static int pseries_pmem_init(void)
+{
+	/* Only supported on POWER8 and above. */
+	if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
+		pmem_node = of_find_node_by_type(NULL, "ibm,persistent-memory");
+
+		/*
+		 * The generic OF bus probe/populate handles creating platform
+		 * devices from the child (ibm,pmemory) nodes. The generic code
+		 * registers an of reconfig notifier to handle the
+		 * hot-add/remove cases too.
+		 */
+		if (pmem_node)
+			of_platform_bus_probe(pmem_node, drc_pmem_match, NULL);
+	}
+
+	return 0;
+}
+machine_arch_initcall(pseries, pseries_pmem_init);
diff --git a/arch/powerpc/platforms/pseries/power.c b/arch/powerpc/platforms/pseries/power.c
new file mode 100644
index 000000000..3676cb297
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/power.c
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Interface for power-management for ppc64 compliant platform
+ *
+ * Manish Ahuja <mahuja@us.ibm.com>
+ *
+ * Feb 2007
+ *
+ * Copyright (C) 2007 IBM Corporation.
+ */
+
+#include <linux/kobject.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <asm/machdep.h>
+
+#include "pseries.h"
+
+unsigned long rtas_poweron_auto; /* default and normal state is 0 */
+
+/*
+ * sysfs show handler for auto_poweron: prints the current value of
+ * rtas_poweron_auto followed by a newline. sysfs_emit() is used so the
+ * output is bounded to the sysfs page buffer.
+ */
+static ssize_t auto_poweron_show(struct kobject *kobj,
+				 struct kobj_attribute *attr, char *buf)
+{
+	return sysfs_emit(buf, "%lu\n", rtas_poweron_auto);
+}
+
+/*
+ * sysfs store handler for auto_poweron: accepts a leading unsigned number
+ * equal to 0 or 1 and stores it in rtas_poweron_auto.
+ * Returns @n on success, -EINVAL otherwise.
+ */
+static ssize_t auto_poweron_store(struct kobject *kobj,
+				  struct kobj_attribute *attr,
+				  const char *buf, size_t n)
+{
+	unsigned long requested;
+
+	if (sscanf(buf, "%lu", &requested) != 1)
+		return -EINVAL;
+
+	/* unsigned, so this accepts exactly 0 and 1 */
+	if (requested > 1)
+		return -EINVAL;
+
+	rtas_poweron_auto = requested;
+	return n;
+}
+
+/* "auto_poweron" attribute, mode 0644, backed by the handlers above. */
+static struct kobj_attribute auto_poweron_attr =
+ __ATTR(auto_poweron, 0644, auto_poweron_show, auto_poweron_store);
+
+#ifndef CONFIG_PM
+/* Without CONFIG_PM this file defines power_kobj itself (see pm_init()). */
+struct kobject *power_kobj;
+
+/* NULL-terminated attribute list for attr_group. */
+static struct attribute *g[] = {
+ &auto_poweron_attr.attr,
+ NULL,
+};
+
+static const struct attribute_group attr_group = {
+ .attrs = g,
+};
+
+/*
+ * Create the top-level "power" kobject and add the attribute group to it.
+ * Returns -ENOMEM if the kobject cannot be created, otherwise the result
+ * of sysfs_create_group().
+ */
+static int __init pm_init(void)
+{
+	power_kobj = kobject_create_and_add("power", NULL);
+	if (power_kobj)
+		return sysfs_create_group(power_kobj, &attr_group);
+
+	return -ENOMEM;
+}
+machine_core_initcall(pseries, pm_init);
+#else
+/* Add the auto_poweron file to the existing power_kobj. */
+static int __init apo_pm_init(void)
+{
+	int rc = sysfs_create_file(power_kobj, &auto_poweron_attr.attr);
+
+	return rc;
+}
+#endif
diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h
new file mode 100644
index 000000000..8376f03f9
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/pseries.h
@@ -0,0 +1,131 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright 2006 IBM Corporation.
+ */
+
+#ifndef _PSERIES_PSERIES_H
+#define _PSERIES_PSERIES_H
+
+#include <linux/interrupt.h>
+#include <asm/rtas.h>
+
+struct device_node;
+
+void __init request_event_sources_irqs(struct device_node *np,
+ irq_handler_t handler, const char *name);
+
+#include <linux/of.h>
+
+struct pt_regs;
+
+extern int pSeries_system_reset_exception(struct pt_regs *regs);
+extern int pSeries_machine_check_exception(struct pt_regs *regs);
+extern long pseries_machine_check_realmode(struct pt_regs *regs);
+void pSeries_machine_check_log_err(void);
+
+#ifdef CONFIG_SMP
+extern void smp_init_pseries(void);
+
+/* Get state of physical CPU from query_cpu_stopped */
+int smp_query_cpu_stopped(unsigned int pcpu);
+#define QCSS_STOPPED 0
+#define QCSS_STOPPING 1
+#define QCSS_NOT_STOPPED 2
+#define QCSS_HARDWARE_ERROR -1
+#define QCSS_HARDWARE_BUSY -2
+#else
+static inline void smp_init_pseries(void) { }
+#endif
+
+extern void pseries_kexec_cpu_down(int crash_shutdown, int secondary);
+void pseries_machine_kexec(struct kimage *image);
+
+extern void pSeries_final_fixup(void);
+
+/* Poweron flag used for enabling auto ups restart */
+extern unsigned long rtas_poweron_auto;
+
+/* Dynamic logical Partitioning/Mobility */
+extern void dlpar_free_cc_nodes(struct device_node *);
+extern void dlpar_free_cc_property(struct property *);
+extern struct device_node *dlpar_configure_connector(__be32,
+ struct device_node *);
+extern int dlpar_attach_node(struct device_node *, struct device_node *);
+extern int dlpar_detach_node(struct device_node *);
+extern int dlpar_acquire_drc(u32 drc_index);
+extern int dlpar_release_drc(u32 drc_index);
+extern int dlpar_unisolate_drc(u32 drc_index);
+
+void queue_hotplug_event(struct pseries_hp_errorlog *hp_errlog);
+int handle_dlpar_errorlog(struct pseries_hp_errorlog *hp_errlog);
+
+/*
+ * Memory and pmem hotplug entry points. Without CONFIG_MEMORY_HOTPLUG the
+ * inline stubs below report -EOPNOTSUPP.
+ */
+#ifdef CONFIG_MEMORY_HOTPLUG
+int dlpar_memory(struct pseries_hp_errorlog *hp_elog);
+int dlpar_hp_pmem(struct pseries_hp_errorlog *hp_elog);
+#else
+static inline int dlpar_memory(struct pseries_hp_errorlog *hp_elog)
+{
+ return -EOPNOTSUPP;
+}
+static inline int dlpar_hp_pmem(struct pseries_hp_errorlog *hp_elog)
+{
+ return -EOPNOTSUPP;
+}
+#endif
+
+/*
+ * CPU hotplug entry points. Without CONFIG_HOTPLUG_CPU, dlpar_cpu() reports
+ * -EOPNOTSUPP and pseries_cpu_hotplug_init() does nothing.
+ */
+#ifdef CONFIG_HOTPLUG_CPU
+int dlpar_cpu(struct pseries_hp_errorlog *hp_elog);
+void pseries_cpu_hotplug_init(void);
+#else
+static inline int dlpar_cpu(struct pseries_hp_errorlog *hp_elog)
+{
+ return -EOPNOTSUPP;
+}
+static inline void pseries_cpu_hotplug_init(void) { }
+#endif
+
+/* PCI root bridge prepare function override for pseries */
+struct pci_host_bridge;
+int pseries_root_bridge_prepare(struct pci_host_bridge *bridge);
+
+extern struct pci_controller_ops pseries_pci_controller_ops;
+int pseries_msi_allocate_domains(struct pci_controller *phb);
+void pseries_msi_free_domains(struct pci_controller *phb);
+
+/* CMO globals, defined elsewhere, and read-only accessors for them. */
+extern int CMO_PrPSP;
+extern int CMO_SecPSP;
+extern unsigned long CMO_PageSize;
+
+/* Returns CMO_PrPSP. */
+static inline int cmo_get_primary_psp(void)
+{
+ return CMO_PrPSP;
+}
+
+/* Returns CMO_SecPSP. */
+static inline int cmo_get_secondary_psp(void)
+{
+ return CMO_SecPSP;
+}
+
+/* Returns CMO_PageSize. */
+static inline unsigned long cmo_get_page_size(void)
+{
+ return CMO_PageSize;
+}
+
+int dlpar_workqueue_init(void);
+
+extern u32 pseries_security_flavor;
+void pseries_setup_security_mitigations(void);
+
+#ifdef CONFIG_PPC_64S_HASH_MMU
+void pseries_lpar_read_hblkrm_characteristics(void);
+#else
+static inline void pseries_lpar_read_hblkrm_characteristics(void) { }
+#endif
+
+void pseries_rng_init(void);
+#ifdef CONFIG_SPAPR_TCE_IOMMU
+struct iommu_group *pSeries_pci_device_group(struct pci_controller *hose,
+ struct pci_dev *pdev);
+#endif
+
+#endif /* _PSERIES_PSERIES_H */
diff --git a/arch/powerpc/platforms/pseries/pseries_energy.c b/arch/powerpc/platforms/pseries/pseries_energy.c
new file mode 100644
index 000000000..2c661b798
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/pseries_energy.c
@@ -0,0 +1,368 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * POWER platform energy management driver
+ * Copyright (C) 2010 IBM Corporation
+ *
+ * This pseries platform device driver provides access to
+ * platform energy management capabilities.
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/seq_file.h>
+#include <linux/device.h>
+#include <linux/cpu.h>
+#include <linux/of.h>
+#include <asm/cputhreads.h>
+#include <asm/page.h>
+#include <asm/hvcall.h>
+#include <asm/firmware.h>
+#include <asm/prom.h>
+
+
+#define MODULE_VERS "1.0"
+#define MODULE_NAME "pseries_energy"
+
+/* Driver flags */
+
+static int sysfs_entries;
+
+/* Helper routines */
+
+/* Helper Routines to convert between drc_index to cpu numbers */
+
+/*
+ * Map logical @cpu to the drc index of its core, using the "ibm,drc-info"
+ * property of /cpus when present and "ibm,drc-indexes" otherwise.
+ *
+ * Returns the drc index, or 0 after logging a warning on failure.
+ */
+static u32 cpu_to_drc_index(int cpu)
+{
+	struct device_node *dn = NULL;
+	struct property *info;
+	int thread_index;
+	int rc = 1;
+	u32 ret = 0;
+
+	dn = of_find_node_by_path("/cpus");
+	if (dn == NULL)
+		goto err;
+
+	/* Convert logical cpu number to core number */
+	thread_index = cpu_core_index_of_thread(cpu);
+
+	info = of_find_property(dn, "ibm,drc-info", NULL);
+	if (info) {
+		struct of_drc_info drc;
+		int j;
+		u32 num_set_entries;
+		const __be32 *value;
+
+		value = of_prop_next_u32(info, NULL, &num_set_entries);
+		if (!value)
+			goto err_of_node_put;
+		else
+			value++;
+
+		for (j = 0; j < num_set_entries; j++) {
+
+			of_read_drc_info_cell(&info, &value, &drc);
+			/*
+			 * dn is still referenced here, so leave through the
+			 * label that drops it rather than leaking the node.
+			 */
+			if (strncmp(drc.drc_type, "CPU", 3))
+				goto err_of_node_put;
+
+			if (thread_index < drc.last_drc_index)
+				break;
+		}
+
+		ret = drc.drc_index_start + (thread_index * drc.sequential_inc);
+	} else {
+		u32 nr_drc_indexes, thread_drc_index;
+
+		/*
+		 * The first element of ibm,drc-indexes array is the
+		 * number of drc_indexes returned in the list. Hence
+		 * thread_index+1 will get the drc_index corresponding
+		 * to core number thread_index.
+		 */
+		rc = of_property_read_u32_index(dn, "ibm,drc-indexes",
+						0, &nr_drc_indexes);
+		if (rc)
+			goto err_of_node_put;
+
+		WARN_ON_ONCE(thread_index > nr_drc_indexes);
+		rc = of_property_read_u32_index(dn, "ibm,drc-indexes",
+						thread_index + 1,
+						&thread_drc_index);
+		if (rc)
+			goto err_of_node_put;
+
+		ret = thread_drc_index;
+	}
+
+	rc = 0;
+
+err_of_node_put:
+	of_node_put(dn);
+err:
+	if (rc)
+		printk(KERN_WARNING "cpu_to_drc_index(%d) failed", cpu);
+	return ret;
+}
+
+/*
+ * Map @drc_index to the first logical cpu (thread) of the matching core,
+ * using the "ibm,drc-info" property of /cpus when present and
+ * "ibm,drc-indexes" otherwise.
+ *
+ * Returns the thread index; on failure a warning is logged and 0 is
+ * returned.
+ */
+static int drc_index_to_cpu(u32 drc_index)
+{
+	struct device_node *dn = NULL;
+	struct property *info;
+	const __be32 *indexes;
+	int thread_index = 0, cpu = 0;
+	int rc = 1;
+
+	dn = of_find_node_by_path("/cpus");
+	if (dn == NULL)
+		goto err;
+	info = of_find_property(dn, "ibm,drc-info", NULL);
+	if (info) {
+		struct of_drc_info drc;
+		int j;
+		u32 num_set_entries;
+		const __be32 *value;
+
+		value = of_prop_next_u32(info, NULL, &num_set_entries);
+		if (!value)
+			goto err_of_node_put;
+		else
+			value++;
+
+		for (j = 0; j < num_set_entries; j++) {
+
+			of_read_drc_info_cell(&info, &value, &drc);
+			/*
+			 * dn is still referenced here, so leave through the
+			 * label that drops it rather than leaking the node.
+			 */
+			if (strncmp(drc.drc_type, "CPU", 3))
+				goto err_of_node_put;
+
+			if (drc_index > drc.last_drc_index) {
+				cpu += drc.num_sequential_elems;
+				continue;
+			}
+			cpu += ((drc_index - drc.drc_index_start) /
+				drc.sequential_inc);
+
+			thread_index = cpu_first_thread_of_core(cpu);
+			rc = 0;
+			break;
+		}
+	} else {
+		unsigned long int i;
+		u32 nr_indexes;
+
+		indexes = of_get_property(dn, "ibm,drc-indexes", NULL);
+		if (indexes == NULL)
+			goto err_of_node_put;
+		/*
+		 * First element in the array is the number of drc_indexes
+		 * returned. Search through the list to find the matching
+		 * drc_index and get the core number. Property cells are
+		 * big-endian, so convert each one before using it.
+		 */
+		nr_indexes = be32_to_cpu(indexes[0]);
+		for (i = 0; i < nr_indexes; i++) {
+			if (be32_to_cpu(indexes[i + 1]) == drc_index)
+				break;
+		}
+		/* Convert core number to logical cpu number */
+		thread_index = cpu_first_thread_of_core(i);
+		rc = 0;
+	}
+
+err_of_node_put:
+	of_node_put(dn);
+err:
+	if (rc)
+		printk(KERN_WARNING "drc_index_to_cpu(%d) failed", drc_index);
+	return thread_index;
+}
+
+/*
+ * pseries hypervisor call H_BEST_ENERGY provides hints to OS on
+ * preferred logical cpus to activate or deactivate for optimized
+ * energy consumption.
+ */
+
+#define FLAGS_MODE1 0x004E200000080E01UL
+#define FLAGS_MODE2 0x004E200000080401UL
+#define FLAGS_ACTIVATE 0x100
+
+/*
+ * Fill @page with a comma separated, newline terminated list of the cpus
+ * the H_BEST_ENERGY hcall suggests activating (@activate != 0) or
+ * deactivating (@activate == 0). Only cpus whose online state differs from
+ * the requested one are listed.
+ *
+ * Returns the number of bytes written, -ENOMEM if the hcall buffer cannot
+ * be allocated, or -EINVAL if the hcall fails.
+ */
+static ssize_t get_best_energy_list(char *page, int activate)
+{
+	unsigned long retbuf[PLPAR_HCALL9_BUFSIZE];
+	unsigned long flags = FLAGS_MODE1;
+	char *out = page;
+	u32 *hints;
+	int nr_hints, idx, rc;
+
+	hints = (u32 *) get_zeroed_page(GFP_KERNEL);
+	if (!hints)
+		return -ENOMEM;
+
+	if (activate)
+		flags |= FLAGS_ACTIVATE;
+
+	rc = plpar_hcall9(H_BEST_ENERGY, retbuf, flags, 0, __pa(hints),
+				0, 0, 0, 0, 0, 0);
+	if (rc != H_SUCCESS) {
+		free_page((unsigned long) hints);
+		return -EINVAL;
+	}
+
+	nr_hints = retbuf[0];
+	for (idx = 0; idx < nr_hints; idx++) {
+		/* the drc index is the second word of each two-word entry */
+		int cpu = drc_index_to_cpu(hints[2 * idx + 1]);
+		bool online = cpu_online(cpu);
+
+		if (online != !!activate)
+			out += sprintf(out, "%d,", cpu);
+	}
+
+	if (out > page) { /* Something to show */
+		out--; /* Suppress last comma */
+		out += sprintf(out, "\n");
+	}
+
+	free_page((unsigned long) hints);
+	return out - page;
+}
+
+/*
+ * Print the per-cpu H_BEST_ENERGY hint for @dev (upper 32 bits of
+ * retbuf[1]) into @page. Returns the number of bytes written or -EINVAL if
+ * the hcall fails.
+ */
+static ssize_t get_best_energy_data(struct device *dev,
+				    char *page, int activate)
+{
+	unsigned long retbuf[PLPAR_HCALL9_BUFSIZE];
+	unsigned long flags = FLAGS_MODE2;
+	int rc;
+
+	if (activate)
+		flags |= FLAGS_ACTIVATE;
+
+	rc = plpar_hcall9(H_BEST_ENERGY, retbuf, flags,
+			  cpu_to_drc_index(dev->id),
+			  0, 0, 0, 0, 0, 0, 0);
+	if (rc == H_SUCCESS)
+		return sprintf(page, "%lu\n", retbuf[1] >> 32);
+
+	return -EINVAL;
+}
+
+/* Wrapper functions */
+
+/* show(): list of cpus suggested for activation. */
+static ssize_t cpu_activate_hint_list_show(struct device *dev,
+ struct device_attribute *attr, char *page)
+{
+ return get_best_energy_list(page, 1);
+}
+
+/* show(): list of cpus suggested for deactivation. */
+static ssize_t cpu_deactivate_hint_list_show(struct device *dev,
+ struct device_attribute *attr, char *page)
+{
+ return get_best_energy_list(page, 0);
+}
+
+/* show(): activation hint value for the cpu behind @dev. */
+static ssize_t percpu_activate_hint_show(struct device *dev,
+ struct device_attribute *attr, char *page)
+{
+ return get_best_energy_data(dev, page, 1);
+}
+
+/* show(): deactivation hint value for the cpu behind @dev. */
+static ssize_t percpu_deactivate_hint_show(struct device *dev,
+ struct device_attribute *attr, char *page)
+{
+ return get_best_energy_data(dev, page, 0);
+}
+
+/*
+ * Create sysfs interface:
+ * /sys/devices/system/cpu/pseries_activate_hint_list
+ * /sys/devices/system/cpu/pseries_deactivate_hint_list
+ * Comma separated list of cpus to activate or deactivate
+ * /sys/devices/system/cpu/cpuN/pseries_activate_hint
+ * /sys/devices/system/cpu/cpuN/pseries_deactivate_hint
+ * Per-cpu value of the hint
+ */
+
+/* Read-only (0444) attributes; none of them has a store handler. */
+static struct device_attribute attr_cpu_activate_hint_list =
+ __ATTR(pseries_activate_hint_list, 0444,
+ cpu_activate_hint_list_show, NULL);
+
+static struct device_attribute attr_cpu_deactivate_hint_list =
+ __ATTR(pseries_deactivate_hint_list, 0444,
+ cpu_deactivate_hint_list_show, NULL);
+
+static struct device_attribute attr_percpu_activate_hint =
+ __ATTR(pseries_activate_hint, 0444,
+ percpu_activate_hint_show, NULL);
+
+static struct device_attribute attr_percpu_deactivate_hint =
+ __ATTR(pseries_deactivate_hint, 0444,
+ percpu_deactivate_hint_show, NULL);
+
+/*
+ * Module init: create the hint-list files on the cpu subsystem root device
+ * and the per-cpu hint files on every possible cpu device.
+ *
+ * Returns 0 when the firmware lacks FW_FEATURE_BEST_ENERGY (nothing is
+ * created) or when all files were created, otherwise the first error.
+ * NOTE(review): files created before a failure are not removed here, and
+ * sysfs_entries stays 0 in that case so cleanup will not remove them either.
+ */
+static int __init pseries_energy_init(void)
+{
+ int cpu, err;
+ struct device *cpu_dev, *dev_root;
+
+ if (!firmware_has_feature(FW_FEATURE_BEST_ENERGY))
+ return 0; /* H_BEST_ENERGY hcall not supported */
+
+ /* Create the sysfs files */
+ dev_root = bus_get_dev_root(&cpu_subsys);
+ if (dev_root) {
+ err = device_create_file(dev_root, &attr_cpu_activate_hint_list);
+ if (!err)
+ err = device_create_file(dev_root, &attr_cpu_deactivate_hint_list);
+ put_device(dev_root);
+ if (err)
+ return err;
+ }
+
+ /* err is (re)assigned on the first iteration of this loop */
+ for_each_possible_cpu(cpu) {
+ cpu_dev = get_cpu_device(cpu);
+ err = device_create_file(cpu_dev,
+ &attr_percpu_activate_hint);
+ if (err)
+ break;
+ err = device_create_file(cpu_dev,
+ &attr_percpu_deactivate_hint);
+ if (err)
+ break;
+ }
+
+ if (err)
+ return err;
+
+ sysfs_entries = 1; /* Removed entries on cleanup */
+ return 0;
+
+}
+
+/*
+ * Module exit: remove the files created by pseries_energy_init(). Does
+ * nothing unless init completed (sysfs_entries set).
+ */
+static void __exit pseries_energy_cleanup(void)
+{
+	int cpu;
+	struct device *cpu_dev, *dev_root;
+
+	if (!sysfs_entries)
+		return;
+
+	/* Remove the sysfs files */
+	dev_root = bus_get_dev_root(&cpu_subsys);
+	if (dev_root) {
+		device_remove_file(dev_root, &attr_cpu_activate_hint_list);
+		device_remove_file(dev_root, &attr_cpu_deactivate_hint_list);
+		put_device(dev_root);
+	}
+
+	for_each_possible_cpu(cpu) {
+		cpu_dev = get_cpu_device(cpu);
+		/*
+		 * Mirror the device_create_file() calls made at init.
+		 * device_remove_file() also tolerates a NULL device, so a
+		 * cpu without a registered device is skipped safely.
+		 */
+		device_remove_file(cpu_dev, &attr_percpu_activate_hint);
+		device_remove_file(cpu_dev, &attr_percpu_deactivate_hint);
+	}
+}
+
+module_init(pseries_energy_init);
+module_exit(pseries_energy_cleanup);
+MODULE_DESCRIPTION("Driver for pSeries platform energy management");
+MODULE_AUTHOR("Vaidyanathan Srinivasan");
+MODULE_LICENSE("GPL");
diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
new file mode 100644
index 000000000..adafd593d
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -0,0 +1,882 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2001 Dave Engebretsen IBM Corporation
+ */
+
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/of.h>
+#include <linux/fs.h>
+#include <linux/reboot.h>
+#include <linux/irq_work.h>
+
+#include <asm/machdep.h>
+#include <asm/rtas.h>
+#include <asm/firmware.h>
+#include <asm/mce.h>
+
+#include "pseries.h"
+
+static unsigned char ras_log_buf[RTAS_ERROR_LOG_MAX];
+static DEFINE_SPINLOCK(ras_log_buf_lock);
+
+static int ras_check_exception_token;
+
+#define EPOW_SENSOR_TOKEN 9
+#define EPOW_SENSOR_INDEX 0
+
+/* EPOW events counter variable */
+static int num_epow_events;
+
+static irqreturn_t ras_hotplug_interrupt(int irq, void *dev_id);
+static irqreturn_t ras_epow_interrupt(int irq, void *dev_id);
+static irqreturn_t ras_error_interrupt(int irq, void *dev_id);
+
+/* RTAS pseries MCE errorlog section. */
+struct pseries_mc_errorlog {
+ __be32 fru_id;
+ __be32 proc_id;
+ u8 error_type;
+ /*
+ * sub_err_type (1 byte). The bit field layout depends on error_type.
+ * Bits are numbered MSB0, i.e. bit 0 is the most significant bit.
+ *
+ *   MSB0
+ *   |
+ *   V
+ *   01234567
+ *   XXXXXXXX
+ *
+ * For error_type == MC_ERROR_TYPE_UE
+ *   XXXXXXXX
+ *   X          1: Permanent or Transient UE.
+ *    X         1: Effective address provided.
+ *     X        1: Logical address provided.
+ *      XX      2: Reserved.
+ *        XXX   3: Type of UE error.
+ *
+ * For error_type == MC_ERROR_TYPE_SLB/ERAT/TLB
+ *   XXXXXXXX
+ *   X          1: Effective address provided.
+ *    XXXXX     5: Reserved.
+ *         XX   2: Type of SLB/ERAT/TLB error.
+ *
+ * For error_type == MC_ERROR_TYPE_CTRL_MEM_ACCESS
+ *   XXXXXXXX
+ *   X          1: Error causing address provided.
+ *    XXX       3: Type of error.
+ *       XXXX   4: Reserved.
+ */
+ u8 sub_err_type;
+ u8 reserved_1[6];
+ __be64 effective_address;
+ __be64 logical_address;
+} __packed;
+
+/* RTAS pseries MCE error types */
+#define MC_ERROR_TYPE_UE 0x00
+#define MC_ERROR_TYPE_SLB 0x01
+#define MC_ERROR_TYPE_ERAT 0x02
+#define MC_ERROR_TYPE_UNKNOWN 0x03
+#define MC_ERROR_TYPE_TLB 0x04
+#define MC_ERROR_TYPE_D_CACHE 0x05
+#define MC_ERROR_TYPE_I_CACHE 0x07
+#define MC_ERROR_TYPE_CTRL_MEM_ACCESS 0x08
+
+/* RTAS pseries MCE error sub types */
+#define MC_ERROR_UE_INDETERMINATE 0
+#define MC_ERROR_UE_IFETCH 1
+#define MC_ERROR_UE_PAGE_TABLE_WALK_IFETCH 2
+#define MC_ERROR_UE_LOAD_STORE 3
+#define MC_ERROR_UE_PAGE_TABLE_WALK_LOAD_STORE 4
+
+#define UE_EFFECTIVE_ADDR_PROVIDED 0x40
+#define UE_LOGICAL_ADDR_PROVIDED 0x20
+#define MC_EFFECTIVE_ADDR_PROVIDED 0x80
+
+#define MC_ERROR_SLB_PARITY 0
+#define MC_ERROR_SLB_MULTIHIT 1
+#define MC_ERROR_SLB_INDETERMINATE 2
+
+#define MC_ERROR_ERAT_PARITY 1
+#define MC_ERROR_ERAT_MULTIHIT 2
+#define MC_ERROR_ERAT_INDETERMINATE 3
+
+#define MC_ERROR_TLB_PARITY 1
+#define MC_ERROR_TLB_MULTIHIT 2
+#define MC_ERROR_TLB_INDETERMINATE 3
+
+#define MC_ERROR_CTRL_MEM_ACCESS_PTABLE_WALK 0
+#define MC_ERROR_CTRL_MEM_ACCESS_OP_ACCESS 1
+
+/*
+ * Extract the error sub-type from sub_err_type. Which bits hold the
+ * sub-type depends on error_type; unknown error types yield 0.
+ */
+static inline u8 rtas_mc_error_sub_type(const struct pseries_mc_errorlog *mlog)
+{
+	const u8 type = mlog->error_type;
+	const u8 bits = mlog->sub_err_type;
+
+	if (type == MC_ERROR_TYPE_UE)
+		return (bits & 0x07);
+
+	if (type == MC_ERROR_TYPE_SLB ||
+	    type == MC_ERROR_TYPE_ERAT ||
+	    type == MC_ERROR_TYPE_TLB)
+		return (bits & 0x03);
+
+	if (type == MC_ERROR_TYPE_CTRL_MEM_ACCESS)
+		return (bits & 0x70) >> 4;
+
+	return 0;
+}
+
+/*
+ * Hotplug interrupts are enabled at late-initcall time: handling them may
+ * touch other devices or systems (e.g. hugepages) that are not yet
+ * initialized at the subsys stage.
+ */
+static int __init init_ras_hotplug_IRQ(void)
+{
+	struct device_node *hp_node;
+
+	/* Hotplug Events */
+	hp_node = of_find_node_by_path("/event-sources/hot-plug-events");
+	if (!hp_node)
+		return 0;
+
+	/* Only hook up the interrupts once the DLPAR workqueue exists. */
+	if (dlpar_workqueue_init() == 0)
+		request_event_sources_irqs(hp_node, ras_hotplug_interrupt,
+					   "RAS_HOTPLUG");
+
+	of_node_put(hp_node);
+	return 0;
+}
+machine_late_initcall(pseries, init_ras_hotplug_IRQ);
+
+/*
+ * Look up the check-exception token and register the interrupt handlers
+ * for hardware error ("internal-errors") and power system ("epow-events")
+ * event sources, in that order. A missing device tree node is skipped.
+ */
+static int __init init_ras_IRQ(void)
+{
+	const struct {
+		const char *path;
+		irq_handler_t handler;
+		const char *name;
+	} sources[] = {
+		/* Internal Errors */
+		{ "/event-sources/internal-errors", ras_error_interrupt, "RAS_ERROR" },
+		/* EPOW Events */
+		{ "/event-sources/epow-events", ras_epow_interrupt, "RAS_EPOW" },
+	};
+	unsigned int i;
+
+	ras_check_exception_token = rtas_function_token(RTAS_FN_CHECK_EXCEPTION);
+
+	for (i = 0; i < ARRAY_SIZE(sources); i++) {
+		struct device_node *np = of_find_node_by_path(sources[i].path);
+
+		if (!np)
+			continue;
+
+		request_event_sources_irqs(np, sources[i].handler,
+					   sources[i].name);
+		of_node_put(np);
+	}
+
+	return 0;
+}
+machine_subsys_initcall(pseries, init_ras_IRQ);
+
+#define EPOW_SHUTDOWN_NORMAL 1
+#define EPOW_SHUTDOWN_ON_UPS 2
+#define EPOW_SHUTDOWN_LOSS_OF_CRITICAL_FUNCTIONS 3
+#define EPOW_SHUTDOWN_AMBIENT_TEMPERATURE_TOO_HIGH 4
+
+/*
+ * React to an EPOW system-shutdown event. Every known modifier except
+ * "running on UPS" ends in an orderly power off; unknown modifiers are
+ * only reported.
+ */
+static void handle_system_shutdown(char event_modifier)
+{
+	bool power_off = true;
+
+	switch (event_modifier) {
+	case EPOW_SHUTDOWN_NORMAL:
+		pr_emerg("Power off requested\n");
+		break;
+
+	case EPOW_SHUTDOWN_ON_UPS:
+		pr_emerg("Loss of system power detected. System is running on"
+			 " UPS/battery. Check RTAS error log for details\n");
+		power_off = false;
+		break;
+
+	case EPOW_SHUTDOWN_LOSS_OF_CRITICAL_FUNCTIONS:
+		pr_emerg("Loss of system critical functions detected. Check"
+			 " RTAS error log for details\n");
+		break;
+
+	case EPOW_SHUTDOWN_AMBIENT_TEMPERATURE_TOO_HIGH:
+		pr_emerg("High ambient temperature detected. Check RTAS"
+			 " error log for details\n");
+		break;
+
+	default:
+		pr_err("Unknown power/cooling shutdown event (modifier = %d)\n",
+			event_modifier);
+		power_off = false;
+		break;
+	}
+
+	if (power_off)
+		orderly_poweroff(true);
+}
+
+struct epow_errorlog {
+ unsigned char sensor_value; /* low 4 bits are read as the EPOW action code */
+ unsigned char event_modifier; /* low 4 bits are read as the shutdown modifier */
+ unsigned char extended_modifier;
+ unsigned char reserved;
+ unsigned char platform_reason;
+};
+
+#define EPOW_RESET 0
+#define EPOW_WARN_COOLING 1
+#define EPOW_WARN_POWER 2
+#define EPOW_SYSTEM_SHUTDOWN 3
+#define EPOW_SYSTEM_HALT 4
+#define EPOW_MAIN_ENCLOSURE 5
+#define EPOW_POWER_OFF 7
+
+/*
+ * Decode the EPOW section of an RTAS error log and act on it: report
+ * warnings, start a shutdown or power off, and keep num_epow_events in
+ * step (decremented on a reset event, incremented on every other event).
+ * Logs without an EPOW section are ignored.
+ */
+static void rtas_parse_epow_errlog(struct rtas_error_log *log)
+{
+	struct pseries_errorlog *section;
+	struct epow_errorlog *epow;
+	char action_code;
+	char modifier;
+
+	section = get_pseries_errorlog(log, PSERIES_ELOG_SECT_ID_EPOW);
+	if (!section)
+		return;
+
+	epow = (struct epow_errorlog *)section->data;
+	action_code = epow->sensor_value & 0xF;	/* bottom 4 bits */
+	modifier = epow->event_modifier & 0xF;	/* bottom 4 bits */
+
+	/* A reset clears one outstanding event and is never counted itself. */
+	if (action_code == EPOW_RESET) {
+		if (num_epow_events) {
+			pr_info("Non critical power/cooling issue cleared\n");
+			num_epow_events--;
+		}
+		return;
+	}
+
+	switch (action_code) {
+	case EPOW_WARN_COOLING:
+		pr_info("Non-critical cooling issue detected. Check RTAS error"
+			" log for details\n");
+		break;
+
+	case EPOW_WARN_POWER:
+		pr_info("Non-critical power issue detected. Check RTAS error"
+			" log for details\n");
+		break;
+
+	case EPOW_SYSTEM_SHUTDOWN:
+		handle_system_shutdown(modifier);
+		break;
+
+	case EPOW_SYSTEM_HALT:
+		pr_emerg("Critical power/cooling issue detected. Check RTAS"
+			 " error log for details. Powering off.\n");
+		orderly_poweroff(true);
+		break;
+
+	case EPOW_MAIN_ENCLOSURE:
+	case EPOW_POWER_OFF:
+		pr_emerg("System about to lose power. Check RTAS error log "
+			 " for details. Powering off immediately.\n");
+		emergency_sync();
+		kernel_power_off();
+		break;
+
+	default:
+		pr_err("Unknown power/cooling event (action code = %d)\n",
+			action_code);
+	}
+
+	/* Increment epow events counter variable */
+	num_epow_events++;
+}
+
+/*
+ * Handle a hotplug event interrupt.
+ *
+ * The event is fetched into ras_log_buf with check-exception. Memory, CPU
+ * and PMEM events are queued for in-kernel handling; everything else,
+ * including a log that carries no hotplug section at all, is passed to
+ * log_error() so rtas_errd can deal with it.
+ */
+static irqreturn_t ras_hotplug_interrupt(int irq, void *dev_id)
+{
+	struct pseries_errorlog *pseries_log;
+	struct pseries_hp_errorlog *hp_elog = NULL;
+
+	spin_lock(&ras_log_buf_lock);
+
+	rtas_call(ras_check_exception_token, 6, 1, NULL,
+		  RTAS_VECTOR_EXTERNAL_INTERRUPT, virq_to_hw(irq),
+		  RTAS_HOTPLUG_EVENTS, 0, __pa(&ras_log_buf),
+		  rtas_get_error_log_max());
+
+	/* The section lookup can fail; never dereference a NULL result. */
+	pseries_log = get_pseries_errorlog((struct rtas_error_log *)ras_log_buf,
+					   PSERIES_ELOG_SECT_ID_HOTPLUG);
+	if (pseries_log)
+		hp_elog = (struct pseries_hp_errorlog *)pseries_log->data;
+
+	/*
+	 * Since PCI hotplug is not currently supported on pseries, put PCI
+	 * hotplug events on the ras_log_buf to be handled by rtas_errd.
+	 */
+	if (hp_elog &&
+	    (hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_MEM ||
+	     hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_CPU ||
+	     hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_PMEM))
+		queue_hotplug_event(hp_elog);
+	else
+		log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, 0);
+
+	spin_unlock(&ras_log_buf_lock);
+	return IRQ_HANDLED;
+}
+
+/*
+ * Handle environmental and power warning (EPOW) interrupts.
+ *
+ * The EPOW sensor is sampled first to decide whether check-exception is
+ * called as time critical (sensor state above 3). The returned log is then
+ * recorded and parsed under ras_log_buf_lock.
+ */
+static irqreturn_t ras_epow_interrupt(int irq, void *dev_id)
+{
+	/*
+	 * Start from "not critical": if the sensor read below does not store
+	 * a value, state must not be read uninitialized.
+	 */
+	int state = 0;
+	int critical;
+
+	rtas_get_sensor_fast(EPOW_SENSOR_TOKEN, EPOW_SENSOR_INDEX, &state);
+
+	if (state > 3)
+		critical = 1;		/* Time Critical */
+	else
+		critical = 0;
+
+	spin_lock(&ras_log_buf_lock);
+
+	rtas_call(ras_check_exception_token, 6, 1, NULL, RTAS_VECTOR_EXTERNAL_INTERRUPT,
+		  virq_to_hw(irq), RTAS_EPOW_WARNING, critical, __pa(&ras_log_buf),
+		  rtas_get_error_log_max());
+
+	log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, 0);
+
+	rtas_parse_epow_errlog((struct rtas_error_log *)ras_log_buf);
+
+	spin_unlock(&ras_log_buf_lock);
+	return IRQ_HANDLED;
+}
+
+/*
+ * Hardware error interrupt handler.
+ *
+ * check-exception is asked for the details of the exception. A successful
+ * call whose log has severity RTAS_SEVERITY_ERROR_SYNC or above is treated
+ * as fatal: it is logged and the machine is powered off immediately to
+ * stop the failure from spreading. Anything else is logged as recoverable.
+ */
+static irqreturn_t ras_error_interrupt(int irq, void *dev_id)
+{
+	struct rtas_error_log *elog = (struct rtas_error_log *)ras_log_buf;
+	int rc;
+	int fatal;
+
+	spin_lock(&ras_log_buf_lock);
+
+	rc = rtas_call(ras_check_exception_token, 6, 1, NULL,
+		       RTAS_VECTOR_EXTERNAL_INTERRUPT,
+		       virq_to_hw(irq),
+		       RTAS_INTERNAL_ERROR, 1 /* Time Critical */,
+		       __pa(&ras_log_buf),
+		       rtas_get_error_log_max());
+
+	fatal = (rc == 0 &&
+		 rtas_error_severity(elog) >= RTAS_SEVERITY_ERROR_SYNC);
+
+	/* format and print the extended information */
+	log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, fatal);
+
+	if (!fatal) {
+		pr_err("Recoverable hardware error detected\n");
+	} else {
+		pr_emerg("Fatal hardware error detected. Check RTAS error"
+			 " log for details. Powering off immediately\n");
+		emergency_sync();
+		kernel_power_off();
+	}
+
+	spin_unlock(&ras_log_buf_lock);
+	return IRQ_HANDLED;
+}
+
+/*
+ * Some versions of FWNMI place the buffer inside the 4kB page starting at
+ * 0x7000. Other versions place it inside the rtas buffer. We check both.
+ * Minimum size of the buffer is 16 bytes.
+ */
+#define VALID_FWNMI_BUFFER(A) \
+ ((((A) >= 0x7000) && ((A) <= 0x8000 - 16)) || \
+ (((A) >= rtas.base) && ((A) <= (rtas.base + rtas.size - 16))))
+
+static inline struct rtas_error_log *fwnmi_get_errlog(void)
+{
+ return (struct rtas_error_log *)local_paca->mce_data_buf;
+}
+
+/*
+ * Turn the address firmware left in r3 into a kernel pointer to the FWNMI
+ * save area. Returns NULL (after logging) when the address, with its top
+ * two bits masked off, fails the VALID_FWNMI_BUFFER() check.
+ */
+static __be64 *fwnmi_get_savep(struct pt_regs *regs)
+{
+	const unsigned long top_two_bits = 0x3UL << 62;
+	unsigned long savep_ra = regs->gpr[3] & ~top_two_bits;
+
+	if (VALID_FWNMI_BUFFER(savep_ra))
+		return __va(savep_ra);
+
+	printk(KERN_ERR "FWNMI: corrupt r3 0x%016lx\n", regs->gpr[3]);
+	return NULL;
+}
+
+/*
+ * Get the error information for errors coming through the
+ * FWNMI vectors. The pt_regs' r3 will be updated to reflect
+ * the actual r3 if possible, and a ptr to the error log entry
+ * will be returned if found. NULL is returned when the save area
+ * address in r3 is not valid.
+ *
+ * Use one buffer mce_data_buf per cpu to store RTAS error.
+ *
+ * The mce_data_buf does not have any locks or protection around it,
+ * if a second machine check comes in, or a system reset is done
+ * before we have logged the error, then we will get corruption in the
+ * error log. This is preferable over holding off on calling
+ * ibm,nmi-interlock which would result in us checkstopping if a
+ * second machine check did come in.
+ */
+static struct rtas_error_log *fwnmi_get_errinfo(struct pt_regs *regs)
+{
+	struct rtas_error_log *h;
+	__be64 *savep;
+	size_t len;
+
+	savep = fwnmi_get_savep(regs);
+	if (!savep)
+		return NULL;
+
+	regs->gpr[3] = be64_to_cpu(savep[0]); /* restore original r3 */
+
+	h = (struct rtas_error_log *)&savep[1];
+	/* Use the per cpu buffer from paca to store rtas error log */
+	memset(local_paca->mce_data_buf, 0, RTAS_ERROR_LOG_MAX);
+	if (!rtas_error_extended(h)) {
+		len = sizeof(__u64);
+	} else {
+		/*
+		 * The extended length comes from firmware. Do the sum in
+		 * size_t so that a huge value cannot turn into a negative
+		 * int and slip past the clamp as an enormous copy size.
+		 */
+		len = 8 + (size_t)rtas_error_extended_log_length(h);
+		if (len > RTAS_ERROR_LOG_MAX)
+			len = RTAS_ERROR_LOG_MAX;
+	}
+	memcpy(local_paca->mce_data_buf, h, len);
+
+	return (struct rtas_error_log *)local_paca->mce_data_buf;
+}
+
+/*
+ * Call this when done with the data returned by fwnmi_get_errinfo().
+ * It makes the ibm,nmi-interlock RTAS call, releasing the saved data area
+ * so other CPUs in the partition can receive FWNMI errors. A non-zero
+ * status from the call is only logged.
+ */
+static void fwnmi_release_errinfo(void)
+{
+	/*
+	 * On pseries, the machine check stack is limited to under 4GB, so
+	 * args can be on-stack.
+	 */
+	struct rtas_args args;
+	int status;
+
+	rtas_call_unlocked(&args, ibm_nmi_interlock_token, 0, 1, NULL);
+
+	status = be32_to_cpu(args.rets[0]);
+	if (!status)
+		return;
+
+	printk(KERN_ERR "FWNMI: nmi-interlock failed: %d\n", status);
+}
+
+/*
+ * pseries system reset handler.
+ *
+ * Restores the original r3 from the FWNMI save area when FWNMI is active,
+ * then offers the reset to the NMI IPI code. Returns 1 if that code
+ * handled it, 0 if the caller still needs to perform the reset.
+ */
+int pSeries_system_reset_exception(struct pt_regs *regs)
+{
+#ifdef __LITTLE_ENDIAN__
+	/*
+	 * Some firmware byteswaps SRR registers and gives incorrect SRR1. Try
+	 * to detect the bad SRR1 pattern here. Flip the NIP back to correct
+	 * endian for reporting purposes. Unfortunately the MSR can't be fixed,
+	 * so clear it. It will be missing MSR_RI so we won't try to recover.
+	 */
+	if ((be64_to_cpu(regs->msr) &
+			(MSR_LE|MSR_RI|MSR_DR|MSR_IR|MSR_ME|MSR_PR|
+			 MSR_ILE|MSR_HV|MSR_SF)) == (MSR_DR|MSR_SF)) {
+		regs_set_return_ip(regs, be64_to_cpu((__be64)regs->nip));
+		regs_set_return_msr(regs, 0);
+	}
+#endif
+
+	if (fwnmi_active) {
+		/*
+		 * Firmware (PowerVM and KVM) saves r3 to a save area like
+		 * machine check, which is not exactly what PAPR (2.9)
+		 * suggests but there is no way to detect otherwise, so this
+		 * is the interface now.
+		 *
+		 * System resets do not save any error log or require an
+		 * "ibm,nmi-interlock" rtas call to release.
+		 */
+		__be64 *save_area = fwnmi_get_savep(regs);
+
+		if (save_area)
+			regs->gpr[3] = be64_to_cpu(save_area[0]); /* restore original r3 */
+	}
+
+	/* 0 tells the caller it still needs to perform the reset. */
+	return smp_handle_nmi_ipi(regs) ? 1 : 0;
+}
+
+/*
+ * First-stage machine check handling: try to repair ERAT and SLB errors
+ * that firmware reported as not recovered, and promote a "limited
+ * recovery" disposition to fully recovered. Returns the resulting
+ * disposition; without CONFIG_PPC_BOOK3S_64 the input is returned as-is.
+ */
+static int mce_handle_err_realmode(int disposition, u8 error_type)
+{
+#ifdef CONFIG_PPC_BOOK3S_64
+	if (disposition == RTAS_DISP_LIMITED_RECOVERY) {
+		/* Platform corrected itself but could be degraded */
+		pr_err("MCE: limited recovery, system may be degraded\n");
+		return RTAS_DISP_FULLY_RECOVERED;
+	}
+
+	if (disposition != RTAS_DISP_NOT_RECOVERED)
+		return disposition;
+
+	if (error_type == MC_ERROR_TYPE_ERAT) {
+		flush_erat();
+		return RTAS_DISP_FULLY_RECOVERED;
+	}
+
+#ifdef CONFIG_PPC_64S_HASH_MMU
+	if (error_type == MC_ERROR_TYPE_SLB) {
+		/*
+		 * Store the old slb content in paca before flushing.
+		 * Print this when we go to virtual mode.
+		 * There are chances that we may hit MCE again if there
+		 * is a parity error on the SLB entry we trying to read
+		 * for saving. Hence limit the slb saving to single
+		 * level of recursion.
+		 */
+		if (local_paca->in_mce == 1)
+			slb_save_contents(local_paca->mce_faulty_slbs);
+		flush_and_reload_slb();
+		return RTAS_DISP_FULLY_RECOVERED;
+	}
+#endif
+#endif
+	return disposition;
+}
+
+/*
+ * Translate an RTAS machine check log into a struct mce_error_info and
+ * record it with save_mce_event().
+ *
+ * @regs:        registers at the time of the machine check
+ * @errp:        RTAS error log header (initiator and severity are read)
+ * @mce_log:     pseries MCE section of the log, or NULL if there is none;
+ *               with NULL an all-zero mce_error_info is saved
+ * @disposition: RTAS disposition so far
+ *
+ * Returns the disposition, which is upgraded to fully recovered when
+ * mce_common_process_ue() marks a UE event to be ignored.
+ */
+static int mce_handle_err_virtmode(struct pt_regs *regs,
+				   struct rtas_error_log *errp,
+				   struct pseries_mc_errorlog *mce_log,
+				   int disposition)
+{
+	struct mce_error_info mce_err = { 0 };
+	int initiator = rtas_error_initiator(errp);
+	int severity = rtas_error_severity(errp);
+	unsigned long eaddr = 0, paddr = 0;
+	u8 error_type, err_sub_type;
+
+	if (!mce_log)
+		goto out;
+
+	error_type = mce_log->error_type;
+	err_sub_type = rtas_mc_error_sub_type(mce_log);
+
+	/* Map the RTAS initiator onto the generic MCE initiator. */
+	switch (initiator) {
+	case RTAS_INITIATOR_CPU:
+		mce_err.initiator = MCE_INITIATOR_CPU;
+		break;
+	case RTAS_INITIATOR_PCI:
+		mce_err.initiator = MCE_INITIATOR_PCI;
+		break;
+	case RTAS_INITIATOR_ISA:
+		mce_err.initiator = MCE_INITIATOR_ISA;
+		break;
+	case RTAS_INITIATOR_MEMORY:
+		mce_err.initiator = MCE_INITIATOR_MEMORY;
+		break;
+	case RTAS_INITIATOR_POWERMGM:
+		mce_err.initiator = MCE_INITIATOR_POWERMGM;
+		break;
+	case RTAS_INITIATOR_UNKNOWN:
+	default:
+		mce_err.initiator = MCE_INITIATOR_UNKNOWN;
+		break;
+	}
+
+	/* Map the RTAS severity; anything unlisted counts as fatal. */
+	switch (severity) {
+	case RTAS_SEVERITY_NO_ERROR:
+		mce_err.severity = MCE_SEV_NO_ERROR;
+		break;
+	case RTAS_SEVERITY_EVENT:
+	case RTAS_SEVERITY_WARNING:
+		mce_err.severity = MCE_SEV_WARNING;
+		break;
+	case RTAS_SEVERITY_ERROR_SYNC:
+	case RTAS_SEVERITY_ERROR:
+		mce_err.severity = MCE_SEV_SEVERE;
+		break;
+	default:
+		mce_err.severity = MCE_SEV_FATAL;
+		break;
+	}
+
+	mce_err.sync_error = (severity <= RTAS_SEVERITY_ERROR_SYNC);
+
+	mce_err.error_type = MCE_ERROR_TYPE_UNKNOWN;
+	mce_err.error_class = MCE_ECLASS_UNKNOWN;
+
+	switch (error_type) {
+	case MC_ERROR_TYPE_UE: {
+		const bool have_eaddr =
+			mce_log->sub_err_type & UE_EFFECTIVE_ADDR_PROVIDED;
+		const bool have_laddr =
+			mce_log->sub_err_type & UE_LOGICAL_ADDR_PROVIDED;
+
+		mce_err.error_type = MCE_ERROR_TYPE_UE;
+		mce_common_process_ue(regs, &mce_err);
+		if (mce_err.ignore_event)
+			disposition = RTAS_DISP_FULLY_RECOVERED;
+
+		switch (err_sub_type) {
+		case MC_ERROR_UE_IFETCH:
+			mce_err.u.ue_error_type = MCE_UE_ERROR_IFETCH;
+			break;
+		case MC_ERROR_UE_PAGE_TABLE_WALK_IFETCH:
+			mce_err.u.ue_error_type = MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH;
+			break;
+		case MC_ERROR_UE_LOAD_STORE:
+			mce_err.u.ue_error_type = MCE_UE_ERROR_LOAD_STORE;
+			break;
+		case MC_ERROR_UE_PAGE_TABLE_WALK_LOAD_STORE:
+			mce_err.u.ue_error_type = MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE;
+			break;
+		case MC_ERROR_UE_INDETERMINATE:
+		default:
+			mce_err.u.ue_error_type = MCE_UE_ERROR_INDETERMINATE;
+			break;
+		}
+
+		if (have_eaddr)
+			eaddr = be64_to_cpu(mce_log->effective_address);
+
+		/*
+		 * Prefer the logical address from the log; otherwise try to
+		 * derive a physical address from the effective address.
+		 */
+		if (have_laddr) {
+			paddr = be64_to_cpu(mce_log->logical_address);
+		} else if (have_eaddr) {
+			unsigned long pfn = addr_to_pfn(regs, eaddr);
+
+			if (pfn != ULONG_MAX)
+				paddr = pfn << PAGE_SHIFT;
+		}
+		break;
+	}
+	case MC_ERROR_TYPE_SLB:
+		mce_err.error_type = MCE_ERROR_TYPE_SLB;
+		switch (err_sub_type) {
+		case MC_ERROR_SLB_PARITY:
+			mce_err.u.slb_error_type = MCE_SLB_ERROR_PARITY;
+			break;
+		case MC_ERROR_SLB_MULTIHIT:
+			mce_err.u.slb_error_type = MCE_SLB_ERROR_MULTIHIT;
+			break;
+		case MC_ERROR_SLB_INDETERMINATE:
+		default:
+			mce_err.u.slb_error_type = MCE_SLB_ERROR_INDETERMINATE;
+			break;
+		}
+		if (mce_log->sub_err_type & MC_EFFECTIVE_ADDR_PROVIDED)
+			eaddr = be64_to_cpu(mce_log->effective_address);
+		break;
+	case MC_ERROR_TYPE_ERAT:
+		mce_err.error_type = MCE_ERROR_TYPE_ERAT;
+		switch (err_sub_type) {
+		case MC_ERROR_ERAT_PARITY:
+			mce_err.u.erat_error_type = MCE_ERAT_ERROR_PARITY;
+			break;
+		case MC_ERROR_ERAT_MULTIHIT:
+			mce_err.u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
+			break;
+		case MC_ERROR_ERAT_INDETERMINATE:
+		default:
+			mce_err.u.erat_error_type = MCE_ERAT_ERROR_INDETERMINATE;
+			break;
+		}
+		if (mce_log->sub_err_type & MC_EFFECTIVE_ADDR_PROVIDED)
+			eaddr = be64_to_cpu(mce_log->effective_address);
+		break;
+	case MC_ERROR_TYPE_TLB:
+		mce_err.error_type = MCE_ERROR_TYPE_TLB;
+		switch (err_sub_type) {
+		case MC_ERROR_TLB_PARITY:
+			mce_err.u.tlb_error_type = MCE_TLB_ERROR_PARITY;
+			break;
+		case MC_ERROR_TLB_MULTIHIT:
+			mce_err.u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT;
+			break;
+		case MC_ERROR_TLB_INDETERMINATE:
+		default:
+			mce_err.u.tlb_error_type = MCE_TLB_ERROR_INDETERMINATE;
+			break;
+		}
+		if (mce_log->sub_err_type & MC_EFFECTIVE_ADDR_PROVIDED)
+			eaddr = be64_to_cpu(mce_log->effective_address);
+		break;
+	case MC_ERROR_TYPE_D_CACHE:
+		mce_err.error_type = MCE_ERROR_TYPE_DCACHE;
+		break;
+	case MC_ERROR_TYPE_I_CACHE:
+		mce_err.error_type = MCE_ERROR_TYPE_ICACHE;
+		break;
+	case MC_ERROR_TYPE_CTRL_MEM_ACCESS:
+		mce_err.error_type = MCE_ERROR_TYPE_RA;
+		switch (err_sub_type) {
+		case MC_ERROR_CTRL_MEM_ACCESS_PTABLE_WALK:
+			mce_err.u.ra_error_type =
+				MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE_FOREIGN;
+			break;
+		case MC_ERROR_CTRL_MEM_ACCESS_OP_ACCESS:
+			mce_err.u.ra_error_type =
+				MCE_RA_ERROR_LOAD_STORE_FOREIGN;
+			break;
+		}
+		if (mce_log->sub_err_type & MC_EFFECTIVE_ADDR_PROVIDED)
+			eaddr = be64_to_cpu(mce_log->effective_address);
+		break;
+	case MC_ERROR_TYPE_UNKNOWN:
+	default:
+		mce_err.error_type = MCE_ERROR_TYPE_UNKNOWN;
+		break;
+	}
+out:
+	save_mce_event(regs, disposition == RTAS_DISP_FULLY_RECOVERED,
+		       &mce_err, regs->nip, eaddr, paddr);
+	return disposition;
+}
+
+/*
+ * Run both machine check handling stages on an RTAS error log and return
+ * the final disposition. The first stage only runs when the log is
+ * extended and carries an MCE section; the second stage always runs and
+ * is given a NULL section otherwise.
+ */
+static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp)
+{
+	struct pseries_mc_errorlog *mce_log = NULL;
+	int disposition = rtas_error_disposition(errp);
+
+	if (rtas_error_extended(errp)) {
+		struct pseries_errorlog *section;
+
+		section = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE);
+		if (section) {
+			mce_log = (struct pseries_mc_errorlog *)section->data;
+			disposition = mce_handle_err_realmode(disposition,
+							      mce_log->error_type);
+		}
+	}
+
+	return mce_handle_err_virtmode(regs, errp, mce_log, disposition);
+}
+
+/*
+ * Hand this CPU's saved MCE RTAS error log to log_error().
+ */
+void pSeries_machine_check_log_err(void)
+{
+	log_error((char *)fwnmi_get_errlog(), ERR_TYPE_RTAS_LOG, 0);
+}
+
+/*
+ * See if we can recover from a machine check exception.
+ * This is only called on power4 (or above) and only via
+ * the Firmware Non-Maskable Interrupts (fwnmi) handler
+ * which provides the error analysis for us.
+ *
+ * Return 1 if corrected (or delivered a signal).
+ * Return 0 if there is nothing we can do.
+ */
+static int recover_mce(struct pt_regs *regs, struct machine_check_event *evt)
+{
+	int recovered = 0;
+
+	if (regs_is_unrecoverable(regs)) {
+		/* If MSR_RI isn't set, we cannot recover */
+		pr_err("Machine check interrupt unrecoverable: MSR(RI=0)\n");
+	} else if (evt->disposition == MCE_DISPOSITION_RECOVERED) {
+		/* Platform corrected itself */
+		recovered = 1;
+	} else if (evt->severity == MCE_SEV_FATAL) {
+		/* Fatal machine check */
+		pr_err("Machine check interrupt is fatal\n");
+	}
+
+	/* Only unrecovered synchronous errors get the treatment below. */
+	if (recovered || !evt->sync_error)
+		return recovered;
+
+	/*
+	 * Try to kill processes if we get a synchronous machine check
+	 * (e.g., one caused by execution of this instruction). This
+	 * will devolve into a panic if we try to kill init or are in
+	 * an interrupt etc.
+	 *
+	 * TODO: Queue up this address for hwpoisoning later.
+	 * TODO: This is not quite right for d-side machine
+	 *       checks ->nip is not necessarily the important
+	 *       address.
+	 */
+	if (user_mode(regs)) {
+		_exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip);
+		return 1;
+	}
+
+	/*
+	 * die() would kill the kernel, so better to go via
+	 * the platform reboot code that will log the
+	 * machine check.
+	 */
+	if (die_will_crash())
+		return 0;
+
+	die_mce("Machine check", regs, SIGBUS);
+	return 1;
+}
+
+/*
+ * Handle a machine check.
+ *
+ * Note that on Power 4 and beyond Firmware Non-Maskable Interrupts (fwnmi)
+ * should be present. If so the handler which called us tells us if the
+ * error was recovered (never true if RI=0).
+ *
+ * On hardware prior to Power 4 these exceptions were asynchronous which
+ * means we can't tell exactly where it occurred and so we can't recover.
+ *
+ * Returns 1 when recover_mce() reports recovery, 0 otherwise (including
+ * when no saved event is available or its version is not MCE_V1).
+ */
+int pSeries_machine_check_exception(struct pt_regs *regs)
+{
+	struct machine_check_event event;
+
+	if (!get_mce_event(&event, MCE_EVENT_RELEASE))
+		return 0;
+
+	/* Print things out */
+	if (event.version != MCE_V1) {
+		pr_err("Machine Check Exception, Unknown event version %d !\n",
+		       event.version);
+		return 0;
+	}
+	machine_check_print_event_info(&event, user_mode(regs), false);
+
+	return recover_mce(regs, &event) ? 1 : 0;
+}
+
+/*
+ * Early machine check entry point.
+ *
+ * With FWNMI active, fetch the firmware error log, run the MCE handlers on
+ * it and release the firmware save area. Returns 1 when the error ends up
+ * fully recovered, 0 otherwise (including when FWNMI is not active or the
+ * error log could not be retrieved).
+ */
+long pseries_machine_check_realmode(struct pt_regs *regs)
+{
+	struct rtas_error_log *errp;
+	int disposition = RTAS_DISP_NOT_RECOVERED;
+
+	if (!fwnmi_active)
+		return 0;
+
+	/*
+	 * fwnmi_get_errinfo() returns NULL when r3 does not point at a valid
+	 * save area. There is no log to analyse in that case, so leave the
+	 * disposition at "not recovered" instead of dereferencing NULL.
+	 */
+	errp = fwnmi_get_errinfo(regs);
+	/*
+	 * Call to fwnmi_release_errinfo() in real mode causes kernel
+	 * to panic. Hence we will call it as soon as we go into
+	 * virtual mode.
+	 */
+	if (errp)
+		disposition = mce_handle_error(regs, errp);
+
+	fwnmi_release_errinfo();
+
+	if (disposition == RTAS_DISP_FULLY_RECOVERED)
+		return 1;
+
+	return 0;
+}
diff --git a/arch/powerpc/platforms/pseries/reconfig.c b/arch/powerpc/platforms/pseries/reconfig.c
new file mode 100644
index 000000000..599bd2c78
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/reconfig.c
@@ -0,0 +1,414 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * pSeries_reconfig.c - support for dynamic reconfiguration (including PCI
+ * Hotplug and Dynamic Logical Partitioning on RPA platforms).
+ *
+ * Copyright (C) 2005 Nathan Lynch
+ * Copyright (C) 2005 IBM Corporation
+ */
+
+#include <linux/kernel.h>
+#include <linux/notifier.h>
+#include <linux/proc_fs.h>
+#include <linux/security.h>
+#include <linux/slab.h>
+#include <linux/of.h>
+
+#include <asm/machdep.h>
+#include <linux/uaccess.h>
+#include <asm/mmu.h>
+
+#include "of_helpers.h"
+
+/*
+ * Allocate a new dynamic device node for @path, give it @proplist as its
+ * properties and attach it below the parent derived from @path.
+ *
+ * Returns 0 on success or a negative errno. On failure the node is freed
+ * but @proplist is left untouched for the caller to release.
+ */
+static int pSeries_reconfig_add_node(const char *path, struct property *proplist)
+{
+	struct device_node *np, *parent;
+	int err = -ENOMEM;
+
+	np = kzalloc(sizeof(*np), GFP_KERNEL);
+	if (!np)
+		return err;
+
+	np->full_name = kstrdup(kbasename(path), GFP_KERNEL);
+	if (!np->full_name)
+		goto out_free;
+
+	np->properties = proplist;
+	of_node_set_flag(np, OF_DYNAMIC);
+	of_node_init(np);
+
+	/*
+	 * Keep the lookup result in a local until it is known to be a real
+	 * node: an ERR_PTR must never be handed to of_node_put().
+	 */
+	parent = pseries_of_derive_parent(path);
+	if (IS_ERR(parent)) {
+		err = PTR_ERR(parent);
+		goto out_free;
+	}
+	np->parent = parent;
+
+	err = of_attach_node(np);
+	if (err)
+		printk(KERN_ERR "Failed to add device node %s\n", path);
+
+	/* Drop the reference taken by the parent lookup on both outcomes. */
+	of_node_put(parent);
+	if (err)
+		goto out_free;
+
+	return 0;
+
+out_free:
+	kfree(np->full_name);
+	kfree(np);
+	return err;
+}
+
+/*
+ * Detach @np from the device tree. Fails with -EINVAL when the node has
+ * no parent and with -EBUSY when it still has a child.
+ */
+static int pSeries_reconfig_remove_node(struct device_node *np)
+{
+	struct device_node *parent, *first_child;
+	int rc;
+
+	parent = of_get_parent(np);
+	if (!parent)
+		return -EINVAL;
+
+	first_child = of_get_next_child(np, NULL);
+	if (first_child) {
+		of_node_put(first_child);
+		rc = -EBUSY;
+	} else {
+		of_detach_node(np);
+		rc = 0;
+	}
+
+	of_node_put(parent);
+	return rc;
+}
+
+/*
+ * /proc/powerpc/ofdt - yucky binary interface for adding and removing
+ * OF device nodes. Should be deprecated as soon as we get an
+ * in-kernel wrapper for the RTAS ibm,configure-connector call.
+ */
+
+/* Free every property on the ->next chain starting at @prop, with its name and value. */
+static void release_prop_list(const struct property *prop)
+{
+	while (prop) {
+		struct property *following = prop->next;
+
+		kfree(prop->name);
+		kfree(prop->value);
+		kfree(prop);
+		prop = following;
+	}
+}
+
+/**
+ * parse_next_property - process the next property from raw input buffer
+ * @buf: input buffer, must be nul-terminated
+ * @end: end of the input buffer + 1, for validation
+ * @name: return value; set to property name in buf
+ * @length: return value; set to length of value
+ * @value: return value; set to the property value in buf
+ *
+ * The expected layout is "<name> <decimal length> <value bytes>". The
+ * length is rejected unless the value fits between its start and @end, so
+ * on success @length is never negative and @value + @length never points
+ * past @end.
+ *
+ * Note that the caller must make copies of the name and value returned,
+ * this function does no allocation or copying of the data.  Return value
+ * is set to the next name in buf, or NULL on error.
+ */
+static char * parse_next_property(char *buf, char *end, char **name, int *length,
+				  unsigned char **value)
+{
+	unsigned long len;
+	char *tmp;
+
+	*name = buf;
+
+	tmp = strchr(buf, ' ');
+	if (!tmp) {
+		printk(KERN_ERR "property parse failed in %s at line %d\n",
+		       __func__, __LINE__);
+		return NULL;
+	}
+	*tmp = '\0';
+
+	if (++tmp >= end) {
+		printk(KERN_ERR "property parse failed in %s at line %d\n",
+		       __func__, __LINE__);
+		return NULL;
+	}
+
+	/* now we're on the length */
+	len = simple_strtoul(tmp, &tmp, 10);
+	if (*tmp != ' ' || ++tmp >= end) {
+		printk(KERN_ERR "property parse failed in %s at line %d\n",
+		       __func__, __LINE__);
+		return NULL;
+	}
+
+	/*
+	 * now we're on the value; tmp < end here, so the subtraction is
+	 * positive. Checking the unsigned length against the bytes that are
+	 * left also rules out values that would be negative as an int.
+	 */
+	if (len > (unsigned long)(end - tmp)) {
+		printk(KERN_ERR "property parse failed in %s at line %d\n",
+		       __func__, __LINE__);
+		return NULL;
+	}
+	*length = len;
+	*value = tmp;
+	tmp += len;
+	if (tmp < end && *tmp != ' ' && *tmp != '\0') {
+		printk(KERN_ERR "property parse failed in %s at line %d\n",
+		       __func__, __LINE__);
+		return NULL;
+	}
+	tmp++;
+
+	/* and now we should be on the next name, or the end */
+	return tmp;
+}
+
+/*
+ * Allocate a property holding copies of @name and the @length bytes at
+ * @value (one extra nul byte is appended to the copy), and link @last as
+ * its ->next. Returns NULL if any allocation fails.
+ */
+static struct property *new_property(const char *name, const int length,
+				     const unsigned char *value, struct property *last)
+{
+	struct property *prop;
+	char *bytes;
+
+	prop = kzalloc(sizeof(*prop), GFP_KERNEL);
+	if (!prop)
+		return NULL;
+
+	prop->name = kstrdup(name, GFP_KERNEL);
+	if (!prop->name)
+		goto fail;
+
+	prop->value = kmalloc(length + 1, GFP_KERNEL);
+	if (!prop->value)
+		goto fail;
+
+	bytes = prop->value;
+	memcpy(bytes, value, length);
+	bytes[length] = 0;
+	prop->length = length;
+	prop->next = last;
+	return prop;
+
+fail:
+	kfree(prop->name);
+	kfree(prop->value);
+	kfree(prop);
+	return NULL;
+}
+
+/*
+ * Handle "add_node <path> <properties...>": fail with -EINVAL if the node
+ * already exists, otherwise parse every property in the buffer into a list
+ * and create the node with it. The list is released again on any failure.
+ */
+static int do_add_node(char *buf, size_t bufsize)
+{
+	char *const end = buf + bufsize;
+	char *const path = buf;
+	struct property *props = NULL;
+	struct device_node *existing;
+	unsigned char *value;
+	char *cursor, *name;
+	int length, rv = 0;
+
+	cursor = strchr(buf, ' ');
+	if (!cursor)
+		return -EINVAL;
+	*cursor++ = '\0';
+
+	existing = of_find_node_by_path(path);
+	if (existing) {
+		of_node_put(existing);
+		return -EINVAL;
+	}
+
+	while (cursor < end) {
+		struct property *fresh;
+
+		cursor = parse_next_property(cursor, end, &name, &length, &value);
+		if (!cursor) {
+			rv = -EINVAL;
+			goto out;
+		}
+
+		/* Each new property is chained in front of the previous ones. */
+		fresh = new_property(name, length, value, props);
+		if (!fresh) {
+			rv = -ENOMEM;
+			goto out;
+		}
+		props = fresh;
+	}
+
+	rv = pSeries_reconfig_add_node(path, props);
+
+out:
+	if (rv)
+		release_prop_list(props);
+	return rv;
+}
+
+/* Handle "remove_node <path>"; -ENODEV when no node exists at that path. */
+static int do_remove_node(char *buf)
+{
+	struct device_node *target = of_find_node_by_path(buf);
+	int rv;
+
+	if (!target)
+		return -ENODEV;
+
+	rv = pSeries_reconfig_remove_node(target);
+	of_node_put(target);
+	return rv;
+}
+
+/*
+ * Split the leading "<phandle> " token off @buf and look the node up.
+ * On return *@npp holds the node found, or NULL. Returns a pointer just
+ * past the token, or NULL when @buf contains no space.
+ */
+static char *parse_node(char *buf, size_t bufsize, struct device_node **npp)
+{
+	char *const token = buf;
+	char *rest;
+	phandle handle;
+
+	*npp = NULL;
+
+	rest = strchr(buf, ' ');
+	if (!rest)
+		return NULL;
+	*rest++ = '\0';
+
+	handle = simple_strtoul(token, NULL, 0);
+	*npp = of_find_node_by_phandle(handle);
+
+	return rest;
+}
+
+/*
+ * Handle "add_property <phandle> <name> <length> <value>".
+ *
+ * Returns 0 on success, -ENODEV if the node cannot be found, -EINVAL on a
+ * malformed property, -ENOMEM on allocation failure, or the error from
+ * of_add_property(). The node reference taken by parse_node() is dropped
+ * on every path, and a property that could not be added is freed.
+ */
+static int do_add_property(char *buf, size_t bufsize)
+{
+	struct property *prop = NULL;
+	struct device_node *np;
+	unsigned char *value;
+	char *name, *end;
+	int length, rv;
+
+	end = buf + bufsize;
+	buf = parse_node(buf, bufsize, &np);
+
+	if (!np)
+		return -ENODEV;
+
+	if (parse_next_property(buf, end, &name, &length, &value) == NULL) {
+		rv = -EINVAL;
+		goto out;
+	}
+
+	prop = new_property(name, length, value, NULL);
+	if (!prop) {
+		rv = -ENOMEM;
+		goto out;
+	}
+
+	rv = of_add_property(np, prop);
+	if (rv)
+		release_prop_list(prop);
+
+out:
+	of_node_put(np);
+	return rv;
+}
+
+/*
+ * Handle "remove_property <phandle> <name>".
+ *
+ * Returns -ENODEV if the node cannot be found, -EINVAL for an empty
+ * property name, otherwise the result of of_remove_property(). The node
+ * reference taken by parse_node() is dropped before returning.
+ */
+static int do_remove_property(char *buf, size_t bufsize)
+{
+	struct device_node *np;
+	char *tmp;
+	int rv;
+
+	buf = parse_node(buf, bufsize, &np);
+
+	if (!np)
+		return -ENODEV;
+
+	/* The property name ends at the next space, if there is one. */
+	tmp = strchr(buf,' ');
+	if (tmp)
+		*tmp = '\0';
+
+	if (strlen(buf) == 0) {
+		rv = -EINVAL;
+		goto out;
+	}
+
+	rv = of_remove_property(np, of_find_property(np, buf, NULL));
+
+out:
+	of_node_put(np);
+	return rv;
+}
+
+/*
+ * Handle "update_property <phandle> <name> <length> <value>".
+ *
+ * Returns -ENODEV if the node cannot be found or the name is empty,
+ * -EINVAL on a malformed property, -ENOMEM on allocation failure,
+ * otherwise the result of of_update_property(). The node reference taken
+ * by parse_node() is dropped on every path, and a property that could not
+ * be installed is freed.
+ */
+static int do_update_property(char *buf, size_t bufsize)
+{
+	struct device_node *np;
+	unsigned char *value;
+	char *name, *end, *next_prop;
+	int length, rv;
+	struct property *newprop;
+
+	/*
+	 * The end of the input must be computed before parse_node() moves
+	 * buf forward, otherwise the bound used to validate the property
+	 * lies past the real end of the buffer.
+	 */
+	end = buf + bufsize;
+	buf = parse_node(buf, bufsize, &np);
+
+	if (!np)
+		return -ENODEV;
+
+	next_prop = parse_next_property(buf, end, &name, &length, &value);
+	if (!next_prop) {
+		rv = -EINVAL;
+		goto out;
+	}
+
+	if (!strlen(name)) {
+		rv = -ENODEV;
+		goto out;
+	}
+
+	newprop = new_property(name, length, value, NULL);
+	if (!newprop) {
+		rv = -ENOMEM;
+		goto out;
+	}
+
+	/* Only read an int from the value when it is at least that long. */
+	if ((!strcmp(name, "slb-size") || !strcmp(name, "ibm,slb-size")) &&
+	    length >= (int)sizeof(int))
+		slb_set_size(*(int *)value);
+
+	rv = of_update_property(np, newprop);
+	if (rv)
+		release_prop_list(newprop);
+
+out:
+	of_node_put(np);
+	return rv;
+}
+
+/**
+ * ofdt_write - perform operations on the Open Firmware device tree
+ *
+ * @file: not used
+ * @buf: command and arguments
+ * @count: size of the command buffer
+ * @off: not used
+ *
+ * The buffer holds a command word, a space, and the command's arguments.
+ * The commands dispatched here are "add_node", "remove_node",
+ * "add_property", "remove_property" and "update_property"; anything else,
+ * or a buffer without a space, fails with -EINVAL.
+ *
+ * The write is refused up front when security_locked_down() reports that
+ * LOCKDOWN_DEVICE_TREE is in effect.
+ *
+ * Return: @count on success, otherwise the negative error from the
+ * lockdown check, the user copy or the command handler.
+ */
+static ssize_t ofdt_write(struct file *file, const char __user *buf, size_t count,
+ loff_t *off)
+{
+ int rv;
+ char *kbuf;
+ char *tmp;
+
+ rv = security_locked_down(LOCKDOWN_DEVICE_TREE);
+ if (rv)
+ return rv;
+
+ kbuf = memdup_user_nul(buf, count);
+ if (IS_ERR(kbuf))
+ return PTR_ERR(kbuf);
+
+ tmp = strchr(kbuf, ' ');
+ if (!tmp) {
+ rv = -EINVAL;
+ goto out;
+ }
+ *tmp = '\0';
+ tmp++;
+
+ if (!strcmp(kbuf, "add_node"))
+ rv = do_add_node(tmp, count - (tmp - kbuf));
+ else if (!strcmp(kbuf, "remove_node"))
+ rv = do_remove_node(tmp);
+ else if (!strcmp(kbuf, "add_property"))
+ rv = do_add_property(tmp, count - (tmp - kbuf));
+ else if (!strcmp(kbuf, "remove_property"))
+ rv = do_remove_property(tmp, count - (tmp - kbuf));
+ else if (!strcmp(kbuf, "update_property"))
+ rv = do_update_property(tmp, count - (tmp - kbuf));
+ else
+ rv = -EINVAL;
+out:
+ kfree(kbuf);
+ return rv ? rv : count;
+}
+
+static const struct proc_ops ofdt_proc_ops = {
+ .proc_write = ofdt_write,
+ .proc_lseek = noop_llseek,
+};
+
+/*
+ * Create /proc/powerpc/ofdt with mode 0200 (write-only by owner) and a
+ * reported size of 0. Always returns 0, even if the entry could not be
+ * created.
+ */
+static int proc_ppc64_create_ofdt(void)
+{
+	struct proc_dir_entry *entry;
+
+	entry = proc_create("powerpc/ofdt", 0200, NULL, &ofdt_proc_ops);
+	if (!entry)
+		return 0;
+
+	proc_set_size(entry, 0);
+	return 0;
+}
+machine_device_initcall(pseries, proc_ppc64_create_ofdt);
diff --git a/arch/powerpc/platforms/pseries/rng.c b/arch/powerpc/platforms/pseries/rng.c
new file mode 100644
index 000000000..6ddfdeaac
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/rng.c
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2013, Michael Ellerman, IBM Corporation.
+ */
+
+#define pr_fmt(fmt) "pseries-rng: " fmt
+
+#include <linux/kernel.h>
+#include <linux/of.h>
+#include <asm/archrandom.h>
+#include <asm/machdep.h>
+#include <asm/plpar_wrappers.h>
+#include "pseries.h"
+
+
+/*
+ * Fetch one random value through the H_RANDOM hcall.
+ *
+ * Stores the first return word in *v and returns 1 when the hcall reports
+ * H_SUCCESS; returns 0, leaving *v untouched, otherwise.
+ */
+static int pseries_get_random_long(unsigned long *v)
+{
+	unsigned long hcall_ret[PLPAR_HCALL_BUFSIZE];
+
+	if (plpar_hcall(H_RANDOM, hcall_ret) != H_SUCCESS)
+		return 0;
+
+	*v = hcall_ret[0];
+	return 1;
+}
+
+/*
+ * Install pseries_get_random_long() as ppc_md.get_random_seed when the
+ * device tree has a node compatible with "ibm,random".
+ */
+void __init pseries_rng_init(void)
+{
+	struct device_node *rng_node;
+
+	rng_node = of_find_compatible_node(NULL, NULL, "ibm,random");
+	if (rng_node) {
+		ppc_md.get_random_seed = pseries_get_random_long;
+		/* The node was only needed for the presence check. */
+		of_node_put(rng_node);
+	}
+}
diff --git a/arch/powerpc/platforms/pseries/rtas-fadump.c b/arch/powerpc/platforms/pseries/rtas-fadump.c
new file mode 100644
index 000000000..b5853e9fc
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/rtas-fadump.c
@@ -0,0 +1,557 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Firmware-Assisted Dump support on POWERVM platform.
+ *
+ * Copyright 2011, Mahesh Salgaonkar, IBM Corporation.
+ * Copyright 2019, Hari Bathini, IBM Corporation.
+ */
+
+#define pr_fmt(fmt) "rtas fadump: " fmt
+
+#include <linux/string.h>
+#include <linux/memblock.h>
+#include <linux/delay.h>
+#include <linux/seq_file.h>
+#include <linux/crash_dump.h>
+#include <linux/of.h>
+#include <linux/of_fdt.h>
+
+#include <asm/page.h>
+#include <asm/rtas.h>
+#include <asm/fadump.h>
+#include <asm/fadump-internal.h>
+
+#include "rtas-fadump.h"
+
+/* Registration structure passed to firmware by the register/unregister calls. */
+static struct rtas_fadump_mem_struct fdm;
+/*
+ * Points at the "ibm,kernel-dump" property found by rtas_fadump_dt_scan();
+ * reset to NULL by a successful rtas_fadump_invalidate().
+ */
+static const struct rtas_fadump_mem_struct *fdm_active;
+
+/*
+ * Refresh the fields of @fadump_conf that follow from the boot memory
+ * (RMR) destination recorded in @fdm: the destination address itself and
+ * the fadump header address, boot_memory_size bytes beyond it.
+ */
+static void rtas_fadump_update_config(struct fw_dump *fadump_conf,
+				      const struct rtas_fadump_mem_struct *fdm)
+{
+	const struct rtas_fadump_section *rmr = &fdm->rmr_region;
+
+	fadump_conf->boot_mem_dest_addr = be64_to_cpu(rmr->destination_address);
+	fadump_conf->fadumphdr_addr = fadump_conf->boot_mem_dest_addr +
+				      fadump_conf->boot_memory_size;
+}
+
+/*
+ * Called in the capture kernel: recover the configuration that the first
+ * kernel set up and handed to the f/w, reading it back from @fdm.
+ */
+static void __init rtas_fadump_get_config(struct fw_dump *fadump_conf,
+					  const struct rtas_fadump_mem_struct *fdm)
+{
+	const struct rtas_fadump_section *rmr = &fdm->rmr_region;
+
+	/* Exactly one boot memory region, taken from the RMR section. */
+	fadump_conf->boot_mem_regs_cnt = 1;
+	fadump_conf->boot_mem_addr[0] = be64_to_cpu(rmr->source_address);
+	fadump_conf->boot_mem_sz[0] = be64_to_cpu(rmr->source_len);
+
+	fadump_conf->boot_memory_size = fadump_conf->boot_mem_sz[0];
+	fadump_conf->boot_mem_top = fadump_conf->boot_memory_size;
+
+	/*
+	 * The CPU state destination marks the start of the reserve dump
+	 * area (permanent reservation), which is needed to re-register
+	 * FADump after dump capture.
+	 */
+	fadump_conf->reserve_dump_area_start =
+		be64_to_cpu(fdm->cpu_state_data.destination_address);
+
+	rtas_fadump_update_config(fadump_conf, fdm);
+}
+
+/*
+ * Populate the file-scope 'fdm' registration structure: a header followed
+ * by the CPU state, HPTE and real-mode (boot memory) sections. Destination
+ * addresses are assigned one after another, starting from
+ * reserve_dump_area_start masked with PAGE_MASK.
+ *
+ * Also refreshes the derived addresses in @fadump_conf and returns the
+ * address just past the boot memory destination area.
+ */
+static u64 rtas_fadump_init_mem_struct(struct fw_dump *fadump_conf)
+{
+ u64 addr = fadump_conf->reserve_dump_area_start;
+
+ memset(&fdm, 0, sizeof(struct rtas_fadump_mem_struct));
+ addr = addr & PAGE_MASK;
+
+ fdm.header.dump_format_version = cpu_to_be32(0x00000001);
+ fdm.header.dump_num_sections = cpu_to_be16(3);
+ fdm.header.dump_status_flag = 0;
+ /* The first section (cpu_state_data) follows the header directly. */
+ fdm.header.offset_first_dump_section =
+ cpu_to_be32((u32)offsetof(struct rtas_fadump_mem_struct,
+ cpu_state_data));
+
+ /*
+ * Fields for disk dump option.
+ * We are not using disk dump option, hence set these fields to 0.
+ */
+ fdm.header.dd_block_size = 0;
+ fdm.header.dd_block_offset = 0;
+ fdm.header.dd_num_blocks = 0;
+ fdm.header.dd_offset_disk_path = 0;
+
+ /* set 0 to disable an automatic dump-reboot. */
+ fdm.header.max_time_auto = 0;
+
+ /* Kernel dump sections */
+ /* cpu state data section. */
+ fdm.cpu_state_data.request_flag =
+ cpu_to_be32(RTAS_FADUMP_REQUEST_FLAG);
+ fdm.cpu_state_data.source_data_type =
+ cpu_to_be16(RTAS_FADUMP_CPU_STATE_DATA);
+ fdm.cpu_state_data.source_address = 0;
+ fdm.cpu_state_data.source_len =
+ cpu_to_be64(fadump_conf->cpu_state_data_size);
+ fdm.cpu_state_data.destination_address = cpu_to_be64(addr);
+ addr += fadump_conf->cpu_state_data_size;
+
+ /* hpte region section */
+ fdm.hpte_region.request_flag = cpu_to_be32(RTAS_FADUMP_REQUEST_FLAG);
+ fdm.hpte_region.source_data_type =
+ cpu_to_be16(RTAS_FADUMP_HPTE_REGION);
+ fdm.hpte_region.source_address = 0;
+ fdm.hpte_region.source_len =
+ cpu_to_be64(fadump_conf->hpte_region_size);
+ fdm.hpte_region.destination_address = cpu_to_be64(addr);
+ addr += fadump_conf->hpte_region_size;
+
+ /*
+ * Align boot memory area destination address to page boundary to
+ * be able to mmap read this area in the vmcore.
+ */
+ addr = PAGE_ALIGN(addr);
+
+ /* RMA region section */
+ fdm.rmr_region.request_flag = cpu_to_be32(RTAS_FADUMP_REQUEST_FLAG);
+ fdm.rmr_region.source_data_type =
+ cpu_to_be16(RTAS_FADUMP_REAL_MODE_REGION);
+ fdm.rmr_region.source_address = cpu_to_be64(0);
+ fdm.rmr_region.source_len = cpu_to_be64(fadump_conf->boot_memory_size);
+ fdm.rmr_region.destination_address = cpu_to_be64(addr);
+ addr += fadump_conf->boot_memory_size;
+
+ /* Recompute boot_mem_dest_addr and fadumphdr_addr from the new layout. */
+ rtas_fadump_update_config(fadump_conf, &fdm);
+
+ return addr;
+}
+
+/* Report the platform's minimum boot memory size, RTAS_FADUMP_MIN_BOOT_MEM. */
+static u64 rtas_fadump_get_bootmem_min(void)
+{
+ return RTAS_FADUMP_MIN_BOOT_MEM;
+}
+
+/*
+ * Register the dump layout in 'fdm' with firmware, retrying for as long as
+ * rtas_busy_delay_time() asks for a delay.
+ *
+ * Returns 0 on success, -EINVAL for RTAS status -3 (parameter error),
+ * -EEXIST for status -9 (already registered; dump_registered is still set),
+ * and -EIO for status -1 and any other status.
+ */
+static int rtas_fadump_register(struct fw_dump *fadump_conf)
+{
+	unsigned int delay_ms;
+	int rc;
+
+	/* TODO: Add upper time limit for the delay */
+	for (;;) {
+		rc = rtas_call(fadump_conf->ibm_configure_kernel_dump, 3, 1,
+			       NULL, FADUMP_REGISTER, &fdm,
+			       sizeof(struct rtas_fadump_mem_struct));
+
+		delay_ms = rtas_busy_delay_time(rc);
+		if (!delay_ms)
+			break;
+		mdelay(delay_ms);
+	}
+
+	switch (rc) {
+	case 0:
+		pr_info("Registration is successful!\n");
+		fadump_conf->dump_registered = 1;
+		return 0;
+	case -1:
+		pr_err("Failed to register. Hardware Error(%d).\n", rc);
+		return -EIO;
+	case -3:
+		/* Name the likely cause before the generic message. */
+		if (!is_fadump_boot_mem_contiguous())
+			pr_err("Can't have holes in boot memory area.\n");
+		else if (!is_fadump_reserved_mem_contiguous())
+			pr_err("Can't have holes in reserved memory area.\n");
+
+		pr_err("Failed to register. Parameter Error(%d).\n", rc);
+		return -EINVAL;
+	case -9:
+		pr_err("Already registered!\n");
+		fadump_conf->dump_registered = 1;
+		return -EEXIST;
+	default:
+		pr_err("Failed to register. Unknown Error(%d).\n", rc);
+		return -EIO;
+	}
+}
+
+/*
+ * Ask firmware to drop the registration described by 'fdm', retrying for
+ * as long as rtas_busy_delay_time() asks for a delay.
+ *
+ * Returns 0 and clears dump_registered on success, -EIO on any non-zero
+ * RTAS status.
+ */
+static int rtas_fadump_unregister(struct fw_dump *fadump_conf)
+{
+	unsigned int delay_ms;
+	int rc;
+
+	/* TODO: Add upper time limit for the delay */
+	for (;;) {
+		rc = rtas_call(fadump_conf->ibm_configure_kernel_dump, 3, 1,
+			       NULL, FADUMP_UNREGISTER, &fdm,
+			       sizeof(struct rtas_fadump_mem_struct));
+
+		delay_ms = rtas_busy_delay_time(rc);
+		if (!delay_ms)
+			break;
+		mdelay(delay_ms);
+	}
+
+	if (rc != 0) {
+		pr_err("Failed to un-register - unexpected error(%d).\n", rc);
+		return -EIO;
+	}
+
+	fadump_conf->dump_registered = 0;
+	return 0;
+}
+
+/*
+ * Ask firmware to invalidate the dump described by 'fdm_active', retrying
+ * for as long as rtas_busy_delay_time() asks for a delay.
+ *
+ * On success clears dump_active, forgets fdm_active and returns 0; any
+ * non-zero RTAS status yields -EIO.
+ */
+static int rtas_fadump_invalidate(struct fw_dump *fadump_conf)
+{
+	unsigned int delay_ms;
+	int rc;
+
+	/* TODO: Add upper time limit for the delay */
+	for (;;) {
+		rc = rtas_call(fadump_conf->ibm_configure_kernel_dump, 3, 1,
+			       NULL, FADUMP_INVALIDATE, fdm_active,
+			       sizeof(struct rtas_fadump_mem_struct));
+
+		delay_ms = rtas_busy_delay_time(rc);
+		if (!delay_ms)
+			break;
+		mdelay(delay_ms);
+	}
+
+	if (rc != 0) {
+		pr_err("Failed to invalidate - unexpected error (%d).\n", rc);
+		return -EIO;
+	}
+
+	fadump_conf->dump_active = 0;
+	fdm_active = NULL;
+	return 0;
+}
+
+#define RTAS_FADUMP_GPR_MASK		0xffffff0000000000
+/*
+ * Translate a register identifier into a GPR number.
+ *
+ * @id is a GPR identifier when its bytes under RTAS_FADUMP_GPR_MASK equal
+ * fadump_str_to_u64("GPR"); the two bytes that follow are then parsed as a
+ * decimal number.
+ *
+ * Returns -1 when @id is not a GPR identifier or the number exceeds 31,
+ * -EINVAL when the two characters do not parse, and the parsed value
+ * otherwise.
+ */
+static inline int rtas_fadump_gpr_index(u64 id)
+{
+	char digits[3];
+	int idx;
+
+	if ((id & RTAS_FADUMP_GPR_MASK) != fadump_str_to_u64("GPR"))
+		return -1;
+
+	/* The two characters right below the masked prefix hold the number. */
+	digits[0] = (id >> 32) & 0xff;
+	digits[1] = (id >> 24) & 0xff;
+	digits[2] = '\0';
+
+	if (kstrtoint(digits, 10, &idx))
+		return -EINVAL;
+
+	return idx > 31 ? -1 : idx;
+}
+
+/*
+ * Store @reg_val in the pt_regs field named by @reg_id. GPR identifiers
+ * are resolved through rtas_fadump_gpr_index(); identifiers that match
+ * nothing known here are silently ignored.
+ */
+static void __init rtas_fadump_set_regval(struct pt_regs *regs, u64 reg_id, u64 reg_val)
+{
+	const unsigned long val = (unsigned long)reg_val;
+	const int gpr = rtas_fadump_gpr_index(reg_id);
+
+	if (gpr >= 0) {
+		regs->gpr[gpr] = val;
+		return;
+	}
+
+	if (reg_id == fadump_str_to_u64("NIA"))
+		regs->nip = val;
+	else if (reg_id == fadump_str_to_u64("MSR"))
+		regs->msr = val;
+	else if (reg_id == fadump_str_to_u64("CTR"))
+		regs->ctr = val;
+	else if (reg_id == fadump_str_to_u64("LR"))
+		regs->link = val;
+	else if (reg_id == fadump_str_to_u64("XER"))
+		regs->xer = val;
+	else if (reg_id == fadump_str_to_u64("CR"))
+		regs->ccr = val;
+	else if (reg_id == fadump_str_to_u64("DAR"))
+		regs->dar = val;
+	else if (reg_id == fadump_str_to_u64("DSISR"))
+		regs->dsisr = val;
+}
+
+/*
+ * Zero @regs, then fill it from consecutive register entries starting at
+ * @reg_entry until the "CPUEND" entry is reached.
+ *
+ * Returns a pointer to the entry that follows "CPUEND".
+ */
+static struct rtas_fadump_reg_entry* __init
+rtas_fadump_read_regs(struct rtas_fadump_reg_entry *reg_entry,
+		      struct pt_regs *regs)
+{
+	memset(regs, 0, sizeof(*regs));
+
+	for (; be64_to_cpu(reg_entry->reg_id) != fadump_str_to_u64("CPUEND");
+	     reg_entry++)
+		rtas_fadump_set_regval(regs, be64_to_cpu(reg_entry->reg_id),
+				       be64_to_cpu(reg_entry->reg_value));
+
+	/* Step over the terminating "CPUEND" entry. */
+	return reg_entry + 1;
+}
+
+/*
+ * Read CPU state dump data and convert it into ELF notes.
+ * The CPU dump starts with magic number "REGSAVE". NumCpusOffset should be
+ * used to access the data to allow for additional fields to be added without
+ * affecting compatibility. Each list of registers for a CPU starts with
+ * "CPUSTRT" and ends with "CPUEND". Each register entry is of 16 bytes,
+ * 8 Byte ASCII identifier and 8 Byte register value. The register entry
+ * with identifier "CPUSTRT" and "CPUEND" contains 4 byte cpu id as part
+ * of register value. For more details refer to PAPR document.
+ *
+ * Only for the crashing cpu we ignore the CPU dump data and get exact
+ * state from fadump crash info structure populated by first kernel at the
+ * time of crash.
+ *
+ * Returns 0 on success, -ENOENT when the save area magic or a "CPUSTRT"
+ * marker is missing, or the error from fadump_setup_cpu_notes_buf().
+ */
+static int __init rtas_fadump_build_cpu_notes(struct fw_dump *fadump_conf)
+{
+ struct rtas_fadump_reg_save_area_header *reg_header;
+ struct fadump_crash_info_header *fdh = NULL;
+ struct rtas_fadump_reg_entry *reg_entry;
+ u32 num_cpus, *note_buf;
+ int i, rc = 0, cpu = 0;
+ struct pt_regs regs;
+ unsigned long addr;
+ void *vaddr;
+
+ /* The register save area sits at the CPU state section's destination. */
+ addr = be64_to_cpu(fdm_active->cpu_state_data.destination_address);
+ vaddr = __va(addr);
+
+ reg_header = vaddr;
+ if (be64_to_cpu(reg_header->magic_number) !=
+ fadump_str_to_u64("REGSAVE")) {
+ pr_err("Unable to read register save area.\n");
+ return -ENOENT;
+ }
+
+ pr_debug("--------CPU State Data------------\n");
+ pr_debug("Magic Number: %llx\n", be64_to_cpu(reg_header->magic_number));
+ pr_debug("NumCpuOffset: %x\n", be32_to_cpu(reg_header->num_cpu_offset));
+
+ /*
+ * The CPU count is num_cpu_offset bytes from the start of the save
+ * area; the register entries begin right after that 32-bit count.
+ */
+ vaddr += be32_to_cpu(reg_header->num_cpu_offset);
+ num_cpus = be32_to_cpu(*((__be32 *)(vaddr)));
+ pr_debug("NumCpus : %u\n", num_cpus);
+ vaddr += sizeof(u32);
+ reg_entry = (struct rtas_fadump_reg_entry *)vaddr;
+
+ rc = fadump_setup_cpu_notes_buf(num_cpus);
+ if (rc != 0)
+ return rc;
+
+ note_buf = (u32 *)fadump_conf->cpu_notes_buf_vaddr;
+
+ if (fadump_conf->fadumphdr_addr)
+ fdh = __va(fadump_conf->fadumphdr_addr);
+
+ for (i = 0; i < num_cpus; i++) {
+ if (be64_to_cpu(reg_entry->reg_id) !=
+ fadump_str_to_u64("CPUSTRT")) {
+ pr_err("Unable to read CPU state data\n");
+ rc = -ENOENT;
+ goto error_out;
+ }
+ /* Lower 4 bytes of reg_value contains logical cpu id */
+ cpu = (be64_to_cpu(reg_entry->reg_value) &
+ RTAS_FADUMP_CPU_ID_MASK);
+ /* CPUs absent from the crash header's cpu_mask get no note. */
+ if (fdh && !cpumask_test_cpu(cpu, &fdh->cpu_mask)) {
+ RTAS_FADUMP_SKIP_TO_NEXT_CPU(reg_entry);
+ continue;
+ }
+ pr_debug("Reading register data for cpu %d...\n", cpu);
+ if (fdh && fdh->crashing_cpu == cpu) {
+ /* Crashing CPU: use the registers saved in the crash header. */
+ regs = fdh->regs;
+ note_buf = fadump_regs_to_elf_notes(note_buf, &regs);
+ RTAS_FADUMP_SKIP_TO_NEXT_CPU(reg_entry);
+ } else {
+ /* Step past "CPUSTRT", then parse up to and past "CPUEND". */
+ reg_entry++;
+ reg_entry = rtas_fadump_read_regs(reg_entry, &regs);
+ note_buf = fadump_regs_to_elf_notes(note_buf, &regs);
+ }
+ }
+ final_note(note_buf);
+
+ if (fdh) {
+ pr_debug("Updating elfcore header (%llx) with cpu notes\n",
+ fdh->elfcorehdr_addr);
+ fadump_update_elfcore_header(__va(fdh->elfcorehdr_addr));
+ }
+ return 0;
+
+error_out:
+ fadump_free_cpu_notes_buf();
+ return rc;
+
+}
+
+/*
+ * Validate the dump data stored by firmware and prepare it for export
+ * through '/proc/vmcore'.
+ *
+ * Returns -EINVAL when there is no active dump or fadump header address,
+ * when firmware flagged an error, when the dump is incomplete, or when the
+ * crash info header magic does not match. Otherwise returns the result of
+ * building the CPU notes and, on success, publishes elfcorehdr_addr.
+ */
+static int __init rtas_fadump_process(struct fw_dump *fadump_conf)
+{
+	const struct rtas_fadump_section *cpu, *rmr;
+	struct fadump_crash_info_header *fdh;
+	int rc;
+
+	if (!fdm_active || !fadump_conf->fadumphdr_addr)
+		return -EINVAL;
+
+	cpu = &fdm_active->cpu_state_data;
+	rmr = &fdm_active->rmr_region;
+
+	/* Check if the dump data is valid. */
+	if (be16_to_cpu(fdm_active->header.dump_status_flag) ==
+			RTAS_FADUMP_ERROR_FLAG ||
+	    cpu->error_flags != 0 ||
+	    rmr->error_flags != 0) {
+		pr_err("Dump taken by platform is not valid\n");
+		return -EINVAL;
+	}
+
+	/* Boot memory must be fully dumped and some CPU state must exist. */
+	if (rmr->bytes_dumped != rmr->source_len || !cpu->bytes_dumped) {
+		pr_err("Dump taken by platform is incomplete\n");
+		return -EINVAL;
+	}
+
+	/* Validate the fadump crash info header */
+	fdh = __va(fadump_conf->fadumphdr_addr);
+	if (fdh->magic_number != FADUMP_CRASH_INFO_MAGIC) {
+		pr_err("Crash info header is not valid.\n");
+		return -EINVAL;
+	}
+
+	rc = rtas_fadump_build_cpu_notes(fadump_conf);
+	if (rc)
+		return rc;
+
+	/*
+	 * Validation is complete and the elfcore header is ready. Setting
+	 * elfcorehdr_addr lets the vmcore module export the elfcore header
+	 * through '/proc/vmcore'.
+	 */
+	elfcorehdr_addr = fdh->elfcorehdr_addr;
+
+	return 0;
+}
+
+/*
+ * Print the CPU state, HPTE and boot memory sections to @m. The active
+ * dump's structure is shown when there is one, the registration structure
+ * 'fdm' otherwise.
+ */
+static void rtas_fadump_region_show(struct fw_dump *fadump_conf,
+				    struct seq_file *m)
+{
+	const struct rtas_fadump_mem_struct *fdm_ptr = fdm_active ? fdm_active : &fdm;
+	const struct rtas_fadump_section *cpu = &fdm_ptr->cpu_state_data;
+	const struct rtas_fadump_section *hpte = &fdm_ptr->hpte_region;
+	const struct rtas_fadump_section *rmr = &fdm_ptr->rmr_region;
+
+	seq_printf(m, "CPU :[%#016llx-%#016llx] %#llx bytes, Dumped: %#llx\n",
+		   be64_to_cpu(cpu->destination_address),
+		   be64_to_cpu(cpu->destination_address) +
+		   be64_to_cpu(cpu->source_len) - 1,
+		   be64_to_cpu(cpu->source_len),
+		   be64_to_cpu(cpu->bytes_dumped));
+
+	seq_printf(m, "HPTE:[%#016llx-%#016llx] %#llx bytes, Dumped: %#llx\n",
+		   be64_to_cpu(hpte->destination_address),
+		   be64_to_cpu(hpte->destination_address) +
+		   be64_to_cpu(hpte->source_len) - 1,
+		   be64_to_cpu(hpte->source_len),
+		   be64_to_cpu(hpte->bytes_dumped));
+
+	seq_printf(m, "DUMP: Src: %#016llx, Dest: %#016llx, ",
+		   be64_to_cpu(rmr->source_address),
+		   be64_to_cpu(rmr->destination_address));
+	seq_printf(m, "Size: %#llx, Dumped: %#llx bytes\n",
+		   be64_to_cpu(rmr->source_len),
+		   be64_to_cpu(rmr->bytes_dumped));
+
+	/* Dump is active. Show preserved area start address. */
+	if (!fdm_active)
+		return;
+
+	seq_printf(m, "\nMemory above %#016llx is reserved for saving crash dump\n",
+		   fadump_conf->boot_mem_top);
+}
+
+/*
+ * Trigger a firmware-assisted dump by handing @msg to rtas_os_term().
+ * @fdh is not used; the const qualifier on @msg is cast away for the call.
+ */
+static void rtas_fadump_trigger(struct fadump_crash_info_header *fdh,
+ const char *msg)
+{
+ /* Call ibm,os-term rtas call to trigger firmware assisted dump */
+ rtas_os_term((char *)msg);
+}
+
+/* Callback table installed as fadump_conf->ops by rtas_fadump_dt_scan(). */
+static struct fadump_ops rtas_fadump_ops = {
+ .fadump_init_mem_struct = rtas_fadump_init_mem_struct,
+ .fadump_get_bootmem_min = rtas_fadump_get_bootmem_min,
+ .fadump_register = rtas_fadump_register,
+ .fadump_unregister = rtas_fadump_unregister,
+ .fadump_invalidate = rtas_fadump_invalidate,
+ .fadump_process = rtas_fadump_process,
+ .fadump_region_show = rtas_fadump_region_show,
+ .fadump_trigger = rtas_fadump_trigger,
+};
+
+/*
+ * Probe flat device tree node @node for firmware-assisted dump support.
+ *
+ * Without an "ibm,configure-kernel-dump" property nothing is changed.
+ * Otherwise the RTAS token, the ops table and the supported flag are
+ * recorded in @fadump_conf, an active dump ("ibm,kernel-dump") is noted
+ * if present, and the CPU state / HPTE section sizes are read from
+ * "ibm,configure-kernel-dump-sizes" when that property exists.
+ */
+void __init rtas_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node)
+{
+ int i, size, num_sections;
+ const __be32 *sections;
+ const __be32 *token;
+
+ /*
+ * Check if Firmware Assisted dump is supported. if yes, check
+ * if dump has been initiated on last reboot.
+ */
+ token = of_get_flat_dt_prop(node, "ibm,configure-kernel-dump", NULL);
+ if (!token)
+ return;
+
+ fadump_conf->ibm_configure_kernel_dump = be32_to_cpu(*token);
+ fadump_conf->ops = &rtas_fadump_ops;
+ fadump_conf->fadump_supported = 1;
+
+ /* Firmware supports 64-bit value for size, align it to pagesize. */
+ fadump_conf->max_copy_size = ALIGN_DOWN(U64_MAX, PAGE_SIZE);
+
+ /*
+ * The 'ibm,kernel-dump' rtas node is present only if there is
+ * dump data waiting for us.
+ */
+ fdm_active = of_get_flat_dt_prop(node, "ibm,kernel-dump", NULL);
+ if (fdm_active) {
+ pr_info("Firmware-assisted dump is active.\n");
+ fadump_conf->dump_active = 1;
+ /*
+ * NOTE(review): the structure is read through __pa(fdm_active)
+ * here, while other users dereference fdm_active directly.
+ * Presumably this is tied to the early-boot context this
+ * runs in - confirm.
+ */
+ rtas_fadump_get_config(fadump_conf, (void *)__pa(fdm_active));
+ }
+
+ /* Get the sizes required to store dump data for the firmware provided
+ * dump sections.
+ * For each dump section type supported, a 32bit cell which defines
+ * the ID of a supported section followed by two 32 bit cells which
+ * gives the size of the section in bytes.
+ */
+ sections = of_get_flat_dt_prop(node, "ibm,configure-kernel-dump-sizes",
+ &size);
+
+ if (!sections)
+ return;
+
+ /* Each entry is three 32-bit cells: section ID plus a two-cell size. */
+ num_sections = size / (3 * sizeof(u32));
+
+ for (i = 0; i < num_sections; i++, sections += 3) {
+ u32 type = (u32)of_read_number(sections, 1);
+
+ /* Section types other than the two below are ignored. */
+ switch (type) {
+ case RTAS_FADUMP_CPU_STATE_DATA:
+ fadump_conf->cpu_state_data_size =
+ of_read_ulong(&sections[1], 2);
+ break;
+ case RTAS_FADUMP_HPTE_REGION:
+ fadump_conf->hpte_region_size =
+ of_read_ulong(&sections[1], 2);
+ break;
+ }
+ }
+}
diff --git a/arch/powerpc/platforms/pseries/rtas-fadump.h b/arch/powerpc/platforms/pseries/rtas-fadump.h
new file mode 100644
index 000000000..fd59bd7ca
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/rtas-fadump.h
@@ -0,0 +1,114 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Firmware-Assisted Dump support on POWERVM platform.
+ *
+ * Copyright 2011, Mahesh Salgaonkar, IBM Corporation.
+ * Copyright 2019, Hari Bathini, IBM Corporation.
+ */
+
+#ifndef _PSERIES_RTAS_FADUMP_H
+#define _PSERIES_RTAS_FADUMP_H
+
+/*
+ * On some Power systems where RMO is 128MB, it still requires minimum of
+ * 256MB for kernel to boot successfully. When kdump infrastructure is
+ * configured to save vmcore over network, we run into OOM issue while
+ * loading modules related to network setup. Hence we need additional 64M
+ * of memory to avoid OOM issue.
+ *
+ * The value below is therefore 256MB (1 << 28) plus 64MB (1 << 26).
+ */
+#define RTAS_FADUMP_MIN_BOOT_MEM ((0x1UL << 28) + (0x1UL << 26))
+
+/* Firmware provided dump sections */
+#define RTAS_FADUMP_CPU_STATE_DATA 0x0001
+#define RTAS_FADUMP_HPTE_REGION 0x0002
+#define RTAS_FADUMP_REAL_MODE_REGION 0x0011
+
+/* Dump request flag */
+#define RTAS_FADUMP_REQUEST_FLAG 0x00000001
+
+/* Dump status flag */
+#define RTAS_FADUMP_ERROR_FLAG 0x2000
+
+/* Kernel Dump section info */
+struct rtas_fadump_section {
+ /* Set to RTAS_FADUMP_REQUEST_FLAG when the kernel fills in a section. */
+ __be32 request_flag;
+ /* One of the "Firmware provided dump sections" type values. */
+ __be16 source_data_type;
+ /* Non-zero makes rtas_fadump_process() reject the dump as invalid. */
+ __be16 error_flags;
+ __be64 source_address;
+ __be64 source_len;
+ /* Compared with source_len to detect an incomplete dump. */
+ __be64 bytes_dumped;
+ /* Address the section's data is read back from after a dump. */
+ __be64 destination_address;
+};
+
+/* ibm,configure-kernel-dump header. */
+struct rtas_fadump_section_header {
+ __be32 dump_format_version;
+ /* Number of struct rtas_fadump_section entries that follow the header. */
+ __be16 dump_num_sections;
+ /* Checked against RTAS_FADUMP_ERROR_FLAG when a dump is processed. */
+ __be16 dump_status_flag;
+ /* Byte offset of the first section from the start of the structure. */
+ __be32 offset_first_dump_section;
+
+ /* Fields for disk dump option. */
+ __be32 dd_block_size;
+ __be64 dd_block_offset;
+ __be64 dd_num_blocks;
+ __be32 dd_offset_disk_path;
+
+ /* Maximum time allowed to prevent an automatic dump-reboot. */
+ __be32 max_time_auto;
+};
+
+/*
+ * Firmware Assisted dump memory structure. This structure is required for
+ * registering future kernel dump with power firmware through rtas call.
+ *
+ * No disk dump option. Hence disk dump path string section is not included.
+ */
+struct rtas_fadump_mem_struct {
+ struct rtas_fadump_section_header header;
+
+ /* Kernel dump sections */
+ struct rtas_fadump_section cpu_state_data;
+ struct rtas_fadump_section hpte_region;
+
+ /*
+ * TODO: Extend multiple boot memory regions support in the kernel
+ * for this platform.
+ */
+ /* Single boot memory (real mode region) section. */
+ struct rtas_fadump_section rmr_region;
+};
+
+/*
+ * The firmware-assisted dump format.
+ *
+ * The register save area is an area in the partition's memory used to preserve
+ * the register contents (CPU state data) for the active CPUs during a firmware
+ * assisted dump. The dump format contains register save area header followed
+ * by register entries. Each list of registers for a CPU starts with "CPUSTRT"
+ * and ends with "CPUEND".
+ */
+
+/* Register save area header. */
+struct rtas_fadump_reg_save_area_header {
+ /* Expected to equal fadump_str_to_u64("REGSAVE"). */
+ __be64 magic_number;
+ __be32 version;
+ /* Byte offset from the start of this header to the 32-bit CPU count. */
+ __be32 num_cpu_offset;
+};
+
+/* Register entry. */
+struct rtas_fadump_reg_entry {
+ /* Register identifier, compared against fadump_str_to_u64() of its name. */
+ __be64 reg_id;
+ /* Register contents; for "CPUSTRT" the low 32 bits carry the cpu id. */
+ __be64 reg_value;
+};
+
+/* Utility macros */
+
+/*
+ * Advance @reg_entry, a modifiable struct rtas_fadump_reg_entry pointer,
+ * past the rest of the current CPU's register list: walk forward to the
+ * "CPUEND" entry, then step once more so it points at whatever follows.
+ *
+ * The argument is parenthesized at every use so that any lvalue expression
+ * expands correctly. It is evaluated more than once, so it must not have
+ * side effects.
+ */
+#define RTAS_FADUMP_SKIP_TO_NEXT_CPU(reg_entry)				\
+({									\
+	while (be64_to_cpu((reg_entry)->reg_id) !=			\
+			fadump_str_to_u64("CPUEND"))			\
+		(reg_entry)++;						\
+	(reg_entry)++;							\
+})
+
+/* Selects the low 32 bits of a register value, where the cpu id is kept. */
+#define RTAS_FADUMP_CPU_ID_MASK			((1UL << 32) - 1)
+
+#endif /* _PSERIES_RTAS_FADUMP_H */
diff --git a/arch/powerpc/platforms/pseries/rtas-work-area.c b/arch/powerpc/platforms/pseries/rtas-work-area.c
new file mode 100644
index 000000000..b37d52f40
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/rtas-work-area.c
@@ -0,0 +1,209 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#define pr_fmt(fmt) "rtas-work-area: " fmt
+
+#include <linux/genalloc.h>
+#include <linux/log2.h>
+#include <linux/kernel.h>
+#include <linux/memblock.h>
+#include <linux/mempool.h>
+#include <linux/minmax.h>
+#include <linux/mutex.h>
+#include <linux/numa.h>
+#include <linux/sizes.h>
+#include <linux/wait.h>
+
+#include <asm/machdep.h>
+#include <asm/rtas-work-area.h>
+#include <asm/rtas.h>
+
+/* Alignment and sizing parameters for the work area arena. */
+enum {
+ /*
+ * Ensure the pool is page-aligned.
+ */
+ RTAS_WORK_AREA_ARENA_ALIGN = PAGE_SIZE,
+ /*
+ * Don't let a single allocation claim the whole arena.
+ */
+ RTAS_WORK_AREA_ARENA_SZ = RTAS_WORK_AREA_MAX_ALLOC_SZ * 2,
+ /*
+ * The smallest known work area size is for ibm,get-vpd's
+ * location code argument, which is limited to 79 characters
+ * plus 1 nul terminator.
+ *
+ * PAPR+ 7.3.20 ibm,get-vpd RTAS Call
+ * PAPR+ 12.3.2.4 Converged Location Code Rules - Length Restrictions
+ *
+ * Rounding 80 up to a power of two gives 128 bytes.
+ */
+ RTAS_WORK_AREA_MIN_ALLOC_SZ = roundup_pow_of_two(80),
+};
+
+/* State of the work area allocator. */
+static struct {
+ /* Pool over the arena; set by rtas_work_area_allocator_init(). */
+ struct gen_pool *gen_pool;
+ /* Memory obtained in rtas_work_area_reserve_arena(), if any. */
+ char *arena;
+ struct mutex mutex; /* serializes allocations */
+ /* Allocators wait here for space; rtas_work_area_free() wakes them. */
+ struct wait_queue_head wqh;
+ /* Backing store for struct rtas_work_area descriptors. */
+ mempool_t descriptor_pool;
+ /* False until the allocator is initialized; the early work area is used meanwhile. */
+ bool available;
+} rwa_state = {
+ .mutex = __MUTEX_INITIALIZER(rwa_state.mutex),
+ .wqh = __WAIT_QUEUE_HEAD_INITIALIZER(rwa_state.wqh),
+};
+
+/*
+ * A single work area buffer and descriptor to serve requests early in
+ * boot before the allocator is fully initialized. We know 4KB is the
+ * most any boot time user needs (they all call ibm,get-system-parameter).
+ */
+/* True while the early work area is handed out. */
+static bool early_work_area_in_use __initdata;
+static char early_work_area_buf[SZ_4K] __initdata __aligned(SZ_4K);
+static struct rtas_work_area early_work_area __initdata = {
+ .buf = early_work_area_buf,
+ .size = sizeof(early_work_area_buf),
+};
+
+
+/*
+ * Hand out the single early work area. Warns, but still proceeds, when
+ * @size exceeds the buffer or the area is already in use. The buffer is
+ * zeroed before it is returned.
+ */
+static struct rtas_work_area * __init rtas_work_area_alloc_early(size_t size)
+{
+	struct rtas_work_area *area = &early_work_area;
+
+	WARN_ON(size > area->size);
+	WARN_ON(early_work_area_in_use);
+
+	early_work_area_in_use = true;
+	memset(area->buf, 0, area->size);
+
+	return area;
+}
+
+/*
+ * Release the early work area. Warns when @work_area is not the early
+ * descriptor or when the area was not marked in use; the in-use flag is
+ * cleared either way.
+ */
+static void __init rtas_work_area_free_early(struct rtas_work_area *work_area)
+{
+ WARN_ON(work_area != &early_work_area);
+ WARN_ON(!early_work_area_in_use);
+ early_work_area_in_use = false;
+}
+
+/*
+ * Allocate a work area of @size bytes. May sleep.
+ *
+ * Before the allocator is available the request is served from the single
+ * early work area. Afterwards the caller waits until the gen_pool can
+ * satisfy the request, then gets a descriptor from the mempool.
+ */
+struct rtas_work_area * __ref __rtas_work_area_alloc(size_t size)
+{
+	struct rtas_work_area *desc;
+	unsigned long buf_addr;
+
+	might_sleep();
+
+	/*
+	 * The rtas_work_area_alloc() wrapper enforces this at build
+	 * time. Requests that exceed the arena size will block
+	 * indefinitely.
+	 */
+	WARN_ON(size > RTAS_WORK_AREA_MAX_ALLOC_SZ);
+
+	if (!rwa_state.available)
+		return rtas_work_area_alloc_early(size);
+
+	/*
+	 * Holding the mutex while waiting queues allocations, which
+	 * ensures FCFS behavior and keeps a high rate of smaller
+	 * requests from starving larger ones.
+	 */
+	mutex_lock(&rwa_state.mutex);
+	wait_event(rwa_state.wqh,
+		   (buf_addr = gen_pool_alloc(rwa_state.gen_pool, size)) != 0);
+	mutex_unlock(&rwa_state.mutex);
+
+	desc = mempool_alloc(&rwa_state.descriptor_pool, GFP_KERNEL);
+	desc->size = size;
+	desc->buf = (char *)buf_addr;
+
+	return desc;
+}
+
+/*
+ * Release a work area: return the buffer to the gen_pool and the descriptor
+ * to the mempool, then wake any waiting allocators. Before the allocator is
+ * available the area is released through the early path instead.
+ */
+void __ref rtas_work_area_free(struct rtas_work_area *area)
+{
+	if (rwa_state.available) {
+		gen_pool_free(rwa_state.gen_pool, (unsigned long)area->buf,
+			      area->size);
+		mempool_free(area, &rwa_state.descriptor_pool);
+		wake_up(&rwa_state.wqh);
+		return;
+	}
+
+	rtas_work_area_free_early(area);
+}
+
+/*
+ * Initialization of the work area allocator happens in two parts. To
+ * reliably reserve an arena that satisfies RTAS addressing
+ * requirements, we must perform a memblock allocation early,
+ * immediately after RTAS instantiation. Then we have to wait until
+ * the slab allocator is up before setting up the descriptor mempool
+ * and adding the arena to a gen_pool.
+ */
+static __init int rtas_work_area_allocator_init(void)
+{
+	const unsigned int order = ilog2(RTAS_WORK_AREA_MIN_ALLOC_SZ);
+	const phys_addr_t pa_start = __pa(rwa_state.arena);
+	const phys_addr_t pa_end = pa_start + RTAS_WORK_AREA_ARENA_SZ - 1;
+	const int nid = NUMA_NO_NODE;
+	struct gen_pool *pool;
+	int err;
+
+	/* Without an arena there is nothing to manage. */
+	if (!rwa_state.arena)
+		return -ENOMEM;
+
+	pool = gen_pool_create(order, nid);
+	if (!pool)
+		return -ENOMEM;
+
+	/*
+	 * All RTAS functions that consume work areas are OK with
+	 * natural alignment, when they have alignment requirements at
+	 * all.
+	 */
+	gen_pool_set_algo(pool, gen_pool_first_fit_order_align, NULL);
+
+	err = gen_pool_add(pool, (unsigned long)rwa_state.arena,
+			   RTAS_WORK_AREA_ARENA_SZ, nid);
+	if (!err)
+		err = mempool_init_kmalloc_pool(&rwa_state.descriptor_pool, 1,
+						sizeof(struct rtas_work_area));
+	if (err) {
+		gen_pool_destroy(pool);
+		return err;
+	}
+
+	/* Publish the pool; from here on requests bypass the early area. */
+	rwa_state.gen_pool = pool;
+	rwa_state.available = true;
+
+	pr_debug("arena [%pa-%pa] (%uK), min/max alloc sizes %u/%u\n",
+		 &pa_start, &pa_end,
+		 RTAS_WORK_AREA_ARENA_SZ / SZ_1K,
+		 RTAS_WORK_AREA_MIN_ALLOC_SZ,
+		 RTAS_WORK_AREA_MAX_ALLOC_SZ);
+
+	return 0;
+}
+machine_arch_initcall(pseries, rtas_work_area_allocator_init);
+
+/**
+ * rtas_work_area_reserve_arena() - Reserve memory suitable for RTAS work areas.
+ * @limit: Upper address bound passed to memblock_alloc_try_nid() for the arena.
+ *
+ * The arena is only set up when RTAS implements ibm,get-system-parameter or
+ * ibm,configure-connector; otherwise rwa_state.arena is left untouched.
+ */
+void __init rtas_work_area_reserve_arena(const phys_addr_t limit)
+{
+	const phys_addr_t arena_size = RTAS_WORK_AREA_ARENA_SZ;
+	const phys_addr_t arena_align = RTAS_WORK_AREA_ARENA_ALIGN;
+	const phys_addr_t lowest = MEMBLOCK_LOW_LIMIT;
+	const int nid = NUMA_NO_NODE;
+
+	/*
+	 * Too early for a machine_is(pseries) check. But PAPR
+	 * effectively mandates that ibm,get-system-parameter is
+	 * present:
+	 *
+	 * R1–7.3.16–1. All platforms must support the System
+	 * Parameters option.
+	 *
+	 * So key the arena on that function, falling back to
+	 * ibm,configure-connector, just in case.
+	 */
+	if (!rtas_function_implemented(RTAS_FN_IBM_GET_SYSTEM_PARAMETER) &&
+	    !rtas_function_implemented(RTAS_FN_IBM_CONFIGURE_CONNECTOR))
+		return;
+
+	rwa_state.arena = memblock_alloc_try_nid(arena_size, arena_align,
+						 lowest, limit, nid);
+}
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
new file mode 100644
index 000000000..ecea85c74
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -0,0 +1,1162 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * 64-bit pSeries and RS/6000 setup code.
+ *
+ * Copyright (C) 1995 Linus Torvalds
+ * Adapted from 'alpha' version by Gary Thomas
+ * Modified by Cort Dougan (cort@cs.nmt.edu)
+ * Modified by PPC64 Team, IBM Corp
+ */
+
+/*
+ * bootup setup stuff..
+ */
+
+#include <linux/cpu.h>
+#include <linux/errno.h>
+#include <linux/platform_device.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+#include <linux/user.h>
+#include <linux/tty.h>
+#include <linux/major.h>
+#include <linux/interrupt.h>
+#include <linux/reboot.h>
+#include <linux/init.h>
+#include <linux/ioport.h>
+#include <linux/console.h>
+#include <linux/pci.h>
+#include <linux/utsname.h>
+#include <linux/adb.h>
+#include <linux/export.h>
+#include <linux/delay.h>
+#include <linux/irq.h>
+#include <linux/seq_file.h>
+#include <linux/root_dev.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
+#include <linux/of_pci.h>
+#include <linux/memblock.h>
+#include <linux/swiotlb.h>
+#include <linux/seq_buf.h>
+
+#include <asm/mmu.h>
+#include <asm/processor.h>
+#include <asm/io.h>
+#include <asm/rtas.h>
+#include <asm/pci-bridge.h>
+#include <asm/iommu.h>
+#include <asm/dma.h>
+#include <asm/machdep.h>
+#include <asm/irq.h>
+#include <asm/time.h>
+#include <asm/nvram.h>
+#include <asm/pmc.h>
+#include <asm/xics.h>
+#include <asm/xive.h>
+#include <asm/papr-sysparm.h>
+#include <asm/ppc-pci.h>
+#include <asm/i8259.h>
+#include <asm/udbg.h>
+#include <asm/smp.h>
+#include <asm/firmware.h>
+#include <asm/eeh.h>
+#include <asm/reg.h>
+#include <asm/plpar_wrappers.h>
+#include <asm/kexec.h>
+#include <asm/isa-bridge.h>
+#include <asm/security_features.h>
+#include <asm/asm-const.h>
+#include <asm/idle.h>
+#include <asm/swiotlb.h>
+#include <asm/svm.h>
+#include <asm/dtl.h>
+#include <asm/hvconsole.h>
+#include <asm/setup.h>
+
+#include "pseries.h"
+
+DEFINE_STATIC_KEY_FALSE(shared_processor);
+EXPORT_SYMBOL(shared_processor);
+
+#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
+/* Static keys incremented in pSeries_setup_arch() on shared-processor LPARs. */
+struct static_key paravirt_steal_enabled;
+struct static_key paravirt_steal_rq_enabled;
+
+/*
+ * When false, pSeries_setup_arch() leaves paravirt_steal_rq_enabled
+ * untouched. Cleared by the "no-steal-acc" early parameter.
+ */
+static bool steal_acc = true;
+/* early_param handler for "no-steal-acc"; @arg is ignored. Always returns 0. */
+static int __init parse_no_stealacc(char *arg)
+{
+ steal_acc = false;
+ return 0;
+}
+
+early_param("no-steal-acc", parse_no_stealacc);
+#endif
+
+/*
+ * Values parsed by pSeries_cmo_feature_init() from the "PrPSP" and "SecPSP"
+ * keys; -1 means the key was not reported, and CMO is treated as enabled
+ * when either one differs from -1.
+ */
+int CMO_PrPSP = -1;
+int CMO_SecPSP = -1;
+/* CMO page size in bytes; defaults to 4K until "CMOPageSize" is parsed. */
+unsigned long CMO_PageSize = (ASM_CONST(1) << IOMMU_PAGE_SHIFT_4K);
+EXPORT_SYMBOL(CMO_PageSize);
+
+int fwnmi_active; /* TRUE if an FWNMI handler is present */
+/* RTAS token for ibm,nmi-interlock, looked up in fwnmi_init(). */
+int ibm_nmi_interlock_token;
+/*
+ * Set by init_cpu_char_feature_flags(): 0 when firmware does not favour
+ * security, 1 when H_CPU_BEHAV_FAVOUR_SECURITY_H is also set, 2 otherwise.
+ */
+u32 pseries_security_flavor;
+
+/*
+ * Platform show_cpuinfo hook: print the machine model taken from the root
+ * device-tree node's "model" property, followed by the active MMU mode.
+ *
+ * @m: seq_file the lines are written to.
+ */
+static void pSeries_show_cpuinfo(struct seq_file *m)
+{
+	struct device_node *root;
+	const char *model = NULL;
+
+	root = of_find_node_by_path("/");
+	if (root)
+		model = of_get_property(root, "model", NULL);
+
+	/*
+	 * of_get_property() returns NULL when the property is absent; print
+	 * an empty model then instead of handing NULL to "%s". The property
+	 * data belongs to the node, so print before dropping the reference.
+	 */
+	seq_printf(m, "machine\t\t: CHRP %s\n", model ? model : "");
+	of_node_put(root);
+
+	seq_puts(m, radix_enabled() ? "MMU\t\t: Radix\n" : "MMU\t\t: Hash\n");
+}
+
+/* Initialize firmware assisted non-maskable interrupts if
+ * the firmware supports this feature.
+ *
+ * Returns early, without allocating anything, when either the
+ * ibm,nmi-register or the ibm,nmi-interlock RTAS function is unavailable.
+ * Otherwise registers the system reset and machine check entry points with
+ * firmware, then hands each possible CPU's paca a slice of a machine check
+ * error-log buffer (and, on hash MMU, of an SLB save area). Both allocations
+ * are requested below ppc64_rma_size and panic on failure.
+ */
+static void __init fwnmi_init(void)
+{
+ unsigned long system_reset_addr, machine_check_addr;
+ u8 *mce_data_buf;
+ unsigned int i;
+ int nr_cpus = num_possible_cpus();
+#ifdef CONFIG_PPC_64S_HASH_MMU
+ struct slb_entry *slb_ptr;
+ size_t size;
+#endif
+ int ibm_nmi_register_token;
+
+ ibm_nmi_register_token = rtas_function_token(RTAS_FN_IBM_NMI_REGISTER);
+ if (ibm_nmi_register_token == RTAS_UNKNOWN_SERVICE)
+ return;
+
+ ibm_nmi_interlock_token = rtas_function_token(RTAS_FN_IBM_NMI_INTERLOCK);
+ if (WARN_ON(ibm_nmi_interlock_token == RTAS_UNKNOWN_SERVICE))
+ return;
+
+ /* If the kernel's not linked at zero we point the firmware at low
+ * addresses anyway, and use a trampoline to get to the real code. */
+ system_reset_addr = __pa(system_reset_fwnmi) - PHYSICAL_START;
+ machine_check_addr = __pa(machine_check_fwnmi) - PHYSICAL_START;
+
+ /* fwnmi_active stays 0 if registration fails; buffers are set up anyway. */
+ if (0 == rtas_call(ibm_nmi_register_token, 2, 1, NULL,
+ system_reset_addr, machine_check_addr))
+ fwnmi_active = 1;
+
+ /*
+ * Allocate a chunk for per cpu buffer to hold rtas errorlog.
+ * It will be used in real mode mce handler, hence it needs to be
+ * below RMA.
+ */
+ mce_data_buf = memblock_alloc_try_nid_raw(RTAS_ERROR_LOG_MAX * nr_cpus,
+ RTAS_ERROR_LOG_MAX, MEMBLOCK_LOW_LIMIT,
+ ppc64_rma_size, NUMA_NO_NODE);
+ if (!mce_data_buf)
+ panic("Failed to allocate %d bytes below %pa for MCE buffer\n",
+ RTAS_ERROR_LOG_MAX * nr_cpus, &ppc64_rma_size);
+
+ /*
+ * NOTE(review): the buffer is sized by num_possible_cpus() but sliced
+ * by CPU number; this presumably relies on possible CPU numbers being
+ * contiguous from 0 - confirm.
+ */
+ for_each_possible_cpu(i) {
+ paca_ptrs[i]->mce_data_buf = mce_data_buf +
+ (RTAS_ERROR_LOG_MAX * i);
+ }
+
+#ifdef CONFIG_PPC_64S_HASH_MMU
+ if (!radix_enabled()) {
+ /* Allocate per cpu area to save old slb contents during MCE */
+ size = sizeof(struct slb_entry) * mmu_slb_size * nr_cpus;
+ slb_ptr = memblock_alloc_try_nid_raw(size,
+ sizeof(struct slb_entry), MEMBLOCK_LOW_LIMIT,
+ ppc64_rma_size, NUMA_NO_NODE);
+ if (!slb_ptr)
+ panic("Failed to allocate %zu bytes below %pa for slb area\n",
+ size, &ppc64_rma_size);
+
+ /* Each CPU gets mmu_slb_size consecutive entries. */
+ for_each_possible_cpu(i)
+ paca_ptrs[i]->mce_faulty_slbs = slb_ptr + (mmu_slb_size * i);
+ }
+#endif
+}
+
+/*
+ * Register a single "pseries-wdt" platform device (id 0) when firmware
+ * advertises FW_FEATURE_WATCHDOG, i.e. support for the H_WATCHDOG
+ * hypercall. The registration result is not checked; always returns 0.
+ */
+static __init int pseries_wdt_init(void)
+{
+	if (!firmware_has_feature(FW_FEATURE_WATCHDOG))
+		return 0;
+
+	platform_device_register_simple("pseries-wdt", 0, NULL, 0);
+	return 0;
+}
+machine_subsys_initcall(pseries, pseries_wdt_init);
+
+/*
+ * Chained handler for the i8259 cascade interrupt: dispatch whatever
+ * interrupt i8259_irq() reports (0 means none), then EOI the parent.
+ */
+static void pseries_8259_cascade(struct irq_desc *desc)
+{
+	struct irq_chip *parent_chip = irq_desc_get_chip(desc);
+	unsigned int virq = i8259_irq();
+
+	if (virq != 0)
+		generic_handle_irq(virq);
+
+	parent_chip->irq_eoi(&desc->irq_data);
+}
+
+/*
+ * Locate a "chrp,iic" interrupt controller, map its cascade interrupt,
+ * look for an "8259-interrupt-acknowledge" address on the enclosing "pci"
+ * nodes, then initialise the i8259 and install the chained handler.
+ *
+ * Does nothing beyond logging when no such controller exists or when the
+ * cascade interrupt cannot be mapped.
+ */
+static void __init pseries_setup_i8259_cascade(void)
+{
+	struct device_node *np, *old, *found = NULL;
+	unsigned int cascade;
+	const u32 *addrp;
+	unsigned long intack = 0;
+	int naddr;
+
+	for_each_node_by_type(np, "interrupt-controller") {
+		if (of_device_is_compatible(np, "chrp,iic")) {
+			/* Leaving the loop early keeps the reference on np. */
+			found = np;
+			break;
+		}
+	}
+
+	if (found == NULL) {
+		printk(KERN_DEBUG "pic: no ISA interrupt controller\n");
+		return;
+	}
+
+	cascade = irq_of_parse_and_map(found, 0);
+	if (!cascade) {
+		printk(KERN_ERR "pic: failed to map cascade interrupt\n");
+		/* Drop the reference taken by the lookup loop above. */
+		of_node_put(found);
+		return;
+	}
+	pr_debug("pic: cascade mapped to irq %d\n", cascade);
+
+	/*
+	 * Walk up towards the root. Every node visited is released again, so
+	 * only the original reference on 'found' is still held afterwards.
+	 */
+	for (old = of_node_get(found); old != NULL ; old = np) {
+		np = of_get_parent(old);
+		of_node_put(old);
+		if (np == NULL)
+			break;
+		if (!of_node_name_eq(np, "pci"))
+			continue;
+		addrp = of_get_property(np, "8259-interrupt-acknowledge", NULL);
+		if (addrp == NULL)
+			continue;
+		/* The address is the last one or two cells of the property. */
+		naddr = of_n_addr_cells(np);
+		intack = addrp[naddr-1];
+		if (naddr > 1)
+			intack |= ((unsigned long)addrp[naddr-2]) << 32;
+	}
+	if (intack)
+		printk(KERN_DEBUG "pic: PCI 8259 intack at 0x%016lx\n", intack);
+	i8259_init(found, intack);
+	of_node_put(found);
+	irq_set_chained_handler(cascade, pseries_8259_cascade);
+}
+
+/*
+ * Interrupt controller setup: prefer XIVE; when xive_spapr_init() reports
+ * failure fall back to XICS plus the optional i8259 cascade.
+ */
+static void __init pseries_init_irq(void)
+{
+	if (xive_spapr_init())
+		return;
+
+	xics_init();
+	pseries_setup_i8259_cascade();
+}
+
+/* Issue H_PERFMON with only the top bit of the "set" mask; result ignored. */
+static void pseries_lpar_enable_pmcs(void)
+{
+	/* Kept as unsigned long objects: the hcall wrapper takes them as such. */
+	const unsigned long set = 1UL << 63;
+	const unsigned long reset = 0;
+
+	plpar_hcall_norets(H_PERFMON, set, reset);
+}
+
+/*
+ * Device-tree reconfiguration notifier keeping pci_dn data in step with
+ * node attach/detach events.
+ *
+ * Returns NOTIFY_OK for attach and detach, NOTIFY_DONE for anything else.
+ */
+static int pci_dn_reconfig_notifier(struct notifier_block *nb, unsigned long action, void *data)
+{
+	struct of_reconfig_data *rd = data;
+	struct device_node *np = rd->dn;
+	struct device_node *parent;
+	struct pci_dn *pdn;
+
+	if (action == OF_RECONFIG_ATTACH_NODE) {
+		/* Only nodes whose parent already has a pci_dn get one. */
+		parent = of_get_parent(np);
+		pdn = parent ? PCI_DN(parent) : NULL;
+		if (pdn)
+			pci_add_device_node_info(pdn->phb, np);
+
+		of_node_put(parent);
+		return NOTIFY_OK;
+	}
+
+	if (action == OF_RECONFIG_DETACH_NODE) {
+		pdn = PCI_DN(np);
+		if (pdn)
+			list_del(&pdn->list);
+		return NOTIFY_OK;
+	}
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block pci_dn_reconfig_nb = {
+	.notifier_call = pci_dn_reconfig_notifier,
+};
+
+/* Slab cache for dispatch trace log buffers; NULL until created. */
+struct kmem_cache *dtl_cache;
+
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+/*
+ * Allocate space for the dispatch trace log for all possible cpus
+ * and register the buffers with the hypervisor. This is used for
+ * computing time stolen by the hypervisor.
+ *
+ * Does nothing without FW_FEATURE_SPLPAR or before dtl_cache exists.
+ * Always returns 0.
+ */
+static int alloc_dispatch_logs(void)
+{
+	if (!firmware_has_feature(FW_FEATURE_SPLPAR) || !dtl_cache)
+		return 0;
+
+	alloc_dtl_buffers(0);
+
+	/* Register the DTL for the current (boot) cpu */
+	register_dtl_buffer(smp_processor_id());
+
+	return 0;
+}
+#else /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
+static inline int alloc_dispatch_logs(void)
+{
+	return 0;
+}
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
+
+/*
+ * Early initcall: create the "dtl" slab cache (object size and alignment
+ * both DISPATCH_LOG_BYTES) and then allocate the dispatch logs. A failed
+ * cache creation is only warned about; 0 is returned in that case.
+ */
+static int alloc_dispatch_log_kmem_cache(void)
+{
+	dtl_cache = kmem_cache_create("dtl", DISPATCH_LOG_BYTES,
+				      DISPATCH_LOG_BYTES, 0,
+				      get_dtl_cache_ctor());
+	if (dtl_cache)
+		return alloc_dispatch_logs();
+
+	pr_warn("Failed to create dispatch trace log buffer cache\n");
+	pr_warn("Stolen time statistics will be unreliable\n");
+	return 0;
+}
+machine_early_initcall(pseries, alloc_dispatch_log_kmem_cache);
+
+DEFINE_PER_CPU(u64, idle_spurr_cycles);
+DEFINE_PER_CPU(u64, idle_entry_purr_snap);
+DEFINE_PER_CPU(u64, idle_entry_spurr_snap);
+/*
+ * Default LPAR idle handler: go into low thread priority and possibly
+ * low power mode by ceding the processor to the hypervisor.
+ */
+static void pseries_lpar_idle(void)
+{
+	if (prep_irq_for_idle()) {
+		/* Indicate to hypervisor that we are idle. */
+		pseries_idle_prolog();
+
+		/*
+		 * Yield the processor to the hypervisor. We return if
+		 * an external interrupt occurs (which are driven prior
+		 * to returning here) or if a prod occurs from another
+		 * processor. When returning here, external interrupts
+		 * are enabled.
+		 */
+		cede_processor();
+
+		pseries_idle_epilog();
+	}
+}
+
+/*
+ * Set by pseries_enable_reloc_on_exc() on success and cleared by
+ * pseries_disable_reloc_on_exc() on success.
+ */
+static bool pseries_reloc_on_exception_enabled;
+
+/* Report whether relocation-on-exceptions is currently recorded as enabled. */
+bool pseries_reloc_on_exception(void)
+{
+ return pseries_reloc_on_exception_enabled;
+}
+EXPORT_SYMBOL_GPL(pseries_reloc_on_exception);
+
+/*
+ * Enable relocation on during exceptions. This has partition wide scope and
+ * may take a while to complete, if it takes longer than one second we will
+ * just give up rather than wasting any more time on this - if that turns out
+ * to ever be a problem in practice we can move this into a kernel thread to
+ * finish off the process later in boot.
+ *
+ * Returns true only when the hypervisor reports H_SUCCESS.
+ */
+bool pseries_enable_reloc_on_exc(void)
+{
+	unsigned int waited_ms = 0;
+	unsigned int step_ms;
+	long rc;
+
+	for (;;) {
+		rc = enable_reloc_on_exceptions();
+		if (!H_IS_LONG_BUSY(rc))
+			break;
+
+		/* Account for the wait before sleeping so the cap is strict. */
+		step_ms = get_longbusy_msecs(rc);
+		waited_ms += step_ms;
+		if (waited_ms > 1000) {
+			pr_warn("Warning: Giving up waiting to enable "
+				"relocation on exceptions (%u msec)!\n",
+				waited_ms);
+			return false;
+		}
+
+		mdelay(step_ms);
+	}
+
+	if (rc == H_P2) {
+		pr_info("Relocation on exceptions not"
+			" supported\n");
+		return false;
+	}
+
+	if (rc != H_SUCCESS) {
+		pr_warn("Unable to enable relocation"
+			" on exceptions: %ld\n", rc);
+		return false;
+	}
+
+	pseries_reloc_on_exception_enabled = true;
+	return true;
+}
+EXPORT_SYMBOL(pseries_enable_reloc_on_exc);
+
+/*
+ * Disable relocation on exceptions, retrying for as long as the hypervisor
+ * answers with a long-busy code. The cached state is only cleared on
+ * H_SUCCESS; any other result is logged.
+ */
+void pseries_disable_reloc_on_exc(void)
+{
+	long rc = disable_reloc_on_exceptions();
+
+	while (H_IS_LONG_BUSY(rc)) {
+		mdelay(get_longbusy_msecs(rc));
+		rc = disable_reloc_on_exceptions();
+	}
+
+	if (rc != H_SUCCESS) {
+		pr_warn("Warning: Failed to disable relocation on exceptions: %ld\n",
+			rc);
+		return;
+	}
+
+	pseries_reloc_on_exception_enabled = false;
+}
+EXPORT_SYMBOL(pseries_disable_reloc_on_exc);
+
+#ifdef __LITTLE_ENDIAN__
+/*
+ * Switch exception delivery to big endian, retrying while the hypervisor
+ * reports long-busy. Panics on failure unless a kdump is in progress.
+ */
+void pseries_big_endian_exceptions(void)
+{
+	long rc = enable_big_endian_exceptions();
+
+	while (H_IS_LONG_BUSY(rc)) {
+		mdelay(get_longbusy_msecs(rc));
+		rc = enable_big_endian_exceptions();
+	}
+
+	/*
+	 * At this point it is unlikely panic() will get anything
+	 * out to the user, since this is called very late in kexec
+	 * but at least this will stop us from continuing on further
+	 * and creating an even more difficult to debug situation.
+	 *
+	 * There is a known problem when kdump'ing, if cpus are offline
+	 * the above call will fail. Rather than panicking again, keep
+	 * going and hope the kdump kernel is also little endian, which
+	 * it usually is.
+	 */
+	if (rc && !kdump_in_progress())
+		panic("Could not enable big endian exceptions");
+}
+
+/*
+ * Switch exception delivery to little endian, retrying while the
+ * hypervisor reports long-busy. Any other failure is reported through
+ * ppc_md.progress and is fatal.
+ */
+void __init pseries_little_endian_exceptions(void)
+{
+	long rc = enable_little_endian_exceptions();
+
+	while (H_IS_LONG_BUSY(rc)) {
+		mdelay(get_longbusy_msecs(rc));
+		rc = enable_little_endian_exceptions();
+	}
+
+	if (!rc)
+		return;
+
+	ppc_md.progress("H_SET_MODE LE exception fail", 0);
+	panic("Could not enable little endian exceptions");
+}
+#endif
+
+/*
+ * Create and initialise a PCI controller for every child of the root
+ * device-tree node whose device type is "pci" or "pciex".
+ */
+static void __init pSeries_discover_phbs(void)
+{
+	struct device_node *root = of_find_node_by_path("/");
+	struct device_node *child;
+	struct pci_controller *hose;
+
+	for_each_child_of_node(root, child) {
+		const bool is_phb = of_node_is_type(child, "pci") ||
+				    of_node_is_type(child, "pciex");
+
+		if (!is_phb)
+			continue;
+
+		hose = pcibios_alloc_controller(child);
+		if (!hose)
+			continue;
+
+		rtas_setup_phb(hose);
+		pci_process_bridge_OF_ranges(hose, child, 0);
+		isa_bridge_find_early(hose);
+		hose->controller_ops = pseries_pci_controller_ops;
+
+		/* create pci_dn's for DT nodes under this PHB */
+		pci_devs_phb_init_dynamic(hose);
+
+		pseries_msi_allocate_domains(hose);
+	}
+
+	of_node_put(root);
+
+	/*
+	 * PCI_PROBE_ONLY and PCI_REASSIGN_ALL_BUS can be set via properties
+	 * in chosen.
+	 */
+	of_pci_check_probe_only();
+}
+
+/*
+ * Translate the hypervisor's CPU characteristics into security feature
+ * bits and record the security flavor.
+ *
+ * @result: character and behaviour words, as filled in by
+ *          plpar_get_cpu_characteristics() in the caller.
+ *
+ * The caller resets powerpc_security_features to SEC_FTR_DEFAULT first, so
+ * this function only sets default-off features and clears default-on ones.
+ */
+static void init_cpu_char_feature_flags(struct h_cpu_char_result *result)
+{
+ /*
+ * The features below are disabled by default, so we instead look to see
+ * if firmware has *enabled* them, and set them if so.
+ */
+ if (result->character & H_CPU_CHAR_SPEC_BAR_ORI31)
+ security_ftr_set(SEC_FTR_SPEC_BAR_ORI31);
+
+ if (result->character & H_CPU_CHAR_BCCTRL_SERIALISED)
+ security_ftr_set(SEC_FTR_BCCTRL_SERIALISED);
+
+ if (result->character & H_CPU_CHAR_L1D_FLUSH_ORI30)
+ security_ftr_set(SEC_FTR_L1D_FLUSH_ORI30);
+
+ if (result->character & H_CPU_CHAR_L1D_FLUSH_TRIG2)
+ security_ftr_set(SEC_FTR_L1D_FLUSH_TRIG2);
+
+ if (result->character & H_CPU_CHAR_L1D_THREAD_PRIV)
+ security_ftr_set(SEC_FTR_L1D_THREAD_PRIV);
+
+ if (result->character & H_CPU_CHAR_COUNT_CACHE_DISABLED)
+ security_ftr_set(SEC_FTR_COUNT_CACHE_DISABLED);
+
+ if (result->character & H_CPU_CHAR_BCCTR_FLUSH_ASSIST)
+ security_ftr_set(SEC_FTR_BCCTR_FLUSH_ASSIST);
+
+ if (result->character & H_CPU_CHAR_BCCTR_LINK_FLUSH_ASSIST)
+ security_ftr_set(SEC_FTR_BCCTR_LINK_FLUSH_ASSIST);
+
+ if (result->behaviour & H_CPU_BEHAV_FLUSH_COUNT_CACHE)
+ security_ftr_set(SEC_FTR_FLUSH_COUNT_CACHE);
+
+ if (result->behaviour & H_CPU_BEHAV_FLUSH_LINK_STACK)
+ security_ftr_set(SEC_FTR_FLUSH_LINK_STACK);
+
+ /*
+ * The features below are enabled by default, so we instead look to see
+ * if firmware has *disabled* them, and clear them if so.
+ * H_CPU_BEHAV_FAVOUR_SECURITY_H could be set only if
+ * H_CPU_BEHAV_FAVOUR_SECURITY is.
+ */
+ /* Flavor: 0 = security not favoured, 1 = FAVOUR_SECURITY_H, 2 = plain. */
+ if (!(result->behaviour & H_CPU_BEHAV_FAVOUR_SECURITY)) {
+ security_ftr_clear(SEC_FTR_FAVOUR_SECURITY);
+ pseries_security_flavor = 0;
+ } else if (result->behaviour & H_CPU_BEHAV_FAVOUR_SECURITY_H)
+ pseries_security_flavor = 1;
+ else
+ pseries_security_flavor = 2;
+
+ if (!(result->behaviour & H_CPU_BEHAV_L1D_FLUSH_PR))
+ security_ftr_clear(SEC_FTR_L1D_FLUSH_PR);
+
+ /* The NO_* behaviour bits are inverted: set means "feature not needed". */
+ if (result->behaviour & H_CPU_BEHAV_NO_L1D_FLUSH_ENTRY)
+ security_ftr_clear(SEC_FTR_L1D_FLUSH_ENTRY);
+
+ if (result->behaviour & H_CPU_BEHAV_NO_L1D_FLUSH_UACCESS)
+ security_ftr_clear(SEC_FTR_L1D_FLUSH_UACCESS);
+
+ if (result->behaviour & H_CPU_BEHAV_NO_STF_BARRIER)
+ security_ftr_clear(SEC_FTR_STF_BARRIER);
+
+ if (!(result->behaviour & H_CPU_BEHAV_BNDS_CHK_SPEC_BAR))
+ security_ftr_clear(SEC_FTR_BNDS_CHK_SPEC_BAR);
+}
+
+/*
+ * (Re)derive the security feature bits from the hypervisor and configure
+ * the RFI, count cache, entry, uaccess and STF barrier mitigations from
+ * them.
+ */
+void pseries_setup_security_mitigations(void)
+{
+	enum l1d_flush_type types = L1D_FLUSH_FALLBACK;
+	struct h_cpu_char_result result;
+	bool enable;
+
+	/*
+	 * Set features to the defaults assumed by init_cpu_char_feature_flags()
+	 * so it can set/clear again any features that might have changed after
+	 * migration, and in case the hypercall fails and it is not even called.
+	 */
+	powerpc_security_features = SEC_FTR_DEFAULT;
+
+	if (plpar_get_cpu_characteristics(&result) == H_SUCCESS)
+		init_cpu_char_feature_flags(&result);
+
+	/*
+	 * We're the guest so this doesn't apply to us, clear it to simplify
+	 * handling of it elsewhere.
+	 */
+	security_ftr_clear(SEC_FTR_L1D_FLUSH_HV);
+
+	/* The fallback flush is always offered; add the assisted variants. */
+	if (security_ftr_enabled(SEC_FTR_L1D_FLUSH_TRIG2))
+		types |= L1D_FLUSH_MTTRIG;
+
+	if (security_ftr_enabled(SEC_FTR_L1D_FLUSH_ORI30))
+		types |= L1D_FLUSH_ORI;
+
+	enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) &&
+		 security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR);
+	setup_rfi_flush(types, enable);
+
+	setup_count_cache_flush();
+
+	enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) &&
+		 security_ftr_enabled(SEC_FTR_L1D_FLUSH_ENTRY);
+	setup_entry_flush(enable);
+
+	enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) &&
+		 security_ftr_enabled(SEC_FTR_L1D_FLUSH_UACCESS);
+	setup_uaccess_flush(enable);
+
+	setup_stf_barrier();
+}
+
+#ifdef CONFIG_PCI_IOV
+/*
+ * Cell offsets inside the "ibm,open-sriov-vf-bar-info" property. Cell 0
+ * holds the entry count; entries start at START_OF_ENTRIES and are
+ * NEXT_ENTRY cells apart. The remaining values are offsets relative to
+ * the start of an entry.
+ */
+enum rtas_iov_fw_value_map {
+ NUM_RES_PROPERTY = 0, /* Number of Resources */
+ LOW_INT = 1, /* Lowest 32 bits of Address */
+ START_OF_ENTRIES = 2, /* Always start of entry */
+ APERTURE_PROPERTY = 2, /* Start of entry+ to Aperture Size */
+ WDW_SIZE_PROPERTY = 4, /* Start of entry+ to Window Size */
+ NEXT_ENTRY = 7 /* Go to next entry on array */
+};
+
+/* Selects which field pseries_get_iov_fw_value() reads from an entry. */
+enum get_iov_fw_value_index {
+ BAR_ADDRS = 1, /* Get Bar Address */
+ APERTURE_SIZE = 2, /* Get Aperture Size */
+ WDW_SIZE = 3 /* Get Window Size */
+};
+
+/*
+ * Read one field of one VF BAR entry from the device's
+ * "ibm,open-sriov-vf-bar-info" property.
+ *
+ * @dev:   PCI device whose device-tree node carries the property.
+ * @resno: zero-based index of the entry to read.
+ * @value: which field of the entry to return.
+ *
+ * Returns the two-cell (64-bit) field value, or 0 when the property is
+ * missing or @resno is outside the range the property advertises.
+ */
+static resource_size_t pseries_get_iov_fw_value(struct pci_dev *dev, int resno,
+						enum get_iov_fw_value_index value)
+{
+	const __be32 *indexes;
+	struct device_node *dn = pci_device_to_OF_node(dev);
+	/* Must be as wide as the return type: the fields are 64-bit. */
+	resource_size_t ret = 0;
+	int i, num_res;
+
+	indexes = of_get_property(dn, "ibm,open-sriov-vf-bar-info", NULL);
+	if (!indexes)
+		return 0;
+
+	/*
+	 * First element in the array is the number of Bars
+	 * returned. Search through the list to find the matching
+	 * bar
+	 */
+	num_res = of_read_number(&indexes[NUM_RES_PROPERTY], 1);
+	if (resno < 0 || resno >= num_res)
+		return 0; /* or an error */
+
+	i = START_OF_ENTRIES + NEXT_ENTRY * resno;
+	switch (value) {
+	case BAR_ADDRS:
+		ret = of_read_number(&indexes[i], 2);
+		break;
+	case APERTURE_SIZE:
+		ret = of_read_number(&indexes[i + APERTURE_PROPERTY], 2);
+		break;
+	case WDW_SIZE:
+		ret = of_read_number(&indexes[i + WDW_SIZE_PROPERTY], 2);
+		break;
+	default:
+		break;
+	}
+
+	return ret;
+}
+
+/*
+ * Fill in the IOV BAR resources of @dev from firmware data, using each
+ * entry's base address and aperture size. At most PCI_SRIOV_NUM_BARS
+ * entries are consumed.
+ *
+ * @indexes: contents of the "ibm,open-sriov-vf-bar-info" property.
+ */
+static void of_pci_set_vf_bar_size(struct pci_dev *dev, const int *indexes)
+{
+	int bar, count;
+
+	count = of_read_number(&indexes[NUM_RES_PROPERTY], 1);
+	count = min_t(int, count, PCI_SRIOV_NUM_BARS);
+
+	for (bar = 0; bar < count; bar++) {
+		const int *entry = &indexes[START_OF_ENTRIES + bar * NEXT_ENTRY];
+		struct resource *res = &dev->resource[bar + PCI_IOV_RESOURCES];
+		resource_size_t base = of_read_number(entry, 2);
+		resource_size_t size = of_read_number(entry + APERTURE_PROPERTY, 2);
+
+		res->flags = pci_parse_of_flags(of_read_number(entry + LOW_INT, 1), 0);
+		res->flags |= (IORESOURCE_MEM_64 | IORESOURCE_PCI_FIXED);
+		res->name = pci_name(dev);
+		res->start = base;
+		res->end = base + size - 1;
+	}
+}
+
+/*
+ * Assign the IOV BARs of @dev from firmware data (base address and window
+ * size) and claim each range in iomem_resource. A range that conflicts
+ * with an existing resource is logged and flagged IORESOURCE_UNSET.
+ *
+ * @indexes: contents of the "ibm,open-sriov-vf-bar-info" property; the
+ *           first element is the number of BAR entries that follow.
+ */
+static void of_pci_parse_iov_addrs(struct pci_dev *dev, const int *indexes)
+{
+	int bar, count;
+
+	count = of_read_number(&indexes[NUM_RES_PROPERTY], 1);
+	count = min_t(int, count, PCI_SRIOV_NUM_BARS);
+
+	for (bar = 0; bar < count; bar++) {
+		const int *entry = &indexes[START_OF_ENTRIES + bar * NEXT_ENTRY];
+		struct resource *res = &dev->resource[bar + PCI_IOV_RESOURCES];
+		resource_size_t base = of_read_number(entry, 2);
+		resource_size_t size = of_read_number(entry + WDW_SIZE_PROPERTY, 2);
+		struct resource *conflict;
+
+		res->name = pci_name(dev);
+		res->start = base;
+		res->end = base + size - 1;
+
+		dev_dbg(&dev->dev,
+			"pSeries IOV BAR %d: trying firmware assignment %pR\n",
+			bar + PCI_IOV_RESOURCES, res);
+
+		conflict = request_resource_conflict(&iomem_resource, res);
+		if (!conflict)
+			continue;
+
+		dev_info(&dev->dev,
+			 "BAR %d: %pR conflicts with %s %pR\n",
+			 bar + PCI_IOV_RESOURCES, res,
+			 conflict->name, conflict);
+		res->flags |= IORESOURCE_UNSET;
+	}
+}
+
+/* Warn once for @pdev, then clear the flags of all of its IOV BARs. */
+static void pseries_disable_sriov_resources(struct pci_dev *pdev)
+{
+	struct resource *iov_bars = &pdev->resource[PCI_IOV_RESOURCES];
+	int bar;
+
+	pci_warn(pdev, "No hypervisor support for SR-IOV on this device, IOV BARs disabled.\n");
+	for (bar = 0; bar < PCI_SRIOV_NUM_BARS; bar++)
+		iov_bars[bar].flags = 0;
+}
+
+/*
+ * Resource fixup hook: size the VF BARs from firmware data when the
+ * device node carries "ibm,open-sriov-vf-bar-info", otherwise disable the
+ * device's IOV BARs.
+ */
+static void pseries_pci_fixup_resources(struct pci_dev *pdev)
+{
+	struct device_node *dn = pci_device_to_OF_node(pdev);
+	const int *bar_info;
+
+	/* Firmware must support open sriov otherwise don't configure */
+	bar_info = of_get_property(dn, "ibm,open-sriov-vf-bar-info", NULL);
+	if (!bar_info) {
+		pseries_disable_sriov_resources(pdev);
+		return;
+	}
+
+	of_pci_set_vf_bar_size(pdev, bar_info);
+}
+
+/*
+ * SR-IOV fixup hook for physical functions: take the IOV BAR assignments
+ * from firmware when "ibm,open-sriov-vf-bar-info" is present, otherwise
+ * disable the device's IOV BARs. Devices that are not physical functions
+ * are left alone.
+ */
+static void pseries_pci_fixup_iov_resources(struct pci_dev *pdev)
+{
+	struct device_node *dn = pci_device_to_OF_node(pdev);
+	const int *bar_info;
+
+	if (!pdev->is_physfn)
+		return;
+
+	/* Firmware must support open sriov otherwise don't configure */
+	bar_info = of_get_property(dn, "ibm,open-sriov-vf-bar-info", NULL);
+	if (!bar_info) {
+		pseries_disable_sriov_resources(pdev);
+		return;
+	}
+
+	of_pci_parse_iov_addrs(pdev, bar_info);
+}
+
+/*
+ * Alignment hook for IOV resources.
+ *
+ * Without the "ibm,is-open-sriov-pf" property the regular
+ * pci_iov_resource_size() is reported. With it, physical functions get
+ * the firmware-provided aperture size for the BAR and everything else
+ * gets 0.
+ */
+static resource_size_t pseries_pci_iov_resource_alignment(struct pci_dev *pdev,
+							   int resno)
+{
+	struct device_node *dn = pci_device_to_OF_node(pdev);
+
+	/* Firmware must support open sriov otherwise report regular alignment */
+	if (!of_get_property(dn, "ibm,is-open-sriov-pf", NULL))
+		return pci_iov_resource_size(pdev, resno);
+
+	if (pdev->is_physfn)
+		return pseries_get_iov_fw_value(pdev,
+						resno - PCI_IOV_RESOURCES,
+						APERTURE_SIZE);
+	return 0;
+}
+#endif
+
+/*
+ * Machine setup_arch hook for pSeries: SMP and CPU hotplug setup, FWNMI,
+ * security mitigations, PCI defaults, NVRAM, and selection of the
+ * LPAR-specific ppc_md callbacks.
+ */
+static void __init pSeries_setup_arch(void)
+{
+ set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT);
+
+ /* Discover PIC type and setup ppc_md accordingly */
+ smp_init_pseries();
+
+ // Setup CPU hotplug callbacks
+ pseries_cpu_hotplug_init();
+
+ /* Radix without GTSE cannot run unless firmware offers RPT_INVALIDATE. */
+ if (radix_enabled() && !mmu_has_feature(MMU_FTR_GTSE))
+ if (!firmware_has_feature(FW_FEATURE_RPT_INVALIDATE))
+ panic("BUG: Radix support requires either GTSE or RPT_INVALIDATE\n");
+
+
+ /* openpic global configuration register (64-bit format). */
+ /* openpic Interrupt Source Unit pointer (64-bit format). */
+ /* python0 facility area (mmio) (64-bit format) REAL address. */
+
+ /* init to some ~sane value until calibrate_delay() runs */
+ loops_per_jiffy = 50000000;
+
+ fwnmi_init();
+
+ pseries_setup_security_mitigations();
+ if (!radix_enabled())
+ pseries_lpar_read_hblkrm_characteristics();
+
+ /* By default, only probe PCI (can be overridden by rtas_pci) */
+ pci_add_flags(PCI_PROBE_ONLY);
+
+ /* Find and initialize PCI host bridges */
+ init_pci_config_tokens();
+ of_reconfig_notifier_register(&pci_dn_reconfig_nb);
+
+ pSeries_nvram_init();
+
+ if (firmware_has_feature(FW_FEATURE_LPAR)) {
+ vpa_init(boot_cpuid);
+
+ /* Shared-processor partitions additionally enable steal-time keys. */
+ if (lppaca_shared_proc()) {
+ static_branch_enable(&shared_processor);
+ pv_spinlocks_init();
+#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
+ static_key_slow_inc(&paravirt_steal_enabled);
+ if (steal_acc)
+ static_key_slow_inc(&paravirt_steal_rq_enabled);
+#endif
+ }
+
+ ppc_md.power_save = pseries_lpar_idle;
+ ppc_md.enable_pmcs = pseries_lpar_enable_pmcs;
+#ifdef CONFIG_PCI_IOV
+ ppc_md.pcibios_fixup_resources =
+ pseries_pci_fixup_resources;
+ ppc_md.pcibios_fixup_sriov =
+ pseries_pci_fixup_iov_resources;
+ ppc_md.pcibios_iov_resource_alignment =
+ pseries_pci_iov_resource_alignment;
+#endif
+ } else {
+ /* No special idle routine */
+ ppc_md.enable_pmcs = power4_enable_pmcs;
+ }
+
+ ppc_md.pcibios_root_bridge_prepare = pseries_root_bridge_prepare;
+ pseries_rng_init();
+}
+
+/*
+ * Machine panic hook: finish flushing the kernel log, then pass the panic
+ * string @str to rtas_os_term().
+ */
+static void pseries_panic(char *str)
+{
+ panic_flush_kmsg_end();
+ rtas_os_term(str);
+}
+
+/*
+ * Arch initcall: send an endian-specific "Linux ppc64[le]" banner followed
+ * by the kernel version string to ppc_md.progress. Always returns 0.
+ */
+static int __init pSeries_init_panel(void)
+{
+ /* Manually leave the kernel version on the panel. */
+#ifdef __BIG_ENDIAN__
+ ppc_md.progress("Linux ppc64\n", 0);
+#else
+ ppc_md.progress("Linux ppc64le\n", 0);
+#endif
+ ppc_md.progress(init_utsname()->version, 0);
+
+ return 0;
+}
+machine_arch_initcall(pseries, pSeries_init_panel);
+
+/*
+ * set_dabr hook used when only FW_FEATURE_DABR is available: issue
+ * H_SET_DABR with @dabr. @dabrx is ignored. Returns the hcall status.
+ */
+static int pseries_set_dabr(unsigned long dabr, unsigned long dabrx)
+{
+ return plpar_hcall_norets(H_SET_DABR, dabr);
+}
+
+/*
+ * set_dabr hook used when FW_FEATURE_XDABR is available: issue
+ * H_SET_XDABR with @dabr and a sanitised copy of @dabrx. Returns the
+ * hcall status.
+ */
+static int pseries_set_xdabr(unsigned long dabr, unsigned long dabrx)
+{
+	unsigned long xbits = dabrx;
+
+	/* Have to set at least one bit in the DABRX according to PAPR */
+	if (!xbits && !dabr)
+		xbits = DABRX_USER;
+
+	/* PAPR says we can only set kernel and user bits */
+	xbits &= DABRX_KERNEL | DABRX_USER;
+
+	return plpar_hcall_norets(H_SET_XDABR, dabr, xbits);
+}
+
+/*
+ * set_dawr hook: program watchpoint 0 when @nr is 0, watchpoint 1 for any
+ * other @nr. Returns the status of the underlying call.
+ */
+static int pseries_set_dawr(int nr, unsigned long dawr, unsigned long dawrx)
+{
+	/* PAPR says we can't set HYP */
+	const unsigned long guest_dawrx = dawrx & ~DAWRX_HYP;
+
+	if (nr != 0)
+		return plpar_set_watchpoint1(dawr, guest_dawrx);
+
+	return plpar_set_watchpoint0(dawr, guest_dawrx);
+}
+
+#define CMO_CHARACTERISTICS_TOKEN 44
+#define CMO_MAXLENGTH 1026
+
+/*
+ * Set FW_FEATURE_XCMO when CMO is enabled and h_get_mpp_x() succeeds
+ * (returns 0); clear it otherwise.
+ */
+void pSeries_coalesce_init(void)
+{
+	struct hvcall_mpp_x_data mpp_x_data;
+	bool xcmo;
+
+	/* The hcall is only attempted when CMO itself is present. */
+	xcmo = firmware_has_feature(FW_FEATURE_CMO) &&
+	       !h_get_mpp_x(&mpp_x_data);
+
+	if (xcmo)
+		powerpc_firmware_features |= FW_FEATURE_XCMO;
+	else
+		powerpc_firmware_features &= ~FW_FEATURE_XCMO;
+}
+
+/**
+ * pSeries_cmo_feature_init - FW_FEATURE_CMO is not stored in
+ * ibm,hypertas-functions, handle that here. (Stolen from
+ * parse_system_parameter_string)
+ *
+ * Fetches the cooperative memory overcommit attributes system parameter
+ * and parses it in place as a comma separated list of "key=value" pairs.
+ * "CMOPageSize", "PrPSP" and "SecPSP" are recognised. CMO is enabled when
+ * either PrPSP or SecPSP was reported.
+ */
+static void __init pSeries_cmo_feature_init(void)
+{
+	static struct papr_sysparm_buf buf __initdata;
+	static_assert(sizeof(buf.val) >= CMO_MAXLENGTH);
+	char *ptr, *key, *value, *end;
+	int page_order = IOMMU_PAGE_SHIFT_4K;
+
+	pr_debug(" -> fw_cmo_feature_init()\n");
+
+	if (papr_sysparm_get(PAPR_SYSPARM_COOP_MEM_OVERCOMMIT_ATTRS, &buf)) {
+		pr_debug("CMO not available\n");
+		pr_debug(" <- fw_cmo_feature_init()\n");
+		return;
+	}
+
+	end = &buf.val[CMO_MAXLENGTH];
+	ptr = &buf.val[0];
+	key = value = ptr;
+
+	/* Check the bound before dereferencing; 'end' itself is never read. */
+	for (; ptr < end; ptr++) {
+		const bool at_nul = (*ptr == '\0');
+
+		/* Separate the key and value by replacing '=' with '\0' and
+		 * point the value at the string after the '='
+		 */
+		if (*ptr == '=') {
+			*ptr = '\0';
+			value = ptr + 1;
+			continue;
+		}
+
+		if (!at_nul && *ptr != ',')
+			continue;
+
+		/*
+		 * A pair ends at ',' or at the terminating NUL, so the last
+		 * pair is parsed even without a trailing comma.
+		 */
+		*ptr = '\0';
+
+		if (key == value) {
+			/* An empty tail after the last ',' is not an error. */
+			if (!(at_nul && key == ptr))
+				pr_debug("Malformed key/value pair\n");
+			/* Never found a '=', end processing */
+			break;
+		}
+
+		if (0 == strcmp(key, "CMOPageSize"))
+			page_order = simple_strtol(value, NULL, 10);
+		else if (0 == strcmp(key, "PrPSP"))
+			CMO_PrPSP = simple_strtol(value, NULL, 10);
+		else if (0 == strcmp(key, "SecPSP"))
+			CMO_SecPSP = simple_strtol(value, NULL, 10);
+
+		if (at_nul)
+			break;
+		value = key = ptr + 1;
+	}
+
+	/* Page size is returned as the power of 2 of the page size,
+	 * convert to the page size in bytes before returning
+	 */
+	if (page_order < 0 || page_order >= BITS_PER_LONG) {
+		/* Shifting by such a count would be undefined behaviour. */
+		pr_warn("Ignoring invalid CMOPageSize order %d\n", page_order);
+		page_order = IOMMU_PAGE_SHIFT_4K;
+	}
+	CMO_PageSize = 1UL << page_order;
+	pr_debug("CMO_PageSize = %lu\n", CMO_PageSize);
+
+	if (CMO_PrPSP != -1 || CMO_SecPSP != -1) {
+		pr_info("CMO enabled\n");
+		pr_debug("CMO enabled, PrPSP=%d, SecPSP=%d\n", CMO_PrPSP,
+			 CMO_SecPSP);
+		powerpc_firmware_features |= FW_FEATURE_CMO;
+		pSeries_coalesce_init();
+	} else
+		pr_debug("CMO not enabled, PrPSP=%d, SecPSP=%d\n", CMO_PrPSP,
+			 CMO_SecPSP);
+	pr_debug(" <- fw_cmo_feature_init()\n");
+}
+
+/*
+ * Append firmware ("of:") and hypervisor ("hv:") identification to
+ * ppc_hw_desc. If a /hypervisor node exists its "compatible" string is
+ * used; otherwise PowerVM is inferred from root node properties.
+ */
+static void __init pseries_add_hw_description(void)
+{
+	struct device_node *node;
+	const char *str;
+
+	node = of_find_node_by_path("/openprom");
+	if (node && of_property_read_string(node, "model", &str) == 0)
+		seq_buf_printf(&ppc_hw_desc, "of:%s ", str);
+	of_node_put(node);
+
+	node = of_find_node_by_path("/hypervisor");
+	if (node) {
+		if (of_property_read_string(node, "compatible", &str) == 0)
+			seq_buf_printf(&ppc_hw_desc, "hv:%s ", str);
+
+		of_node_put(node);
+		/* A hypervisor node settles it, readable string or not. */
+		return;
+	}
+
+	if (of_property_read_bool(of_root, "ibm,powervm-partition") ||
+	    of_property_read_bool(of_root, "ibm,fw-net-version"))
+		seq_buf_printf(&ppc_hw_desc, "hv:phyp ");
+}
+
+/*
+ * Early initialization. Relocation is on but do not reference unbolted pages
+ *
+ * Called from pSeries_probe(). Records the hardware description, brings up
+ * the early hvc console on LPARs, picks the DABR/DAWR hooks according to
+ * firmware features, then runs CMO detection and early IOMMU setup.
+ */
+static void __init pseries_init(void)
+{
+ pr_debug(" -> pseries_init()\n");
+
+ pseries_add_hw_description();
+
+#ifdef CONFIG_HVC_CONSOLE
+ if (firmware_has_feature(FW_FEATURE_LPAR))
+ hvc_vio_init_early();
+#endif
+ /* XDABR is preferred over plain DABR when both are advertised. */
+ if (firmware_has_feature(FW_FEATURE_XDABR))
+ ppc_md.set_dabr = pseries_set_xdabr;
+ else if (firmware_has_feature(FW_FEATURE_DABR))
+ ppc_md.set_dabr = pseries_set_dabr;
+
+ if (firmware_has_feature(FW_FEATURE_SET_MODE))
+ ppc_md.set_dawr = pseries_set_dawr;
+
+ pSeries_cmo_feature_init();
+ iommu_init_early_pSeries();
+
+ pr_debug(" <- pseries_init()\n");
+}
+
+/**
+ * pseries_power_off - tell firmware about how to power off the system.
+ *
+ * This function calls either the power-off rtas token in normal cases
+ * or the ibm,power-off-ups token (if present & requested) in case of
+ * a power failure. If power-off token is used, power on will only be
+ * possible with power button press. If ibm,power-off-ups token is used
+ * it will allow auto poweron after power is restored.
+ */
+static void pseries_power_off(void)
+{
+	const int ups_token = rtas_function_token(RTAS_FN_IBM_POWER_OFF_UPS);
+	int rc;
+
+	if (rtas_flash_term_hook)
+		rtas_flash_term_hook(SYS_POWER_OFF);
+
+	if (rtas_poweron_auto != 0 && ups_token != RTAS_UNKNOWN_SERVICE) {
+		rc = rtas_call(ups_token, 0, 1, NULL);
+		printk(KERN_INFO "RTAS ibm,power-off-ups returned %d\n", rc);
+	} else {
+		rc = rtas_call(rtas_function_token(RTAS_FN_POWER_OFF), 2, 1, NULL, -1, -1);
+		printk(KERN_INFO "RTAS power-off returned %d\n", rc);
+	}
+
+	/* Spin forever if the firmware call came back. */
+	for (;;)
+		;
+}
+
+/*
+ * Machine probe hook. Returns 1, after installing the power-off handler
+ * and running pseries_init(), when the root node is of type "chrp" and
+ * the machine is not a Cell blade. Returns 0 otherwise.
+ */
+static int __init pSeries_probe(void)
+{
+	const char *negation;
+
+	if (!of_node_is_type(of_root, "chrp"))
+		return 0;
+
+	/* Cell blades firmware claims to be chrp while it's not. Until this
+	 * is fixed, we need to avoid those here.
+	 */
+	if (of_machine_is_compatible("IBM,CPBW-1.0"))
+		return 0;
+	if (of_machine_is_compatible("IBM,CBEA"))
+		return 0;
+
+	pm_power_off = pseries_power_off;
+
+	negation = (powerpc_firmware_features & FW_FEATURE_LPAR) ? "" : " not";
+	pr_debug("Machine is%s LPAR !\n", negation);
+
+	pseries_init();
+
+	return 1;
+}
+
+/* Probe from the device tree on LPARs and the normal way otherwise; @bus is unused. */
+static int pSeries_pci_probe_mode(struct pci_bus *bus)
+{
+	return firmware_has_feature(FW_FEATURE_LPAR) ? PCI_PROBE_DEVTREE
+						     : PCI_PROBE_NORMAL;
+}
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+/* Machine memory_block_size hook: report the memory_block_size variable. */
+static unsigned long pseries_memory_block_size(void)
+{
+ return memory_block_size;
+}
+#endif
+
+/* Controller ops copied into every PHB by pSeries_discover_phbs(). */
+struct pci_controller_ops pseries_pci_controller_ops = {
+ .probe_mode = pSeries_pci_probe_mode,
+#ifdef CONFIG_SPAPR_TCE_IOMMU
+ .device_group = pSeries_pci_device_group,
+#endif
+};
+
+/*
+ * Machine description for pSeries: binds the probe/setup hooks defined in
+ * this file together with the RTAS-based restart, halt, time and progress
+ * helpers and the machine check handlers.
+ */
+define_machine(pseries) {
+ .name = "pSeries",
+ .probe = pSeries_probe,
+ .setup_arch = pSeries_setup_arch,
+ .init_IRQ = pseries_init_irq,
+ .show_cpuinfo = pSeries_show_cpuinfo,
+ .log_error = pSeries_log_error,
+ .discover_phbs = pSeries_discover_phbs,
+ .pcibios_fixup = pSeries_final_fixup,
+ .restart = rtas_restart,
+ .halt = rtas_halt,
+ .panic = pseries_panic,
+ .get_boot_time = rtas_get_boot_time,
+ .get_rtc_time = rtas_get_rtc_time,
+ .set_rtc_time = rtas_set_rtc_time,
+ .progress = rtas_progress,
+ .system_reset_exception = pSeries_system_reset_exception,
+ .machine_check_early = pseries_machine_check_realmode,
+ .machine_check_exception = pSeries_machine_check_exception,
+ .machine_check_log_err = pSeries_machine_check_log_err,
+#ifdef CONFIG_KEXEC_CORE
+ .machine_kexec = pseries_machine_kexec,
+ .kexec_cpu_down = pseries_kexec_cpu_down,
+#endif
+#ifdef CONFIG_MEMORY_HOTPLUG
+ .memory_block_size = pseries_memory_block_size,
+#endif
+};
diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c
new file mode 100644
index 000000000..c597711ef
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/smp.c
@@ -0,0 +1,282 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * SMP support for pSeries machines.
+ *
+ * Dave Engebretsen, Peter Bergner, and
+ * Mike Corrigan {engebret|bergner|mikec}@us.ibm.com
+ *
+ * Plus various changes from other IBM teams...
+ */
+
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/cache.h>
+#include <linux/err.h>
+#include <linux/device.h>
+#include <linux/cpu.h>
+#include <linux/pgtable.h>
+
+#include <asm/ptrace.h>
+#include <linux/atomic.h>
+#include <asm/irq.h>
+#include <asm/page.h>
+#include <asm/io.h>
+#include <asm/smp.h>
+#include <asm/paca.h>
+#include <asm/machdep.h>
+#include <asm/cputable.h>
+#include <asm/firmware.h>
+#include <asm/rtas.h>
+#include <asm/vdso_datapage.h>
+#include <asm/cputhreads.h>
+#include <asm/xics.h>
+#include <asm/xive.h>
+#include <asm/dbell.h>
+#include <asm/plpar_wrappers.h>
+#include <asm/code-patching.h>
+#include <asm/svm.h>
+#include <asm/kvm_guest.h>
+
+#include "pseries.h"
+
+/*
+ * The Primary thread of each non-boot processor was started from the OF client
+ * interface by prom_hold_cpus and is spinning on secondary_hold_spinloop.
+ *
+ * A bit set in this mask means smp_startup_cpu() must not ask RTAS to start
+ * that cpu. Bits are set in smp_init_pseries() and smp_startup_cpu(), and
+ * cleared again by smp_setup_cpu().
+ */
+static cpumask_var_t of_spin_mask;
+
+/*
+ * Query where a cpu is now, using the RTAS query-cpu-stopped-state service.
+ *
+ * @pcpu: hardware (physical) cpu id handed to RTAS.
+ *
+ * Returns the cpu status reported by RTAS (QCSS_* codes #defined in
+ * plpar_wrappers.h), QCSS_HARDWARE_ERROR when firmware does not provide
+ * the service, or the non-zero RTAS status if the call itself failed.
+ */
+int smp_query_cpu_stopped(unsigned int pcpu)
+{
+	int token = rtas_function_token(RTAS_FN_QUERY_CPU_STOPPED_STATE);
+	int state, rc;
+
+	if (token == RTAS_UNKNOWN_SERVICE) {
+		printk_once(KERN_INFO
+			"Firmware doesn't support query-cpu-stopped-state\n");
+		return QCSS_HARDWARE_ERROR;
+	}
+
+	rc = rtas_call(token, 1, 2, &state, pcpu);
+	if (rc == 0)
+		return state;
+
+	printk(KERN_ERR
+		"RTAS query-cpu-stopped-state failed: %i\n", rc);
+	return rc;
+}
+
+/**
+ * smp_startup_cpu() - start the given cpu
+ * @lcpu: logical cpu number
+ *
+ * Nothing needs doing for a cpu that is flagged in of_spin_mask, or that
+ * RTAS reports as not stopped (it is then added to of_spin_mask). If the
+ * RTAS start-cpu service is missing the cpu is presumed to be spinning
+ * already. Otherwise RTAS start-cpu is invoked with the physical address
+ * of generic_secondary_smp_init as the entry point.
+ *
+ * Returns:
+ *	0	- failure
+ *	1	- success
+ */
+static inline int smp_startup_cpu(unsigned int lcpu)
+{
+	unsigned long entry =
+			__pa(ppc_function_entry(generic_secondary_smp_init));
+	unsigned int hwid;
+	int token;
+	int rc;
+
+	/* Already started by OF and sitting in spin loop */
+	if (cpumask_test_cpu(lcpu, of_spin_mask))
+		return 1;
+
+	hwid = get_hard_smp_processor_id(lcpu);
+
+	/* The cpu may already be out of firmware, e.g. after kexec */
+	if (smp_query_cpu_stopped(hwid) == QCSS_NOT_STOPPED) {
+		cpumask_set_cpu(lcpu, of_spin_mask);
+		return 1;
+	}
+
+	/*
+	 * Without an RTAS start-cpu token, presume the cpu is already
+	 * spinning.
+	 */
+	token = rtas_function_token(RTAS_FN_START_CPU);
+	if (token == RTAS_UNKNOWN_SERVICE)
+		return 1;
+
+	rc = rtas_call(token, 3, 1, NULL, hwid, entry, hwid);
+	if (rc == 0)
+		return 1;
+
+	printk(KERN_ERR "start-cpu failed: %i\n", rc);
+	return 0;
+}
+
+/*
+ * smp_ops .setup_cpu hook: set up the interrupt controller side for @cpu
+ * (XIVE for every cpu, XICS for every cpu except the boot cpu), initialise
+ * the VPA on shared-processor LPARs, and drop @cpu from of_spin_mask.
+ */
+static void smp_setup_cpu(int cpu)
+{
+	const bool use_xive = xive_enabled();
+
+	if (use_xive) {
+		xive_smp_setup_cpu();
+	} else if (cpu != boot_cpuid) {
+		xics_setup_cpu();
+	}
+
+	if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
+		vpa_init(cpu);
+	}
+
+	cpumask_clear_cpu(cpu, of_spin_mask);
+}
+
+/*
+ * smp_ops .kick_cpu hook.
+ *
+ * Returns -EINVAL for an out-of-range cpu number, -ENOENT when
+ * smp_startup_cpu() reports failure, and 0 once the cpu has been released.
+ */
+static int smp_pSeries_kick_cpu(int nr)
+{
+	const bool in_range = (nr >= 0 && nr < nr_cpu_ids);
+
+	if (!in_range)
+		return -EINVAL;
+
+	if (smp_startup_cpu(nr) == 0)
+		return -ENOENT;
+
+	/*
+	 * The processor is currently spinning, waiting for the
+	 * cpu_start field to become non-zero. After we set cpu_start,
+	 * the processor will continue on to secondary_start.
+	 */
+	paca_ptrs[nr]->cpu_start = 1;
+
+	return 0;
+}
+
+/*
+ * smp_ops .prepare_cpu hook: only XIVE needs per-cpu preparation; in every
+ * other case there is nothing to do and 0 is returned.
+ */
+static int pseries_smp_prepare_cpu(int cpu)
+{
+	return xive_enabled() ? xive_smp_prepare_cpu(cpu) : 0;
+}
+
+/*
+ * Cause IPI as setup by the interrupt controller (xics or xive).
+ * pSeries_smp_probe() saves smp_ops->cause_ipi here before replacing that
+ * hook with dbell_or_ic_cause_ipi(), which falls back to this pointer.
+ */
+static void (*ic_cause_ipi)(int cpu) __ro_after_init;
+
+/*
+ * Use a msgsndp doorbell if the target is a sibling thread, otherwise fall
+ * back to the interrupt controller's IPI routine saved in ic_cause_ipi.
+ */
+static void dbell_or_ic_cause_ipi(int cpu)
+{
+	if (!doorbell_try_core_ipi(cpu))
+		ic_cause_ipi(cpu);
+}
+
+/*
+ * smp_ops .cause_nmi_ipi hook, implemented with plpar_signal_sys_reset().
+ *
+ * @cpu: logical cpu number, or NMI_IPI_ALL_OTHERS to target every other cpu.
+ *
+ * Returns 1 when the hcall reports H_SUCCESS and 0 otherwise, including
+ * when @cpu is negative without being NMI_IPI_ALL_OTHERS.
+ */
+static int pseries_cause_nmi_ipi(int cpu)
+{
+	int target;
+
+	if (cpu == NMI_IPI_ALL_OTHERS) {
+		target = H_SIGNAL_SYS_RESET_ALL_OTHERS;
+	} else if (cpu < 0) {
+		WARN_ONCE(true, "incorrect cpu parameter %d", cpu);
+		return 0;
+	} else {
+		target = get_hard_smp_processor_id(cpu);
+	}
+
+	return plpar_signal_sys_reset(target) == H_SUCCESS;
+}
+
+/*
+ * smp_ops .probe hook: probe the interrupt controller (XIVE or XICS) and
+ * then decide whether IPIs should try msgsndp doorbells first. When they
+ * should, the controller's cause_ipi routine is saved in ic_cause_ipi and
+ * smp_ops->cause_ipi is redirected to dbell_or_ic_cause_ipi().
+ */
+static __init void pSeries_smp_probe(void)
+{
+	if (xive_enabled())
+		xive_smp_probe();
+	else
+		xics_smp_probe();
+
+	/*
+	 * Without the doorbell facility the interrupt controller must be
+	 * used for IPIs, and doorbells can only be used for IPIs between
+	 * SMT siblings anyway.
+	 */
+	if (!cpu_has_feature(CPU_FTR_DBELL) || !cpu_has_feature(CPU_FTR_SMT))
+		return;
+
+	check_kvm_guest();
+
+	/*
+	 * KVM emulates doorbells by disabling FSCR[MSGP] so msgsndp
+	 * faults to the hypervisor which then reads the instruction
+	 * from guest memory, which tends to be slower than using XIVE.
+	 *
+	 * XICS hcalls aren't as fast, so we can use msgsndp (which
+	 * also helps exercise KVM emulation), however KVM can't
+	 * emulate secure guests because it can't read the instruction
+	 * out of their memory.
+	 */
+	if (is_kvm_guest() && (xive_enabled() || is_secure_guest()))
+		return;
+
+	/*
+	 * Under PowerVM, FSCR[MSGP] is enabled as guest vCPU siblings are
+	 * gang scheduled on the same physical core, so doorbells are always
+	 * faster than the interrupt controller, and they can be used by
+	 * secure guests.
+	 */
+	ic_cause_ipi = smp_ops->cause_ipi;
+	smp_ops->cause_ipi = dbell_or_ic_cause_ipi;
+}
+
+/*
+ * SMP callbacks for pSeries, installed by smp_init_pseries().
+ * .cause_ipi starts out NULL and may later be pointed at
+ * dbell_or_ic_cause_ipi() by pSeries_smp_probe().
+ */
+static struct smp_ops_t pseries_smp_ops = {
+ .message_pass = NULL, /* Use smp_muxed_ipi_message_pass */
+ .cause_ipi = NULL, /* Filled at runtime by pSeries_smp_probe() */
+ .cause_nmi_ipi = pseries_cause_nmi_ipi,
+ .probe = pSeries_smp_probe,
+ .prepare_cpu = pseries_smp_prepare_cpu,
+ .kick_cpu = smp_pSeries_kick_cpu,
+ .setup_cpu = smp_setup_cpu,
+ .cpu_bootable = smp_generic_cpu_bootable,
+};
+
+/*
+ * Install the pSeries smp_ops and build of_spin_mask.
+ * This is called very early.
+ */
+void __init smp_init_pseries(void)
+{
+	int cpu;
+
+	pr_debug(" -> smp_init_pSeries()\n");
+	smp_ops = &pseries_smp_ops;
+
+	alloc_bootmem_cpumask_var(&of_spin_mask);
+
+	/*
+	 * Mark threads which are still spinning in hold loops
+	 *
+	 * We know prom_init will not have started them if RTAS supports
+	 * query-cpu-stopped-state, so in that case the mask is left alone.
+	 */
+	if (rtas_function_token(RTAS_FN_QUERY_CPU_STOPPED_STATE) != RTAS_UNKNOWN_SERVICE)
+		goto out;
+
+	if (!cpu_has_feature(CPU_FTR_SMT)) {
+		cpumask_copy(of_spin_mask, cpu_present_mask);
+	} else {
+		/* With SMT only thread 0 of each core is marked */
+		for_each_present_cpu(cpu) {
+			if (cpu_thread_in_core(cpu) == 0)
+				cpumask_set_cpu(cpu, of_spin_mask);
+		}
+	}
+
+	cpumask_clear_cpu(boot_cpuid, of_spin_mask);
+out:
+	pr_debug(" <- smp_init_pSeries()\n");
+}
diff --git a/arch/powerpc/platforms/pseries/suspend.c b/arch/powerpc/platforms/pseries/suspend.c
new file mode 100644
index 000000000..5c4343547
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/suspend.c
@@ -0,0 +1,189 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2010 Brian King IBM Corporation
+ */
+
+#include <linux/cpu.h>
+#include <linux/delay.h>
+#include <linux/suspend.h>
+#include <linux/stat.h>
+#include <asm/firmware.h>
+#include <asm/hvcall.h>
+#include <asm/machdep.h>
+#include <asm/mmu.h>
+#include <asm/rtas.h>
+#include <asm/topology.h>
+
+/* Device handed to pseries_suspend_sysfs_register() by pseries_suspend_init(). */
+static struct device suspend_dev;
+
+/**
+ * pseries_suspend_begin - First phase of hibernation
+ * @stream_id: stream ID passed to the H_VASI_STATE hcall
+ *
+ * Check to ensure we are in a valid state to hibernate
+ *
+ * Return value:
+ * 	0 when the VASI state is H_VASI_SUSPENDING, -EAGAIN while it is still
+ * 	H_VASI_ENABLED, -EIO for any other state, or the hcall status if the
+ * 	hcall itself failed
+ **/
+static int pseries_suspend_begin(u64 stream_id)
+{
+	unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+	long hrc, state;
+
+	/* Make sure the state is valid */
+	hrc = plpar_hcall(H_VASI_STATE, retbuf, stream_id);
+	if (hrc) {
+		pr_err("pseries_suspend_begin: vasi_state returned %ld\n",hrc);
+		return hrc;
+	}
+
+	state = retbuf[0];
+
+	if (state == H_VASI_ENABLED)
+		return -EAGAIN;
+
+	if (state == H_VASI_SUSPENDING)
+		return 0;
+
+	pr_err("pseries_suspend_begin: vasi_state returned state %ld\n",
+	       state);
+	return -EIO;
+}
+
+/**
+ * pseries_suspend_enter - Final phase of hibernation
+ * @state: requested suspend state (not used by this implementation)
+ *
+ * platform_suspend_ops .enter hook; delegates to rtas_ibm_suspend_me().
+ *
+ * Return value:
+ * 	0 on success / other on failure
+ **/
+static int pseries_suspend_enter(suspend_state_t state)
+{
+ return rtas_ibm_suspend_me(NULL);
+}
+
+/**
+ * store_hibernate - Initiate partition hibernation
+ * @dev:		subsys root device
+ * @attr:		device attribute struct
+ * @buf:		buffer holding the stream ID as a hexadecimal string
+ * @count:		buffer size
+ *
+ * Write the stream ID received from the HMC to this file
+ * to trigger hibernating the partition
+ *
+ * Return value:
+ * 	@count once the partition has suspended and resumed / -EPERM without
+ * 	CAP_SYS_ADMIN / other on failure
+ **/
+static ssize_t store_hibernate(struct device *dev,
+			       struct device_attribute *attr,
+			       const char *buf, size_t count)
+{
+	u64 stream_id;
+	int rc;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	stream_id = simple_strtoul(buf, NULL, 16);
+
+	/* Re-check once a second for as long as we are told to try again */
+	for (;;) {
+		rc = pseries_suspend_begin(stream_id);
+		if (rc != -EAGAIN)
+			break;
+		ssleep(1);
+	}
+
+	if (rc)
+		return rc;
+
+	rc = pm_suspend(PM_SUSPEND_MEM);
+	if (rc)
+		return rc;
+
+	rc = count;
+	post_mobility_fixup();
+
+	return rc;
+}
+
+/*
+ * Values reported through the "hibernate" attribute by show_hibernate():
+ * 0 means drmgr initiates the device tree update, 1 means the kernel does.
+ */
+#define USER_DT_UPDATE 0
+#define KERN_DT_UPDATE 1
+
+/**
+ * show_hibernate - Report device tree update responsibility
+ * @dev:		subsys root device
+ * @attr:		device attribute struct
+ * @buf:		sysfs output buffer
+ *
+ * Report whether a device tree update is performed by the kernel after a
+ * resume, or if drmgr must coordinate the update from user space.
+ * The value written is always KERN_DT_UPDATE.
+ *
+ * Return value:
+ *	number of bytes written to @buf
+ **/
+static ssize_t show_hibernate(struct device *dev,
+			      struct device_attribute *attr,
+			      char *buf)
+{
+	/* sysfs_emit() is the bounded helper intended for show() callbacks */
+	return sysfs_emit(buf, "%d\n", KERN_DT_UPDATE);
+}
+
+/* "hibernate" attribute: read via show_hibernate(), written via store_hibernate(). */
+static DEVICE_ATTR(hibernate, 0644, show_hibernate, store_hibernate);
+
+/* The "power" subsystem whose root device carries the hibernate attribute. */
+static struct bus_type suspend_subsys = {
+ .name = "power",
+ .dev_name = "power",
+};
+
+/* Suspend callbacks installed by pseries_suspend_init() via suspend_set_ops(). */
+static const struct platform_suspend_ops pseries_suspend_ops = {
+ .valid = suspend_valid_only_mem,
+ .enter = pseries_suspend_enter,
+};
+
+/**
+ * pseries_suspend_sysfs_register - Register with sysfs
+ * @dev: device to associate with the suspend subsystem
+ *
+ * Registers the "power" subsystem and creates the hibernate attribute on
+ * its root device. If creating the attribute fails the subsystem is
+ * unregistered again.
+ *
+ * Return value:
+ * 	0 on success / other on failure
+ **/
+static int pseries_suspend_sysfs_register(struct device *dev)
+{
+	struct device *root;
+	int err;
+
+	err = subsys_system_register(&suspend_subsys, NULL);
+	if (err)
+		return err;
+
+	dev->id = 0;
+	dev->bus = &suspend_subsys;
+
+	root = bus_get_dev_root(&suspend_subsys);
+	if (!root)
+		return 0;
+
+	err = device_create_file(root, &dev_attr_hibernate);
+	put_device(root);
+	if (!err)
+		return 0;
+
+	bus_unregister(&suspend_subsys);
+	return err;
+}
+
+/**
+ * pseries_suspend_init - initcall for pSeries suspend
+ *
+ * Does nothing unless the firmware advertises FW_FEATURE_LPAR. Otherwise
+ * registers the sysfs interface and installs pseries_suspend_ops.
+ *
+ * Return value:
+ * 	0 on success / other on failure
+ **/
+static int __init pseries_suspend_init(void)
+{
+	int err;
+
+	if (!firmware_has_feature(FW_FEATURE_LPAR))
+		return 0;
+
+	err = pseries_suspend_sysfs_register(&suspend_dev);
+	if (err)
+		return err;
+
+	suspend_set_ops(&pseries_suspend_ops);
+	return 0;
+}
+machine_device_initcall(pseries, pseries_suspend_init);
diff --git a/arch/powerpc/platforms/pseries/svm.c b/arch/powerpc/platforms/pseries/svm.c
new file mode 100644
index 000000000..3b4045d50
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/svm.c
@@ -0,0 +1,94 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Secure VM platform
+ *
+ * Copyright 2018 IBM Corporation
+ * Author: Anshuman Khandual <khandual@linux.vnet.ibm.com>
+ */
+
+#include <linux/mm.h>
+#include <linux/memblock.h>
+#include <linux/cc_platform.h>
+#include <asm/machdep.h>
+#include <asm/svm.h>
+#include <asm/swiotlb.h>
+#include <asm/ultravisor.h>
+#include <asm/dtl.h>
+
+/*
+ * Early initcall: on a secure guest, keep the SWIOTLB buffer, force all DMA
+ * through it and share it with the host. Does nothing on other guests.
+ * Always returns 0.
+ */
+static int __init init_svm(void)
+{
+	if (is_secure_guest()) {
+		/* Don't release the SWIOTLB buffer. */
+		ppc_swiotlb_enable = 1;
+
+		/*
+		 * Since the guest memory is inaccessible to the host, devices
+		 * always need to use the SWIOTLB buffer for DMA even if
+		 * dma_capable() says otherwise.
+		 */
+		ppc_swiotlb_flags |= SWIOTLB_ANY | SWIOTLB_FORCE;
+
+		/* Share the SWIOTLB buffer with the host. */
+		swiotlb_update_mem_attributes();
+	}
+
+	return 0;
+}
+machine_early_initcall(pseries, init_svm);
+
+/*
+ * Unshare @numpages pages starting at the page-aligned address @addr.
+ *
+ * Returns 0 when memory encryption is not active on this platform or once
+ * the unshare request has been issued, and -EINVAL if @addr is not page
+ * aligned. The result of uv_unshare_page() itself is not propagated.
+ */
+int set_memory_encrypted(unsigned long addr, int numpages)
+{
+	unsigned long pfn;
+
+	if (!cc_platform_has(CC_ATTR_MEM_ENCRYPT))
+		return 0;
+
+	if (!PAGE_ALIGNED(addr))
+		return -EINVAL;
+
+	pfn = PHYS_PFN(__pa(addr));
+	uv_unshare_page(pfn, numpages);
+
+	return 0;
+}
+
+/*
+ * Share @numpages pages starting at the page-aligned address @addr.
+ *
+ * Returns 0 when memory encryption is not active on this platform or once
+ * the share request has been issued, and -EINVAL if @addr is not page
+ * aligned. The result of uv_share_page() itself is not propagated.
+ */
+int set_memory_decrypted(unsigned long addr, int numpages)
+{
+	unsigned long pfn;
+
+	if (!cc_platform_has(CC_ATTR_MEM_ENCRYPT))
+		return 0;
+
+	if (!PAGE_ALIGNED(addr))
+		return -EINVAL;
+
+	pfn = PHYS_PFN(__pa(addr));
+	uv_share_page(pfn, numpages);
+
+	return 0;
+}
+
+/* There's one dispatch log per CPU. */
+#define NR_DTL_PAGE (DISPATCH_LOG_BYTES * CONFIG_NR_CPUS / PAGE_SIZE)
+
+/*
+ * Pages that dtl_cache_ctor() has already shared, and how many entries of
+ * the table are in use. Consulted by is_dtl_page_shared() so that a page
+ * is shared only once.
+ */
+static struct page *dtl_page_store[NR_DTL_PAGE];
+static long dtl_nr_pages;
+
+/* Return true if @page is already recorded in dtl_page_store. */
+static bool is_dtl_page_shared(struct page *page)
+{
+	long idx = 0;
+
+	while (idx < dtl_nr_pages) {
+		if (dtl_page_store[idx] == page)
+			return true;
+		idx++;
+	}
+
+	return false;
+}
+
+/*
+ * Constructor for dispatch trace log objects: share the page backing @addr
+ * unless that page has already been shared.
+ *
+ * Shared pages are remembered in dtl_page_store. The table is bounds
+ * checked before it is written: if it is already full a warning is raised
+ * and the page is shared without being recorded, rather than writing past
+ * the end of the array.
+ */
+void dtl_cache_ctor(void *addr)
+{
+	unsigned long pfn = PHYS_PFN(__pa(addr));
+	struct page *page = pfn_to_page(pfn);
+
+	if (is_dtl_page_shared(page))
+		return;
+
+	/* Never index beyond the last slot of dtl_page_store */
+	if (!WARN_ON(dtl_nr_pages >= NR_DTL_PAGE))
+		dtl_page_store[dtl_nr_pages++] = page;
+
+	uv_share_page(pfn, 1);
+}
diff --git a/arch/powerpc/platforms/pseries/vas-sysfs.c b/arch/powerpc/platforms/pseries/vas-sysfs.c
new file mode 100644
index 000000000..f9f682724
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/vas-sysfs.c
@@ -0,0 +1,281 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2022-23 IBM Corp.
+ */
+
+#define pr_fmt(fmt) "vas: " fmt
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/miscdevice.h>
+#include <linux/kobject.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+
+#include "vas.h"
+
+#ifdef CONFIG_SYSFS
+/* "vas0" directory and its "gzip" child, created by sysfs_pseries_vas_init(). */
+static struct kobject *pseries_vas_kobj;
+static struct kobject *gzip_caps_kobj;
+
+/*
+ * One sysfs directory for a capability set: the embedded kobject plus the
+ * capabilities it exposes. Freed by vas_type_release().
+ */
+struct vas_caps_entry {
+ struct kobject kobj;
+ struct vas_cop_feat_caps *caps;
+};
+
+/* Map an embedded kobject pointer back to its vas_caps_entry. */
+#define to_caps_entry(entry) container_of(entry, struct vas_caps_entry, kobj)
+
+/*
+ * This function is used to get the notification from the drmgr when
+ * QoS credits are changed.
+ *
+ * @caps:  capability set whose window type is reconfigured
+ * @buf:   new total credit count as text (parsed with kstrtou16, base 0)
+ * @count: number of bytes in @buf
+ *
+ * Returns @count on success, -EINVAL if the value cannot be parsed or the
+ * reconfiguration fails.
+ */
+static ssize_t update_total_credits_store(struct vas_cop_feat_caps *caps,
+					  const char *buf, size_t count)
+{
+	u16 new_creds;
+
+	if (kstrtou16(buf, 0, &new_creds))
+		return -EINVAL;
+
+	/*
+	 * The user space interface from the management console
+	 * notifies OS with the new QoS credits and then the
+	 * hypervisor. So OS has to use this new credits value
+	 * and reconfigure VAS windows (close or reopen depends
+	 * on the credits available) instead of depending on VAS
+	 * QoS capabilities from the hypervisor.
+	 */
+	if (vas_reconfig_capabilties(caps->win_type, new_creds))
+		return -EINVAL;
+
+	pr_info("Set QoS total credits %u\n", new_creds);
+
+	return count;
+}
+
+/*
+ * Generate <_name>_show(), which prints the atomic counter caps-><_name>
+ * as a decimal line. sysfs_emit() is used because the result is handed
+ * straight back as a sysfs show() buffer.
+ */
+#define sysfs_caps_entry_read(_name)					\
+static ssize_t _name##_show(struct vas_cop_feat_caps *caps, char *buf) 	\
+{									\
+	return sysfs_emit(buf, "%d\n", atomic_read(&caps->_name));	\
+}
+
+/*
+ * A sysfs attribute together with the capability-level handlers that
+ * vas_type_show() / vas_type_store() dispatch to. Either handler may be
+ * NULL, in which case the corresponding operation returns -EIO.
+ */
+struct vas_sysfs_entry {
+ struct attribute attr;
+ ssize_t (*show)(struct vas_cop_feat_caps *, char *);
+ ssize_t (*store)(struct vas_cop_feat_caps *, const char *, size_t);
+};
+
+/*
+ * Define a read-only (0444) attribute <_name>_attribute together with its
+ * <_name>_show() helper generated by sysfs_caps_entry_read().
+ */
+#define VAS_ATTR_RO(_name) \
+ sysfs_caps_entry_read(_name); \
+ static struct vas_sysfs_entry _name##_attribute = __ATTR(_name, \
+ 0444, _name##_show, NULL);
+
+/*
+ * Create sysfs interface:
+ * /sys/devices/virtual/misc/vas/vas0/gzip/default_capabilities
+ * This directory contains the following VAS GZIP capabilities
+ * for the default credit type.
+ * /sys/devices/virtual/misc/vas/vas0/gzip/default_capabilities/nr_total_credits
+ * Total number of default credits assigned to the LPAR which
+ * can be changed with DLPAR operation.
+ * /sys/devices/virtual/misc/vas/vas0/gzip/default_capabilities/nr_used_credits
+ * Number of credits used by the user space. One credit will
+ * be assigned for each window open.
+ *
+ * /sys/devices/virtual/misc/vas/vas0/gzip/qos_capabilities
+ * This directory contains the following VAS GZIP capabilities
+ * for the Quality of Service (QoS) credit type.
+ * /sys/devices/virtual/misc/vas/vas0/gzip/qos_capabilities/nr_total_credits
+ * Total number of QoS credits assigned to the LPAR. The user
+ * has to define this value using HMC interface. It can be
+ * changed dynamically by the user.
+ * /sys/devices/virtual/misc/vas/vas0/gzip/qos_capabilities/nr_used_credits
+ * Number of credits used by the user space.
+ * /sys/devices/virtual/misc/vas/vas0/gzip/qos_capabilities/update_total_credits
+ * Update total QoS credits dynamically
+ */
+
+/* Read-only counters shared by the default and QoS capability directories. */
+VAS_ATTR_RO(nr_total_credits);
+VAS_ATTR_RO(nr_used_credits);
+
+/* Write-only (0200) attribute handled by update_total_credits_store(). */
+static struct vas_sysfs_entry update_total_credits_attribute =
+ __ATTR(update_total_credits, 0200, NULL, update_total_credits_store);
+
+/* Attributes of the default_capabilities directory. */
+static struct attribute *vas_def_capab_attrs[] = {
+ &nr_total_credits_attribute.attr,
+ &nr_used_credits_attribute.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(vas_def_capab);
+
+/* Attributes of the qos_capabilities directory; adds update_total_credits. */
+static struct attribute *vas_qos_capab_attrs[] = {
+ &nr_total_credits_attribute.attr,
+ &nr_used_credits_attribute.attr,
+ &update_total_credits_attribute.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(vas_qos_capab);
+
+/*
+ * sysfs_ops .show: resolve the vas_sysfs_entry behind @attr and the
+ * capabilities behind @kobj, then call the entry's show handler.
+ * Returns -EIO when the attribute has no show handler.
+ */
+static ssize_t vas_type_show(struct kobject *kobj, struct attribute *attr,
+			     char *buf)
+{
+	struct vas_sysfs_entry *entry =
+			container_of(attr, struct vas_sysfs_entry, attr);
+	struct vas_caps_entry *centry = to_caps_entry(kobj);
+
+	return entry->show ? entry->show(centry->caps, buf) : -EIO;
+}
+
+/*
+ * sysfs_ops .store: resolve the vas_sysfs_entry behind @attr and the
+ * capabilities behind @kobj, then call the entry's store handler.
+ * Returns -EIO when the attribute has no store handler.
+ */
+static ssize_t vas_type_store(struct kobject *kobj, struct attribute *attr,
+			      const char *buf, size_t count)
+{
+	struct vas_sysfs_entry *entry =
+			container_of(attr, struct vas_sysfs_entry, attr);
+	struct vas_caps_entry *centry = to_caps_entry(kobj);
+
+	return entry->store ? entry->store(centry->caps, buf, count) : -EIO;
+}
+
+/* kobj_type .release: free the vas_caps_entry that embeds @kobj. */
+static void vas_type_release(struct kobject *kobj)
+{
+	kfree(to_caps_entry(kobj));
+}
+
+/* Common show/store dispatchers for both capability directory types. */
+static const struct sysfs_ops vas_sysfs_ops = {
+ .show = vas_type_show,
+ .store = vas_type_store,
+};
+
+/* kobject type of the default_capabilities directory. */
+static struct kobj_type vas_def_attr_type = {
+ .release = vas_type_release,
+ .sysfs_ops = &vas_sysfs_ops,
+ .default_groups = vas_def_capab_groups,
+};
+
+/* kobject type of the qos_capabilities directory. */
+static struct kobj_type vas_qos_attr_type = {
+ .release = vas_type_release,
+ .sysfs_ops = &vas_sysfs_ops,
+ .default_groups = vas_qos_capab_groups,
+};
+
+/*
+ * Pick the directory name for @centry from its capability descriptor.
+ *
+ * For the GZIP QoS and GZIP default descriptors the embedded kobject is
+ * initialised with the matching kobj_type and *@kobj is set to
+ * gzip_caps_kobj as the parent. For any other descriptor "Unknown" is
+ * returned, the kobject is left uninitialised and *@kobj is not touched.
+ */
+static char *vas_caps_kobj_name(struct vas_caps_entry *centry,
+				struct kobject **kobj)
+{
+	struct vas_cop_feat_caps *caps = centry->caps;
+	struct kobj_type *ktype;
+	char *dir_name;
+
+	if (caps->descriptor == VAS_GZIP_QOS_CAPABILITIES) {
+		ktype = &vas_qos_attr_type;
+		dir_name = "qos_capabilities";
+	} else if (caps->descriptor == VAS_GZIP_DEFAULT_CAPABILITIES) {
+		ktype = &vas_def_attr_type;
+		dir_name = "default_capabilities";
+	} else {
+		return "Unknown";
+	}
+
+	kobject_init(&centry->kobj, ktype);
+	*kobj = gzip_caps_kobj;
+	return dir_name;
+}
+
+/*
+ * Add feature specific capability dir entry.
+ * Ex: VDefGzip or VQosGzip
+ *
+ * @caps: capabilities to expose.
+ *
+ * Returns 0 on success, and also when there is no parent directory to add
+ * the entry to (unknown descriptor, or the gzip directory was not created).
+ * Returns -ENOMEM on allocation failure, or the kobject_add() error.
+ *
+ * The entry is released on every path that does not add it to sysfs, so
+ * it is no longer leaked when no parent kobject is available.
+ */
+int sysfs_add_vas_caps(struct vas_cop_feat_caps *caps)
+{
+	struct vas_caps_entry *centry;
+	struct kobject *kobj = NULL;
+	int ret;
+	char *name;
+
+	centry = kzalloc(sizeof(*centry), GFP_KERNEL);
+	if (!centry)
+		return -ENOMEM;
+
+	centry->caps = caps;
+	name = vas_caps_kobj_name(centry, &kobj);
+
+	if (!kobj) {
+		/*
+		 * Nothing to add. If the embedded kobject was initialised
+		 * it must be dropped through kobject_put() so that its
+		 * release callback frees centry; otherwise a plain kfree()
+		 * is the right way to undo the kzalloc() above.
+		 */
+		if (centry->kobj.state_initialized)
+			kobject_put(&centry->kobj);
+		else
+			kfree(centry);
+		return 0;
+	}
+
+	ret = kobject_add(&centry->kobj, kobj, "%s", name);
+	if (ret) {
+		pr_err("VAS: sysfs kobject add / event failed %d\n",
+		       ret);
+		kobject_put(&centry->kobj);
+	}
+
+	return ret;
+}
+
+/*
+ * Misc device named "vas", registered by sysfs_pseries_vas_init(); its
+ * device kobject is the parent of the "vas0" directory.
+ */
+static struct miscdevice vas_miscdev = {
+ .minor = MISC_DYNAMIC_MINOR,
+ .name = "vas",
+};
+
+/*
+ * Add VAS and VasCaps (overall capabilities) dir entries.
+ *
+ * Registers the "vas" misc device, creates "vas0" below it and, when
+ * @vas_caps advertises a GZIP QoS or GZIP default feature, a "gzip"
+ * directory below that. Everything created so far is torn down again on
+ * failure.
+ *
+ * Returns 0 on success, the misc_register() error, or -ENOMEM if a sysfs
+ * directory cannot be created.
+ */
+int __init sysfs_pseries_vas_init(struct vas_all_caps *vas_caps)
+{
+	int err = misc_register(&vas_miscdev);
+
+	if (err < 0) {
+		pr_err("%s: register vas misc device failed\n", __func__);
+		return err;
+	}
+
+	/*
+	 * The hypervisor does not expose multiple VAS instances, but can
+	 * see multiple VAS instances on PowerNV. So create 'vas0' directory
+	 * on pseries.
+	 */
+	pseries_vas_kobj = kobject_create_and_add("vas0",
+					&vas_miscdev.this_device->kobj);
+	if (!pseries_vas_kobj) {
+		misc_deregister(&vas_miscdev);
+		pr_err("Failed to create VAS sysfs entry\n");
+		return -ENOMEM;
+	}
+
+	/* The gzip directory is only needed when a GZIP feature is present */
+	if (!(vas_caps->feat_type &
+	      (VAS_GZIP_QOS_FEAT_BIT | VAS_GZIP_DEF_FEAT_BIT)))
+		return 0;
+
+	gzip_caps_kobj = kobject_create_and_add("gzip", pseries_vas_kobj);
+	if (gzip_caps_kobj)
+		return 0;
+
+	pr_err("Failed to create VAS GZIP capability entry\n");
+	kobject_put(pseries_vas_kobj);
+	misc_deregister(&vas_miscdev);
+	return -ENOMEM;
+}
+
+#else
+/* Without CONFIG_SYSFS there is nothing to add; report success. */
+int sysfs_add_vas_caps(struct vas_cop_feat_caps *caps)
+{
+ return 0;
+}
+
+/* Without CONFIG_SYSFS there is nothing to initialise; report success. */
+int __init sysfs_pseries_vas_init(struct vas_all_caps *vas_caps)
+{
+ return 0;
+}
+#endif
diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c
new file mode 100644
index 000000000..71d52a670
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/vas.c
@@ -0,0 +1,1121 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2020-21 IBM Corp.
+ */
+
+#define pr_fmt(fmt) "vas: " fmt
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/types.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/irqdomain.h>
+#include <asm/machdep.h>
+#include <asm/hvcall.h>
+#include <asm/plpar_wrappers.h>
+#include <asm/firmware.h>
+#include <asm/vphn.h>
+#include <asm/vas.h>
+#include "vas.h"
+
+/* Window address value treated as "COPY/PASTE is not supported". */
+#define VAS_INVALID_WIN_ADDRESS 0xFFFFFFFFFFFFFFFFul
+/* Domain value that lets the hypervisor select the VAS instance. */
+#define VAS_DEFAULT_DOMAIN_ID 0xFFFFFFFFFFFFFFFFul
+/* The hypervisor allows one credit per window right now */
+#define DEF_WIN_CREDS 1
+
+/* NOTE(review): these three are populated outside this excerpt. */
+static struct vas_all_caps caps_all;
+static bool copypaste_feat;
+static struct hv_vas_cop_feat_caps hv_cop_caps;
+
+/* Per feature type capabilities and window bookkeeping. */
+static struct vas_caps vascaps[VAS_MAX_FEAT_TYPE];
+/* Serialises window allocate/deallocate hcalls and IRQ setup/free. */
+static DEFINE_MUTEX(vas_pseries_mutex);
+/* While set, window allocation is refused with -EBUSY. */
+static bool migration_in_progress;
+
+/*
+ * Normalise a "busy" hcall status so that callers can simply retry while
+ * the result is H_BUSY.
+ *
+ * A long-busy status sleeps for the interval it encodes and is reported as
+ * H_BUSY; a plain H_BUSY gives the scheduler a chance to run. Any other
+ * status is returned unchanged.
+ */
+static long hcall_return_busy_check(long rc)
+{
+	/* Check if we are stalled for some time */
+	if (H_IS_LONG_BUSY(rc)) {
+		msleep(get_longbusy_msecs(rc));
+		return H_BUSY;
+	}
+
+	if (rc == H_BUSY)
+		cond_resched();
+
+	return rc;
+}
+
+/*
+ * Allocate VAS window hcall
+ *
+ * @win:     window that receives the id, address and IRQs on success
+ * @domain:  six domain values passed through to the hypervisor
+ * @wintype: window type
+ * @credits: number of credits requested for the window
+ *
+ * The hcall is retried for as long as the hypervisor reports busy.
+ *
+ * Returns 0 on success, -ENOTSUPP if the hypervisor returned
+ * VAS_INVALID_WIN_ADDRESS as the window address, or -EIO for any other
+ * hcall failure.
+ */
+static int h_allocate_vas_window(struct pseries_vas_window *win, u64 *domain,
+				 u8 wintype, u16 credits)
+{
+	long retbuf[PLPAR_HCALL9_BUFSIZE] = {0};
+	long rc;
+
+	do {
+		rc = plpar_hcall9(H_ALLOCATE_VAS_WINDOW, retbuf, wintype,
+				  credits, domain[0], domain[1], domain[2],
+				  domain[3], domain[4], domain[5]);
+
+		rc = hcall_return_busy_check(rc);
+	} while (rc == H_BUSY);
+
+	if (rc != H_SUCCESS) {
+		pr_err("H_ALLOCATE_VAS_WINDOW error: %ld, wintype: %u, credits: %u\n",
+		       rc, wintype, credits);
+		return -EIO;
+	}
+
+	/*
+	 * Validate the address the hypervisor just returned. The previous
+	 * content of win->win_addr says nothing about this allocation.
+	 * NOTE(review): the window stays allocated in the hypervisor on
+	 * this path, as before; confirm whether it should be deallocated.
+	 */
+	if ((u64)retbuf[1] == VAS_INVALID_WIN_ADDRESS) {
+		pr_err("H_ALLOCATE_VAS_WINDOW: COPY/PASTE is not supported\n");
+		return -ENOTSUPP;
+	}
+
+	win->vas_win.winid = retbuf[0];
+	win->win_addr = retbuf[1];
+	win->complete_irq = retbuf[2];
+	win->fault_irq = retbuf[3];
+
+	return 0;
+}
+
+/*
+ * Deallocate VAS window hcall.
+ *
+ * @winid: id of the window to close.
+ *
+ * Retries while the hypervisor reports busy. Returns 0 on success and
+ * -EIO otherwise.
+ */
+static int h_deallocate_vas_window(u64 winid)
+{
+	long hrc;
+
+	for (;;) {
+		hrc = plpar_hcall_norets(H_DEALLOCATE_VAS_WINDOW, winid);
+		hrc = hcall_return_busy_check(hrc);
+		if (hrc != H_BUSY)
+			break;
+	}
+
+	if (hrc != H_SUCCESS) {
+		pr_err("H_DEALLOCATE_VAS_WINDOW error: %ld, winid: %llu\n",
+		       hrc, winid);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+/*
+ * Modify VAS window.
+ * After the window is opened with allocate window hcall, configure it
+ * with flags and LPAR PID before using.
+ *
+ * Retries while the hypervisor reports busy. Returns 0 on success and
+ * -EIO otherwise.
+ */
+static int h_modify_vas_window(struct pseries_vas_window *win)
+{
+	long hrc;
+
+	/*
+	 * AMR value is not supported in Linux VAS implementation.
+	 * The hypervisor ignores it if 0 is passed.
+	 */
+	for (;;) {
+		hrc = plpar_hcall_norets(H_MODIFY_VAS_WINDOW,
+					 win->vas_win.winid, win->pid, 0,
+					 VAS_MOD_WIN_FLAGS, 0);
+		hrc = hcall_return_busy_check(hrc);
+		if (hrc != H_BUSY)
+			break;
+	}
+
+	if (hrc != H_SUCCESS) {
+		pr_err("H_MODIFY_VAS_WINDOW error: %ld, winid %u pid %u\n",
+		       hrc, win->vas_win.winid, win->pid);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+/*
+ * This hcall is used to determine the capabilities from the hypervisor.
+ * @hcall: H_QUERY_VAS_CAPABILITIES or H_QUERY_NX_CAPABILITIES
+ * @query_type: If 0 is passed, the hypervisor returns the overall
+ *		capabilities which provides all feature(s) that are
+ *		available. Then query the hypervisor to get the
+ *		corresponding capabilities for the specific feature.
+ *		Example: H_QUERY_VAS_CAPABILITIES provides VAS GZIP QoS
+ *			and VAS GZIP Default capabilities.
+ *			H_QUERY_NX_CAPABILITIES provides NX GZIP
+ *			capabilities.
+ * @result: Return buffer to save capabilities.
+ *
+ * Returns 0 on success and -EIO on any failure. H_FUNCTION is not logged.
+ */
+int h_query_vas_capabilities(const u64 hcall, u8 query_type, u64 result)
+{
+	const char *hcall_name;
+	long hrc = plpar_hcall_norets(hcall, query_type, result);
+
+	if (hrc == H_SUCCESS)
+		return 0;
+
+	/* H_FUNCTION means HV does not support VAS so don't print an error */
+	if (hrc == H_FUNCTION)
+		return -EIO;
+
+	if (hcall == H_QUERY_VAS_CAPABILITIES)
+		hcall_name = "H_QUERY_VAS_CAPABILITIES";
+	else
+		hcall_name = "H_QUERY_NX_CAPABILITIES";
+
+	pr_err("%s error %ld, query_type %u, result buffer 0x%llx\n",
+	       hcall_name, hrc, query_type, result);
+
+	return -EIO;
+}
+EXPORT_SYMBOL_GPL(h_query_vas_capabilities);
+
+/*
+ * hcall to get fault CRB from the hypervisor.
+ *
+ * @winid:  window the fault belongs to
+ * @buffer: address of the buffer handed to the hypervisor
+ *
+ * Returns 0 on success and -EIO otherwise.
+ */
+static int h_get_nx_fault(u32 winid, u64 buffer)
+{
+	long hrc = plpar_hcall_norets(H_GET_NX_FAULT, winid, buffer);
+
+	if (hrc != H_SUCCESS) {
+		pr_err("H_GET_NX_FAULT error: %ld, winid %u, buffer 0x%llx\n",
+		       hrc, winid, buffer);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+/*
+ * Handle the fault interrupt.
+ * When the fault interrupt is received for each window, query the
+ * hypervisor to get the fault CRB on the specific fault. Then
+ * process the CRB by updating CSB or send signal if the user space
+ * CSB is invalid.
+ * Note: The hypervisor forwards an interrupt for each fault request.
+ * So one fault CRB to process for each H_GET_NX_FAULT hcall.
+ *
+ * @irq:  interrupt number (unused here)
+ * @data: the pseries_vas_window the interrupt was requested for
+ *
+ * Always returns IRQ_HANDLED.
+ */
+static irqreturn_t pseries_vas_fault_thread_fn(int irq, void *data)
+{
+ struct pseries_vas_window *txwin = data;
+ struct coprocessor_request_block crb;
+ struct vas_user_win_ref *tsk_ref;
+ int rc;
+
+ /* Drain every fault counted by pseries_vas_irq_handler() */
+ while (atomic_read(&txwin->pending_faults)) {
+ rc = h_get_nx_fault(txwin->vas_win.winid, (u64)virt_to_phys(&crb));
+ if (!rc) {
+ tsk_ref = &txwin->vas_win.task_ref;
+ vas_dump_crb(&crb);
+ vas_update_csb(&crb, tsk_ref);
+ }
+ /* The fault is counted as consumed even if the hcall failed */
+ atomic_dec(&txwin->pending_faults);
+ }
+
+ return IRQ_HANDLED;
+}
+
+/*
+ * irq_default_primary_handler() can be used only with IRQF_ONESHOT
+ * which disables IRQ before executing the thread handler and enables
+ * it after. But this disabling interrupt sets the VAS IRQ OFF
+ * state in the hypervisor. If the NX generates fault interrupt
+ * during this window, the hypervisor will not deliver this
+ * interrupt to the LPAR. So use VAS specific IRQ handler instead
+ * of calling the default primary handler.
+ *
+ * @irq:  interrupt number (unused here)
+ * @data: the pseries_vas_window the interrupt was requested for
+ *
+ * Counts the fault in pending_faults and always returns IRQ_WAKE_THREAD.
+ */
+static irqreturn_t pseries_vas_irq_handler(int irq, void *data)
+{
+ struct pseries_vas_window *txwin = data;
+
+ /*
+ * The thread handler will process this interrupt if it is
+ * already running.
+ */
+ atomic_inc(&txwin->pending_faults);
+
+ return IRQ_WAKE_THREAD;
+}
+
+/*
+ * Allocate window and setup IRQ mapping.
+ *
+ * @txwin:   window to allocate and set up
+ * @domain:  domain values passed to the allocate hcall
+ * @wintype: window type passed to the allocate hcall
+ *
+ * Returns 0 on success. On failure everything done so far is undone in
+ * reverse order (name, IRQ mapping, hypervisor window) and a negative
+ * error code is returned.
+ */
+static int allocate_setup_window(struct pseries_vas_window *txwin,
+ u64 *domain, u8 wintype)
+{
+ int rc;
+
+ rc = h_allocate_vas_window(txwin, domain, wintype, DEF_WIN_CREDS);
+ if (rc)
+ return rc;
+ /*
+ * On PowerVM, the hypervisor setup and forwards the fault
+ * interrupt per window. So the IRQ setup and fault handling
+ * will be done for each open window separately.
+ */
+ txwin->fault_virq = irq_create_mapping(NULL, txwin->fault_irq);
+ if (!txwin->fault_virq) {
+ pr_err("Failed irq mapping %d\n", txwin->fault_irq);
+ rc = -EINVAL;
+ goto out_win;
+ }
+
+ /* The name is passed to request_threaded_irq() below */
+ txwin->name = kasprintf(GFP_KERNEL, "vas-win-%d",
+ txwin->vas_win.winid);
+ if (!txwin->name) {
+ rc = -ENOMEM;
+ goto out_irq;
+ }
+
+ /* Flags are 0: IRQF_ONESHOT is deliberately not requested */
+ rc = request_threaded_irq(txwin->fault_virq,
+ pseries_vas_irq_handler,
+ pseries_vas_fault_thread_fn, 0,
+ txwin->name, txwin);
+ if (rc) {
+ pr_err("VAS-Window[%d]: Request IRQ(%u) failed with %d\n",
+ txwin->vas_win.winid, txwin->fault_virq, rc);
+ goto out_free;
+ }
+
+ txwin->vas_win.wcreds_max = DEF_WIN_CREDS;
+
+ return 0;
+out_free:
+ kfree(txwin->name);
+out_irq:
+ irq_dispose_mapping(txwin->fault_virq);
+out_win:
+ h_deallocate_vas_window(txwin->vas_win.winid);
+ return rc;
+}
+
+/*
+ * Undo the IRQ part of allocate_setup_window(): free the fault IRQ, then
+ * its name, then dispose of the IRQ mapping.
+ */
+static inline void free_irq_setup(struct pseries_vas_window *txwin)
+{
+ free_irq(txwin->fault_virq, txwin);
+ kfree(txwin->name);
+ irq_dispose_mapping(txwin->fault_virq);
+}
+
+static struct vas_window *vas_allocate_window(int vas_id, u64 flags,
+ enum vas_cop_type cop_type)
+{
+ long domain[PLPAR_HCALL9_BUFSIZE] = {VAS_DEFAULT_DOMAIN_ID};
+ struct vas_cop_feat_caps *cop_feat_caps;
+ struct vas_caps *caps;
+ struct pseries_vas_window *txwin;
+ int rc;
+
+ txwin = kzalloc(sizeof(*txwin), GFP_KERNEL);
+ if (!txwin)
+ return ERR_PTR(-ENOMEM);
+
+ /*
+ * A VAS window can have many credits which means that many
+ * requests can be issued simultaneously. But the hypervisor
+ * restricts one credit per window.
+ * The hypervisor introduces 2 different types of credits:
+ * Default credit type (Uses normal priority FIFO):
+ * A limited number of credits are assigned to partitions
+ * based on processor entitlement. But these credits may be
+ * over-committed on a system depends on whether the CPUs
+ * are in shared or dedicated modes - that is, more requests
+ * may be issued across the system than NX can service at
+ * once which can result in paste command failure (RMA_busy).
+ * Then the process has to resend requests or fall-back to
+ * SW compression.
+ * Quality of Service (QoS) credit type (Uses high priority FIFO):
+ * To avoid NX HW contention, the system admins can assign
+ * QoS credits for each LPAR so that this partition is
+ * guaranteed access to NX resources. These credits are
+ * assigned to partitions via the HMC.
+ * Refer PAPR for more information.
+ *
+ * Allocate window with QoS credits if user requested. Otherwise
+ * default credits are used.
+ */
+ if (flags & VAS_TX_WIN_FLAG_QOS_CREDIT)
+ caps = &vascaps[VAS_GZIP_QOS_FEAT_TYPE];
+ else
+ caps = &vascaps[VAS_GZIP_DEF_FEAT_TYPE];
+
+ cop_feat_caps = &caps->caps;
+
+ if (atomic_inc_return(&cop_feat_caps->nr_used_credits) >
+ atomic_read(&cop_feat_caps->nr_total_credits)) {
+ pr_err_ratelimited("Credits are not available to allocate window\n");
+ rc = -EINVAL;
+ goto out;
+ }
+
+ if (vas_id == -1) {
+ /*
+ * The user space is requesting to allocate a window on
+ * a VAS instance where the process is executing.
+ * On PowerVM, domain values are passed to the hypervisor
+ * to select VAS instance. Useful if the process is
+ * affinity to NUMA node.
+ * The hypervisor selects VAS instance if
+ * VAS_DEFAULT_DOMAIN_ID (-1) is passed for domain values.
+ * The h_allocate_vas_window hcall is defined to take a
+ * domain values as specified by h_home_node_associativity,
+ * So no unpacking needs to be done.
+ */
+ rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, domain,
+ VPHN_FLAG_VCPU, hard_smp_processor_id());
+ if (rc != H_SUCCESS) {
+ pr_err("H_HOME_NODE_ASSOCIATIVITY error: %d\n", rc);
+ goto out;
+ }
+ }
+
+ txwin->pid = mfspr(SPRN_PID);
+
+ /*
+ * Allocate / Deallocate window hcalls and setup / free IRQs
+ * have to be protected with mutex.
+ * Open VAS window: Allocate window hcall and setup IRQ
+ * Close VAS window: Deallocate window hcall and free IRQ
+ * The hypervisor waits until all NX requests are
+ * completed before closing the window. So expects OS
+ * to handle NX faults, means IRQ can be freed only
+ * after the deallocate window hcall is returned.
+ * So once the window is closed with deallocate hcall before
+ * the IRQ is freed, it can be assigned to new allocate
+ * hcall with the same fault IRQ by the hypervisor. It can
+ * result in setup IRQ fail for the new window since the
+ * same fault IRQ is not freed by the OS before.
+ */
+ mutex_lock(&vas_pseries_mutex);
+ if (migration_in_progress) {
+ rc = -EBUSY;
+ } else {
+ rc = allocate_setup_window(txwin, (u64 *)&domain[0],
+ cop_feat_caps->win_type);
+ if (!rc)
+ caps->nr_open_wins_progress++;
+ }
+
+ mutex_unlock(&vas_pseries_mutex);
+ if (rc)
+ goto out;
+
+ /*
+ * Modify window and it is ready to use.
+ */
+ rc = h_modify_vas_window(txwin);
+ if (!rc)
+ rc = get_vas_user_win_ref(&txwin->vas_win.task_ref);
+ if (rc)
+ goto out_free;
+
+ txwin->win_type = cop_feat_caps->win_type;
+
+ /*
+ * The migration SUSPEND thread sets migration_in_progress and
+ * closes all open windows from the list. But the window is
+ * added to the list after open and modify HCALLs. So possible
+ * that migration_in_progress is set before modify HCALL which
+ * may cause some windows are still open when the hypervisor
+ * initiates the migration.
+ * So checks the migration_in_progress flag again and close all
+ * open windows.
+ *
+ * Possible to lose the acquired credit with DLPAR core
+ * removal after the window is opened. So if there are any
+ * closed windows (means with lost credits), do not give new
+ * window to user space. New windows will be opened only
+ * after the existing windows are reopened when credits are
+ * available.
+ */
+ mutex_lock(&vas_pseries_mutex);
+ if (!caps->nr_close_wins && !migration_in_progress) {
+ list_add(&txwin->win_list, &caps->list);
+ caps->nr_open_windows++;
+ caps->nr_open_wins_progress--;
+ mutex_unlock(&vas_pseries_mutex);
+ vas_user_win_add_mm_context(&txwin->vas_win.task_ref);
+ return &txwin->vas_win;
+ }
+ mutex_unlock(&vas_pseries_mutex);
+
+ put_vas_user_win_ref(&txwin->vas_win.task_ref);
+ rc = -EBUSY;
+ pr_err_ratelimited("No credit is available to allocate window\n");
+
+out_free:
+ /*
+ * Window is not operational. Free IRQ before closing
+ * window so that do not have to hold mutex.
+ */
+ free_irq_setup(txwin);
+ h_deallocate_vas_window(txwin->vas_win.winid);
+ /*
+ * Hold mutex and reduce nr_open_wins_progress counter.
+ */
+ mutex_lock(&vas_pseries_mutex);
+ caps->nr_open_wins_progress--;
+ mutex_unlock(&vas_pseries_mutex);
+out:
+ atomic_dec(&cop_feat_caps->nr_used_credits);
+ kfree(txwin);
+ return ERR_PTR(rc);
+}
+
+static u64 vas_paste_address(struct vas_window *vwin)
+{
+ struct pseries_vas_window *win; /* pseries window that embeds vwin */
+
+ win = container_of(vwin, struct pseries_vas_window, vas_win);
+ return win->win_addr; /* paste address recorded for this window */
+}
+
+static int deallocate_free_window(struct pseries_vas_window *win)
+{
+ int rc = 0;
+
+ /*
+ * Close the window in the hypervisor and, only if that
+ * succeeds, release its IRQ setup.
+ *
+ * The hypervisor waits until all requests, including faults,
+ * are processed before closing the window, which means all
+ * credits have to be returned. In the case of a fault
+ * request, a credit is returned after the OS issues the
+ * H_GET_NX_FAULT hcall.
+ * So free the IRQ only after executing the
+ * H_DEALLOCATE_VAS_WINDOW hcall.
+ *
+ * Returns the h_deallocate_vas_window() result; when it is
+ * non-zero, free_irq_setup() is not called.
+ */
+ rc = h_deallocate_vas_window(win->vas_win.winid);
+ if (!rc)
+ free_irq_setup(win);
+
+ return rc;
+}
+
+static int vas_deallocate_window(struct vas_window *vwin)
+{
+ struct pseries_vas_window *win;
+ struct vas_cop_feat_caps *caps;
+ int rc = 0;
+
+ if (!vwin)
+ return -EINVAL;
+
+ win = container_of(vwin, struct pseries_vas_window, vas_win);
+
+ /* Should not happen: win_type is used to index vascaps[] below */
+ if (win->win_type >= VAS_MAX_FEAT_TYPE) {
+ pr_err("Window (%u): Invalid window type %u\n",
+ vwin->winid, win->win_type);
+ return -EINVAL;
+ }
+
+ caps = &vascaps[win->win_type].caps;
+ mutex_lock(&vas_pseries_mutex);
+ /*
+ * VAS window is already closed in the hypervisor when
+ * lost the credit or with migration. So just remove the entry
+ * from the list, remove task references and free vas_window
+ * struct.
+ *
+ * Otherwise (neither close flag is set) the window is first
+ * closed in the hypervisor. If that fails, the error is
+ * returned with the window still on the list and nothing
+ * freed.
+ */
+ if (!(win->vas_win.status & VAS_WIN_NO_CRED_CLOSE) &&
+ !(win->vas_win.status & VAS_WIN_MIGRATE_CLOSE)) {
+ rc = deallocate_free_window(win);
+ if (rc) {
+ mutex_unlock(&vas_pseries_mutex);
+ return rc;
+ }
+ } else
+ vascaps[win->win_type].nr_close_wins--; /* one less closed window to reopen */
+
+ list_del(&win->win_list);
+ atomic_dec(&caps->nr_used_credits); /* window no longer counts as a used credit */
+ vascaps[win->win_type].nr_open_windows--;
+ mutex_unlock(&vas_pseries_mutex);
+
+ mm_context_remove_vas_window(vwin->task_ref.mm);
+ put_vas_user_win_ref(&vwin->task_ref);
+
+ kfree(win); /* win embeds vwin, so vwin must not be used after this */
+ return 0;
+}
+
+static const struct vas_user_win_ops vops_pseries = { /* handed to vas_register_coproc_api() */
+ .open_win = vas_allocate_window, /* Open and configure window */
+ .paste_addr = vas_paste_address, /* Return the window's paste address for copy/paste */
+ .close_win = vas_deallocate_window, /* Close window and free its struct */
+};
+
+/*
+ * Only the nx-gzip coprocessor type is supported now, but this API
+ * code can be extended to other coprocessor types later.
+ *
+ * Returns -ENOTSUPP when copypaste_feat is not set; otherwise
+ * returns the result of vas_register_coproc_api() called with
+ * the pseries window operations (vops_pseries).
+ */
+int vas_register_api_pseries(struct module *mod, enum vas_cop_type cop_type,
+ const char *name)
+{
+ if (!copypaste_feat)
+ return -ENOTSUPP;
+
+ return vas_register_coproc_api(mod, cop_type, name, &vops_pseries);
+}
+EXPORT_SYMBOL_GPL(vas_register_api_pseries);
+
+void vas_unregister_api_pseries(void)
+{
+ vas_unregister_coproc_api(); /* only forwards the call; no state in this file is touched here */
+}
+EXPORT_SYMBOL_GPL(vas_unregister_api_pseries);
+
+/*
+ * Get the specific capabilities based on the feature type.
+ * Right now supports GZIP default and GZIP QoS capabilities.
+ *
+ * @feat: feature value passed to the H_QUERY_VAS_CAPABILITIES hcall
+ * and saved in vascaps[type].feat
+ * @type: index of the vascaps[] entry that is cleared and filled in
+ * @hv_caps: buffer whose physical address is passed to the hcall and
+ * from which the capabilities are read afterwards
+ *
+ * On success the capabilities are added to sysfs and copypaste_feat
+ * is set.
+ *
+ * Returns 0 on success, the error from the hcall or from
+ * sysfs_add_vas_caps(), -ENOTSUPP if user mode copy/paste is not
+ * reported, or -EINVAL if the reported window type is out of range
+ * or (default feature only) DEF_WIN_CREDS is more than the reported
+ * maximum credits per window.
+ */
+static int __init get_vas_capabilities(u8 feat, enum vas_cop_feat_type type,
+ struct hv_vas_cop_feat_caps *hv_caps)
+{
+ struct vas_cop_feat_caps *caps;
+ struct vas_caps *vcaps;
+ int rc = 0;
+
+ vcaps = &vascaps[type];
+ memset(vcaps, 0, sizeof(*vcaps));
+ INIT_LIST_HEAD(&vcaps->list);
+
+ vcaps->feat = feat;
+ caps = &vcaps->caps;
+
+ rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, feat,
+ (u64)virt_to_phys(hv_caps));
+ if (rc)
+ return rc;
+
+ caps->user_mode = hv_caps->user_mode;
+ if (!(caps->user_mode & VAS_COPY_PASTE_USER_MODE)) {
+ pr_err("User space COPY/PASTE is not supported\n");
+ return -ENOTSUPP;
+ }
+
+ caps->descriptor = be64_to_cpu(hv_caps->descriptor);
+ caps->win_type = hv_caps->win_type;
+ if (caps->win_type >= VAS_MAX_FEAT_TYPE) {
+ pr_err("Unsupported window type %u\n", caps->win_type);
+ return -EINVAL;
+ }
+ caps->max_lpar_creds = be16_to_cpu(hv_caps->max_lpar_creds);
+ caps->max_win_creds = be16_to_cpu(hv_caps->max_win_creds);
+ atomic_set(&caps->nr_total_credits,
+ be16_to_cpu(hv_caps->target_lpar_creds));
+ if (feat == VAS_GZIP_DEF_FEAT) {
+ caps->def_lpar_creds = be16_to_cpu(hv_caps->def_lpar_creds);
+
+ if (caps->max_win_creds < DEF_WIN_CREDS) {
+ pr_err("Window creds(%u) > max allowed window creds(%u)\n",
+ DEF_WIN_CREDS, caps->max_win_creds);
+ return -EINVAL;
+ }
+ }
+
+ rc = sysfs_add_vas_caps(caps);
+ if (rc)
+ return rc;
+
+ copypaste_feat = true;
+
+ return 0;
+}
+
+/*
+ * VAS windows can be closed due to lost credits when the core is
+ * removed. So reopen them if credits are available due to DLPAR
+ * core add and set the window active status. When NX sees the page
+ * fault on the unmapped paste address, the kernel handles the fault
+ * by setting the remapping to new paste address if the window is
+ * active.
+ *
+ * @vcaps: capabilities entry whose window list is walked
+ * @creds: number of credits that can be used to reopen windows;
+ * not used to limit the walk when @migrate is true
+ * @migrate: true to reopen windows closed for migration
+ * (VAS_WIN_MIGRATE_CLOSE), false for windows closed for
+ * lost credits (VAS_WIN_NO_CRED_CLOSE)
+ *
+ * Returns 0 if there are no closed windows or all eligible windows
+ * were handled. Returns the allocate_setup_window() error as is, or
+ * the h_modify_vas_window() error after closing that window again
+ * in the hypervisor. Windows reopened before the failure stay open.
+ */
+static int reconfig_open_windows(struct vas_caps *vcaps, int creds,
+ bool migrate)
+{
+ long domain[PLPAR_HCALL9_BUFSIZE] = {VAS_DEFAULT_DOMAIN_ID};
+ struct vas_cop_feat_caps *caps = &vcaps->caps;
+ struct pseries_vas_window *win = NULL, *tmp;
+ int rc, mv_ents = 0;
+ int flag;
+
+ /*
+ * Nothing to do if there are no closed windows.
+ */
+ if (!vcaps->nr_close_wins)
+ return 0;
+
+ /*
+ * For the core removal, the hypervisor reduces the credits
+ * assigned to the LPAR and the kernel closes VAS windows
+ * in the hypervisor depends on reduced credits. The kernel
+ * uses LIFO (the last windows that are opened will be closed
+ * first) and expects to open in the same order when credits
+ * are available.
+ * For example, 40 windows are closed when the LPAR lost 2 cores
+ * (dedicated). If 1 core is added, this LPAR can have 20 more
+ * credits. It means the kernel can reopen 20 windows. So skip
+ * the first 20 entries in the VAS windows list and reopen the
+ * next 20 windows.
+ * For partition migration, reopen all windows that are closed
+ * during resume.
+ */
+ if ((vcaps->nr_close_wins > creds) && !migrate)
+ mv_ents = vcaps->nr_close_wins - creds;
+
+ list_for_each_entry_safe(win, tmp, &vcaps->list, win_list) {
+ if (!mv_ents)
+ break;
+
+ mv_ents--;
+ }
+
+ /*
+ * Open windows if they are closed only with migration or
+ * DLPAR (lost credit) before.
+ */
+ if (migrate)
+ flag = VAS_WIN_MIGRATE_CLOSE;
+ else
+ flag = VAS_WIN_NO_CRED_CLOSE;
+
+ list_for_each_entry_safe_from(win, tmp, &vcaps->list, win_list) {
+ /*
+ * This window is closed with DLPAR and migration events.
+ * So reopen the window with the last event: here only the
+ * flag of the current event is cleared and the window stays
+ * closed until the other flag is handled.
+ * The user space is not suspended with the current
+ * migration notifier. So the user space can issue DLPAR
+ * CPU hotplug while migration in progress. In this case
+ * this window will be opened with the last event.
+ */
+ if ((win->vas_win.status & VAS_WIN_NO_CRED_CLOSE) &&
+ (win->vas_win.status & VAS_WIN_MIGRATE_CLOSE)) {
+ win->vas_win.status &= ~flag;
+ continue;
+ }
+
+ /*
+ * Nothing to do on this window if it is not closed
+ * with this flag
+ */
+ if (!(win->vas_win.status & flag))
+ continue;
+
+ rc = allocate_setup_window(win, (u64 *)&domain[0],
+ caps->win_type);
+ if (rc)
+ return rc;
+
+ rc = h_modify_vas_window(win);
+ if (rc)
+ goto out;
+
+ mutex_lock(&win->vas_win.task_ref.mmap_mutex);
+ /*
+ * Set window status to active
+ */
+ win->vas_win.status &= ~flag;
+ mutex_unlock(&win->vas_win.task_ref.mmap_mutex);
+ win->win_type = caps->win_type;
+ if (!--vcaps->nr_close_wins)
+ break;
+ }
+
+ return 0;
+out:
+ /*
+ * Window modify HCALL failed. So close the window to the
+ * hypervisor and return.
+ */
+ free_irq_setup(win);
+ h_deallocate_vas_window(win->vas_win.winid);
+ return rc;
+}
+
+/*
+ * The hypervisor reduces the available credits if the LPAR lost core. It
+ * means the excessive windows should not be active and the user space
+ * should not be using these windows to send compression requests to NX.
+ * So the kernel closes the excessive windows and unmaps the paste address
+ * such that the user space receives paste instruction failure. Then up to
+ * the user space to fall back to SW compression and manage with the
+ * existing windows.
+ *
+ * @vcap: capabilities entry whose window list is walked
+ * @excess_creds: number of windows to close; ignored when @migrate is
+ * true, in which case every active window is closed
+ * @migrate: true when closing for migration (VAS_WIN_MIGRATE_CLOSE),
+ * false when closing for lost credits (VAS_WIN_NO_CRED_CLOSE)
+ *
+ * Returns 0, or the deallocate_free_window() error when @migrate is
+ * false. With @migrate true such errors are ignored and the window is
+ * still counted in nr_close_wins.
+ */
+static int reconfig_close_windows(struct vas_caps *vcap, int excess_creds,
+ bool migrate)
+{
+ struct pseries_vas_window *win, *tmp;
+ struct vas_user_win_ref *task_ref;
+ struct vm_area_struct *vma;
+ int rc = 0, flag;
+
+ if (migrate)
+ flag = VAS_WIN_MIGRATE_CLOSE;
+ else
+ flag = VAS_WIN_NO_CRED_CLOSE;
+
+ list_for_each_entry_safe(win, tmp, &vcap->list, win_list) {
+ /*
+ * This window is already closed due to lost credit
+ * or for migration before. Go for next window.
+ * For migration, nothing to do since this window
+ * closed for DLPAR and will be reopened even on
+ * the destination system with other DLPAR operation.
+ * Only the flag of the current event is added to the
+ * window status.
+ */
+ if ((win->vas_win.status & VAS_WIN_MIGRATE_CLOSE) ||
+ (win->vas_win.status & VAS_WIN_NO_CRED_CLOSE)) {
+ win->vas_win.status |= flag;
+ continue;
+ }
+
+ task_ref = &win->vas_win.task_ref;
+ /*
+ * VAS mmap (coproc_mmap()) and its fault handler
+ * (vas_mmap_fault()) are called after holding mmap lock.
+ * So hold mmap mutex after mmap_lock to avoid deadlock.
+ */
+ mmap_write_lock(task_ref->mm);
+ mutex_lock(&task_ref->mmap_mutex);
+ vma = task_ref->vma;
+ /*
+ * Number of available credits are reduced, So select
+ * and close windows.
+ */
+ win->vas_win.status |= flag;
+
+ /*
+ * vma is set in the original mapping. But this mapping
+ * is done with mmap() after the window is opened with ioctl.
+ * so we may not see the original mapping if the core remove
+ * is done before the original mmap() and after the ioctl.
+ */
+ if (vma)
+ zap_vma_pages(vma);
+
+ mutex_unlock(&task_ref->mmap_mutex);
+ mmap_write_unlock(task_ref->mm);
+ /*
+ * Close VAS window in the hypervisor, but do not
+ * free vas_window struct since it may be reused
+ * when the credit is available later (DLPAR with
+ * adding cores). This struct will be used
+ * later when the process issued with close(FD).
+ */
+ rc = deallocate_free_window(win);
+ /*
+ * This failure is from the hypervisor.
+ * No way to stop migration for these failures.
+ * So ignore error and continue closing other windows.
+ */
+ if (rc && !migrate)
+ return rc;
+
+ vcap->nr_close_wins++;
+
+ /*
+ * For migration, do not depend on lpar_creds in case if
+ * mismatch with the hypervisor value (should not happen).
+ * So close all active windows in the list and will be
+ * reopened windows based on the new lpar_creds on the
+ * destination system during resume.
+ */
+ if (!migrate && !--excess_creds)
+ break;
+ }
+
+ return 0;
+}
+
+/*
+ * Get new VAS capabilities when the core add/removal configuration
+ * changes. Reconfigure the windows based on the credits available
+ * in these new capabilities.
+ *
+ * @type: index into vascaps[]; must be less than VAS_MAX_FEAT_TYPE
+ * @new_nr_creds: new total number of credits, stored in
+ * nr_total_credits before windows are reopened or closed
+ *
+ * Returns -EINVAL for an invalid @type, otherwise the result of
+ * reconfig_open_windows() / reconfig_close_windows(), or 0 when
+ * neither needs to be called.
+ */
+int vas_reconfig_capabilties(u8 type, int new_nr_creds)
+{
+ struct vas_cop_feat_caps *caps;
+ int old_nr_creds;
+ struct vas_caps *vcaps;
+ int rc = 0, nr_active_wins;
+
+ if (type >= VAS_MAX_FEAT_TYPE) {
+ pr_err("Invalid credit type %d\n", type);
+ return -EINVAL;
+ }
+
+ vcaps = &vascaps[type];
+ caps = &vcaps->caps;
+
+ mutex_lock(&vas_pseries_mutex);
+
+ old_nr_creds = atomic_read(&caps->nr_total_credits);
+
+ atomic_set(&caps->nr_total_credits, new_nr_creds);
+ /*
+ * The total number of available credits may be decreased or
+ * increased with DLPAR operation. Means some windows have to be
+ * closed / reopened. Hold the vas_pseries_mutex so that the
+ * user space can not open new windows.
+ */
+ if (old_nr_creds < new_nr_creds) {
+ /*
+ * If the existing target credits is less than the new
+ * target, reopen windows if they are closed due to
+ * the previous DLPAR (core removal).
+ */
+ rc = reconfig_open_windows(vcaps, new_nr_creds - old_nr_creds,
+ false);
+ } else {
+ /*
+ * # active windows is more than new LPAR available
+ * credits. So close the excessive windows.
+ * On pseries, each window will have 1 credit.
+ * This branch is also taken when the credits are
+ * unchanged; nothing is closed unless the active
+ * window count exceeds new_nr_creds.
+ */
+ nr_active_wins = vcaps->nr_open_windows - vcaps->nr_close_wins;
+ if (nr_active_wins > new_nr_creds)
+ rc = reconfig_close_windows(vcaps,
+ nr_active_wins - new_nr_creds,
+ false);
+ }
+
+ mutex_unlock(&vas_pseries_mutex);
+ return rc;
+}
+
+int pseries_vas_dlpar_cpu(void)
+{
+ int new_nr_creds, rc;
+
+ /*
+ * NX-GZIP is not enabled. Nothing to do for DLPAR event.
+ * Otherwise the default GZIP capabilities are queried again
+ * below and the windows are reconfigured with the reported
+ * target credits. Returns 0, or the error from the hcall or
+ * from vas_reconfig_capabilties().
+ */
+ if (!copypaste_feat)
+ return 0;
+
+
+ rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES,
+ vascaps[VAS_GZIP_DEF_FEAT_TYPE].feat,
+ (u64)virt_to_phys(&hv_cop_caps));
+ if (!rc) {
+ new_nr_creds = be16_to_cpu(hv_cop_caps.target_lpar_creds);
+ rc = vas_reconfig_capabilties(VAS_GZIP_DEF_FEAT_TYPE, new_nr_creds);
+ }
+
+ if (rc)
+ pr_err("Failed reconfig VAS capabilities with DLPAR\n");
+
+ return rc;
+}
+
+/*
+ * Total number of default credits available (target_credits)
+ * in LPAR depends on number of cores configured. It varies based on
+ * whether processors are in shared mode or dedicated mode.
+ * Get the notifier when CPU configuration is changed with DLPAR
+ * operation so that get the new target_credits (vas default capabilities)
+ * and then update the existing windows usage if needed.
+ *
+ * Returns NOTIFY_OK for shared processor partitions, for actions
+ * other than node attach/detach, and for nodes without the
+ * "ibm,ppc-interrupt-server#s" property. Otherwise returns the value
+ * of pseries_vas_dlpar_cpu().
+ * NOTE(review): pseries_vas_dlpar_cpu() returns 0 or an error code,
+ * not a NOTIFY_* value - confirm this is what the notifier chain
+ * expects.
+ */
+static int pseries_vas_notifier(struct notifier_block *nb,
+ unsigned long action, void *data)
+{
+ struct of_reconfig_data *rd = data;
+ struct device_node *dn = rd->dn;
+ const __be32 *intserv = NULL;
+ int len;
+
+ /*
+ * For shared CPU partition, the hypervisor assigns total credits
+ * based on entitled core capacity. So updating VAS windows will
+ * be called from lparcfg_write().
+ */
+ if (is_shared_processor())
+ return NOTIFY_OK;
+
+ if ((action == OF_RECONFIG_ATTACH_NODE) ||
+ (action == OF_RECONFIG_DETACH_NODE))
+ intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s",
+ &len);
+ /*
+ * Processor config is not changed
+ */
+ if (!intserv)
+ return NOTIFY_OK;
+
+ return pseries_vas_dlpar_cpu();
+}
+
+static struct notifier_block pseries_vas_nb = { /* registered in pseries_vas_init() */
+ .notifier_call = pseries_vas_notifier,
+};
+
+/*
+ * For LPM, all windows have to be closed on the source partition
+ * before migration and reopened on the destination partition
+ * after migration. So close the windows during suspend and
+ * reopen them during resume.
+ *
+ * @action: VAS_SUSPEND or VAS_RESUME
+ *
+ * migration_in_progress is set for VAS_SUSPEND and cleared for any
+ * other action before the feature types are walked.
+ *
+ * Returns 0 when NX-GZIP is not enabled, -EINVAL for an unknown
+ * action, and for VAS_RESUME the first capabilities hcall or window
+ * reopen error. Errors while closing windows during suspend are
+ * ignored.
+ */
+int vas_migration_handler(int action)
+{
+ struct vas_cop_feat_caps *caps;
+ int old_nr_creds, new_nr_creds = 0;
+ struct vas_caps *vcaps;
+ int i, rc = 0;
+
+ pr_info("VAS migration event %d\n", action);
+
+ /*
+ * NX-GZIP is not enabled. Nothing to do for migration.
+ */
+ if (!copypaste_feat)
+ return rc;
+
+ if (action == VAS_SUSPEND)
+ migration_in_progress = true;
+ else
+ migration_in_progress = false;
+
+ for (i = 0; i < VAS_MAX_FEAT_TYPE; i++) {
+ vcaps = &vascaps[i];
+ caps = &vcaps->caps;
+ old_nr_creds = atomic_read(&caps->nr_total_credits);
+
+ rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES,
+ vcaps->feat,
+ (u64)virt_to_phys(&hv_cop_caps));
+ if (!rc) {
+ new_nr_creds = be16_to_cpu(hv_cop_caps.target_lpar_creds);
+ /*
+ * Should not happen. But in case it does, print
+ * messages, close all windows in the list during
+ * suspend and reopen windows based on new lpar_creds
+ * on the destination system.
+ */
+ if (old_nr_creds != new_nr_creds) {
+ pr_err("Target credits mismatch with the hypervisor\n");
+ pr_err("state(%d): lpar creds: %d HV lpar creds: %d\n",
+ action, old_nr_creds, new_nr_creds);
+ pr_err("Used creds: %d, Active creds: %d\n",
+ atomic_read(&caps->nr_used_credits),
+ vcaps->nr_open_windows - vcaps->nr_close_wins);
+ }
+ } else {
+ pr_err("state(%d): Get VAS capabilities failed with %d\n",
+ action, rc);
+ /*
+ * We can not stop migration with the current lpm
+ * implementation. So continue closing all windows in
+ * the list (during suspend) and return without
+ * opening windows (during resume) if VAS capabilities
+ * HCALL failed.
+ */
+ if (action == VAS_RESUME)
+ goto out;
+ }
+
+ switch (action) {
+ case VAS_SUSPEND:
+ mutex_lock(&vas_pseries_mutex);
+ rc = reconfig_close_windows(vcaps, vcaps->nr_open_windows,
+ true);
+ /*
+ * Windows are included in the list after successful
+ * open. So wait for closing these in-progress open
+ * windows in vas_allocate_window() which will be
+ * done if the migration_in_progress is set.
+ * The mutex is dropped while sleeping so that those
+ * in-progress opens can take it.
+ */
+ while (vcaps->nr_open_wins_progress) {
+ mutex_unlock(&vas_pseries_mutex);
+ msleep(10);
+ mutex_lock(&vas_pseries_mutex);
+ }
+ mutex_unlock(&vas_pseries_mutex);
+ break;
+ case VAS_RESUME:
+ mutex_lock(&vas_pseries_mutex);
+ atomic_set(&caps->nr_total_credits, new_nr_creds);
+ rc = reconfig_open_windows(vcaps, new_nr_creds, true);
+ mutex_unlock(&vas_pseries_mutex);
+ break;
+ default:
+ /* should not happen */
+ pr_err("Invalid migration action %d\n", action);
+ rc = -EINVAL;
+ goto out;
+ }
+
+ /*
+ * Ignore errors during suspend and return for resume.
+ */
+ if (rc && (action == VAS_RESUME))
+ goto out;
+ }
+
+ pr_info("VAS migration event (%d) successful\n", action);
+
+out:
+ return rc;
+}
+
+static int __init pseries_vas_init(void)
+{
+ struct hv_vas_all_caps *hv_caps;
+ int rc = 0;
+
+ /*
+ * Linux supports user space COPY/PASTE only with Radix
+ */
+ if (!radix_enabled()) {
+ copypaste_feat = false;
+ pr_err("API is supported only with radix page tables\n");
+ return -ENOTSUPP;
+ }
+
+ hv_caps = kmalloc(sizeof(*hv_caps), GFP_KERNEL);
+ if (!hv_caps)
+ return -ENOMEM;
+ /*
+ * Get VAS overall capabilities by passing 0 to feature type.
+ * hv_caps is only used for this overall query; the feature
+ * specific queries below use hv_cop_caps.
+ */
+ rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, 0,
+ (u64)virt_to_phys(hv_caps));
+ if (rc)
+ goto out;
+
+ caps_all.descriptor = be64_to_cpu(hv_caps->descriptor);
+ caps_all.feat_type = be64_to_cpu(hv_caps->feat_type);
+
+ sysfs_pseries_vas_init(&caps_all); /* NOTE(review): declared to return int; result is not checked here */
+
+ /*
+ * QOS capabilities available
+ */
+ if (caps_all.feat_type & VAS_GZIP_QOS_FEAT_BIT) {
+ rc = get_vas_capabilities(VAS_GZIP_QOS_FEAT,
+ VAS_GZIP_QOS_FEAT_TYPE, &hv_cop_caps);
+
+ if (rc)
+ goto out;
+ }
+ /*
+ * Default capabilities available
+ */
+ if (caps_all.feat_type & VAS_GZIP_DEF_FEAT_BIT)
+ rc = get_vas_capabilities(VAS_GZIP_DEF_FEAT,
+ VAS_GZIP_DEF_FEAT_TYPE, &hv_cop_caps);
+
+ if (!rc && copypaste_feat) {
+ if (firmware_has_feature(FW_FEATURE_LPAR))
+ of_reconfig_notifier_register(&pseries_vas_nb);
+
+ pr_info("GZIP feature is available\n");
+ } else {
+ /*
+ * Should not happen, but only when get default
+ * capabilities HCALL failed. So disable copy paste
+ * feature.
+ */
+ copypaste_feat = false;
+ }
+
+out:
+ kfree(hv_caps); /* reached on both the success and the error paths */
+ return rc;
+}
+machine_device_initcall(pseries, pseries_vas_init);
diff --git a/arch/powerpc/platforms/pseries/vas.h b/arch/powerpc/platforms/pseries/vas.h
new file mode 100644
index 000000000..45567cd13
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/vas.h
@@ -0,0 +1,157 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright 2020-21 IBM Corp.
+ */
+
+#ifndef _VAS_H
+#define _VAS_H
+#include <asm/vas.h>
+#include <linux/mutex.h>
+#include <linux/stringify.h>
+
+/*
+ * VAS window modify flags.
+ * VAS_MOD_WIN_FLAGS combines the JOBS_KILL, DR, PR and SF bits;
+ * the CLOSE and TA bits are not part of it.
+ */
+#define VAS_MOD_WIN_CLOSE PPC_BIT(0)
+#define VAS_MOD_WIN_JOBS_KILL PPC_BIT(1)
+#define VAS_MOD_WIN_DR PPC_BIT(3)
+#define VAS_MOD_WIN_PR PPC_BIT(4)
+#define VAS_MOD_WIN_SF PPC_BIT(5)
+#define VAS_MOD_WIN_TA PPC_BIT(6)
+#define VAS_MOD_WIN_FLAGS (VAS_MOD_WIN_JOBS_KILL | VAS_MOD_WIN_DR | \
+ VAS_MOD_WIN_PR | VAS_MOD_WIN_SF)
+
+#define VAS_WIN_ACTIVE 0x0
+#define VAS_WIN_CLOSED 0x1
+#define VAS_WIN_INACTIVE 0x2 /* Inactive due to HW failure */
+/* In the process of being modified, deallocated, or quiesced */
+#define VAS_WIN_MOD_IN_PROCESS 0x3
+
+#define VAS_COPY_PASTE_USER_MODE 0x00000001
+#define VAS_COP_OP_USER_MODE 0x00000010
+
+#define VAS_GZIP_QOS_CAPABILITIES 0x56516F73477A6970 /* ASCII "VQosGzip" */
+#define VAS_GZIP_DEFAULT_CAPABILITIES 0x56446566477A6970 /* ASCII "VDefGzip" */
+
+enum vas_migrate_action { /* action argument of vas_migration_handler() */
+ VAS_SUSPEND, /* close windows before migration */
+ VAS_RESUME, /* reopen windows after migration */
+};
+
+/*
+ * Co-processor feature - GZIP QoS windows or GZIP default windows.
+ * VAS_MAX_FEAT_TYPE is the number of feature types and is used as
+ * the upper bound when validating a window or credit type.
+ */
+enum vas_cop_feat_type {
+ VAS_GZIP_QOS_FEAT_TYPE,
+ VAS_GZIP_DEF_FEAT_TYPE,
+ VAS_MAX_FEAT_TYPE,
+};
+
+/*
+ * Used to get feature specific capabilities from the
+ * hypervisor. Multi-byte fields are big-endian (__be16 / __be64)
+ * and have to be converted before use. The struct is packed and
+ * aligned to 0x1000 bytes.
+ */
+struct hv_vas_cop_feat_caps {
+ __be64 descriptor;
+ u8 win_type; /* Default or QoS type */
+ u8 user_mode;
+ __be16 max_lpar_creds;
+ __be16 max_win_creds;
+ union {
+ __be16 reserved;
+ __be16 def_lpar_creds; /* Used for default capabilities */
+ };
+ __be16 target_lpar_creds;
+} __packed __aligned(0x1000);
+
+/*
+ * Feature specific (QoS or default) capabilities, kept in CPU byte
+ * order. The two credit counters are atomic_t.
+ */
+struct vas_cop_feat_caps {
+ u64 descriptor;
+ u8 win_type; /* Default or QoS type */
+ u8 user_mode; /* User mode copy/paste or COP HCALL */
+ u16 max_lpar_creds; /* Max credits available in LPAR */
+ /* Max credits that can be assigned per window */
+ u16 max_win_creds;
+ union {
+ u16 reserved; /* Used for QoS credit type */
+ u16 def_lpar_creds; /* Used for default credit type */
+ };
+ /* Total LPAR available credits. Can be different from max LPAR */
+ /* credits due to DLPAR operation */
+ atomic_t nr_total_credits; /* Total credits assigned to LPAR */
+ atomic_t nr_used_credits; /* Used credits so far */
+};
+
+/*
+ * Feature (QoS or Default) specific to store capabilities and
+ * the list of open windows.
+ */
+struct vas_caps {
+ struct vas_cop_feat_caps caps;
+ struct list_head list; /* List of open windows */
+ int nr_open_wins_progress; /* Number of open windows in */
+ /* progress. Used in migration */
+ int nr_close_wins; /* windows closed in the hypervisor for DLPAR or migration */
+ int nr_open_windows; /* Number of successful open windows */
+ u8 feat; /* Feature type */
+};
+
+/*
+ * To get window information from the hypervisor.
+ * Multi-byte fields are big-endian; the struct is packed and
+ * aligned to 0x1000 bytes.
+ */
+struct hv_vas_win_lpar {
+ __be16 version;
+ u8 win_type;
+ u8 status;
+ __be16 credits; /* No of credits assigned to this window */
+ __be16 reserved;
+ __be32 pid; /* LPAR Process ID */
+ __be32 tid; /* LPAR Thread ID */
+ __be64 win_addr; /* Paste address */
+ __be32 interrupt; /* Interrupt when NX request completes */
+ __be32 fault; /* Interrupt when NX sees fault */
+ /* Associativity Domain Identifiers as returned in */
+ /* H_HOME_NODE_ASSOCIATIVITY */
+ __be64 domain[6];
+ __be64 win_util; /* Number of bytes processed */
+} __packed __aligned(0x1000);
+
+struct pseries_vas_window { /* pseries state wrapped around a struct vas_window */
+ struct vas_window vas_win; /* embedded window; container_of() recovers this struct */
+ u64 win_addr; /* Physical paste address */
+ u8 win_type; /* QoS or Default window */
+ u32 complete_irq; /* Completion interrupt */
+ u32 fault_irq; /* Fault interrupt */
+ u64 domain[6]; /* Associativity domain Ids */
+ /* where this window is allocated */
+ u64 util;
+ u32 pid; /* PID associated with this window */
+
+ /* List of windows opened which is used for LPM */
+ struct list_head win_list;
+ u64 flags;
+ char *name;
+ int fault_virq;
+ atomic_t pending_faults; /* Number of pending faults */
+};
+
+int sysfs_add_vas_caps(struct vas_cop_feat_caps *caps);
+int vas_reconfig_capabilties(u8 type, int new_nr_creds);
+int __init sysfs_pseries_vas_init(struct vas_all_caps *vas_caps);
+
+#ifdef CONFIG_PPC_VAS
+int vas_migration_handler(int action);
+int pseries_vas_dlpar_cpu(void);
+#else /* !CONFIG_PPC_VAS: stubs that do nothing and return 0 */
+static inline int vas_migration_handler(int action)
+{
+ return 0;
+}
+static inline int pseries_vas_dlpar_cpu(void)
+{
+ return 0;
+}
+#endif /* CONFIG_PPC_VAS */
+#endif /* _VAS_H */
diff --git a/arch/powerpc/platforms/pseries/vio.c b/arch/powerpc/platforms/pseries/vio.c
new file mode 100644
index 000000000..2dc9cbc4b
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/vio.c
@@ -0,0 +1,1729 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * IBM PowerPC Virtual I/O Infrastructure Support.
+ *
+ * Copyright (c) 2003,2008 IBM Corp.
+ * Dave Engebretsen engebret@us.ibm.com
+ * Santiago Leon santil@us.ibm.com
+ * Hollis Blanchard <hollisb@us.ibm.com>
+ * Stephen Rothwell
+ * Robert Jennings <rcjenn@us.ibm.com>
+ */
+
+#include <linux/cpu.h>
+#include <linux/types.h>
+#include <linux/delay.h>
+#include <linux/stat.h>
+#include <linux/device.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/console.h>
+#include <linux/export.h>
+#include <linux/mm.h>
+#include <linux/dma-map-ops.h>
+#include <linux/kobject.h>
+#include <linux/kexec.h>
+#include <linux/of_irq.h>
+
+#include <asm/iommu.h>
+#include <asm/dma.h>
+#include <asm/vio.h>
+#include <asm/prom.h>
+#include <asm/firmware.h>
+#include <asm/tce.h>
+#include <asm/page.h>
+#include <asm/hvcall.h>
+#include <asm/machdep.h>
+
+static struct vio_dev vio_bus_device = { /* fake "parent" device */
+ .name = "vio",
+ .type = "",
+ .dev.init_name = "vio",
+ .dev.bus = &vio_bus_type,
+};
+
+#ifdef CONFIG_PPC_SMLPAR
+/**
+ * struct vio_cmo_pool - A pool of IO memory for CMO use
+ *
+ * @size: The size of the pool in bytes
+ * @free: The amount of free memory in the pool, in bytes
+ */
+struct vio_cmo_pool {
+ size_t size;
+ size_t free;
+};
+
+/* How many ms to delay queued balance work */
+#define VIO_CMO_BALANCE_DELAY 100
+
+/* Portion out IO memory to CMO devices by this chunk size */
+#define VIO_CMO_BALANCE_CHUNK 131072
+
+/**
+ * struct vio_cmo_dev_entry - A device that is CMO-enabled and requires entitlement
+ *
+ * @viodev: struct vio_dev pointer
+ * @list: pointer to other devices on bus that are being tracked
+ */
+struct vio_cmo_dev_entry {
+ struct vio_dev *viodev;
+ struct list_head list;
+};
+
+/**
+ * struct vio_cmo - VIO bus accounting structure for CMO entitlement
+ *
+ * @lock: spinlock for entire structure
+ * @balance_q: delayed work used to rebalance system entitlement
+ * @device_list: list of CMO-enabled devices requiring entitlement
+ * @entitled: total system entitlement in bytes
+ * @reserve: pool of memory from which devices reserve entitlement, incl. spare
+ * @excess: pool of excess entitlement not needed for device reserves or spare
+ * @spare: IO memory for device hotplug functionality
+ * @min: minimum necessary for system operation
+ * @desired: desired memory for system operation
+ * @curr: bytes currently allocated
+ * @high: high water mark of @curr for IO data usage
+ */
+static struct vio_cmo {
+ spinlock_t lock;
+ struct delayed_work balance_q;
+ struct list_head device_list;
+ size_t entitled;
+ struct vio_cmo_pool reserve;
+ struct vio_cmo_pool excess;
+ size_t spare;
+ size_t min;
+ size_t desired;
+ size_t curr;
+ size_t high;
+} vio_cmo;
+
+/**
+ * vio_cmo_num_OF_devs - Count the vdevice children that have a DMA window
+ *
+ * Looks up the "vdevice" node and counts every child of it that carries
+ * an "ibm,my-dma-window" OF property.
+ *
+ * Returns: the number of such children, 0 if no "vdevice" node is found
+ */
+static int vio_cmo_num_OF_devs(void)
+{
+	struct device_node *vdevice;
+	struct device_node *child;
+	int nr_dma_devs = 0;
+
+	vdevice = of_find_node_by_name(NULL, "vdevice");
+	if (vdevice) {
+		for_each_child_of_node(vdevice, child) {
+			if (of_find_property(child, "ibm,my-dma-window", NULL))
+				nr_dma_devs++;
+		}
+	}
+
+	/* Drop the reference taken by the lookup above */
+	of_node_put(vdevice);
+	return nr_dma_devs;
+}
+
+/**
+ * vio_cmo_alloc - allocate IO memory for CMO-enable devices
+ *
+ * @viodev: VIO device requesting IO memory
+ * @size: size of allocation requested
+ *
+ * A request is served from the unused part of the device's own
+ * entitlement first and from the free excess pool for the remainder.
+ * The excess pool only counts as available while the spare pool holds
+ * at least %VIO_CMO_MIN_ENT.
+ *
+ * Return codes:
+ *  0 for successful allocation and -ENOMEM for a failure
+ */
+static inline int vio_cmo_alloc(struct vio_dev *viodev, size_t size)
+{
+	unsigned long irq_flags;
+	size_t own_free = 0;
+	size_t shared_free = 0;
+	size_t from_excess;
+	int rc = 0;
+
+	spin_lock_irqsave(&vio_cmo.lock, irq_flags);
+
+	/* Entitlement the device holds but has not allocated yet */
+	if (viodev->cmo.allocated < viodev->cmo.entitled)
+		own_free = viodev->cmo.entitled - viodev->cmo.allocated;
+
+	/* The excess pool is off limits until the spare is fulfilled */
+	if (vio_cmo.spare >= VIO_CMO_MIN_ENT)
+		shared_free = vio_cmo.excess.free;
+
+	if ((own_free + shared_free) < size) {
+		rc = -ENOMEM;
+		goto unlock;
+	}
+
+	/* Account the allocation on the bus and on the device */
+	vio_cmo.curr += size;
+	if (vio_cmo.curr > vio_cmo.high)
+		vio_cmo.high = vio_cmo.curr;
+	viodev->cmo.allocated += size;
+
+	/* Only what the device's own entitlement cannot cover hits excess */
+	from_excess = size - min(own_free, size);
+	vio_cmo.excess.free -= from_excess;
+
+unlock:
+	spin_unlock_irqrestore(&vio_cmo.lock, irq_flags);
+	return rc;
+}
+
+/**
+ * vio_cmo_dealloc - deallocate IO memory from CMO-enable devices
+ * @viodev: VIO device freeing IO memory
+ * @size: size of deallocation
+ *
+ * IO memory is freed by the device back to the correct memory pools.
+ * The spare pool is replenished first from either memory pool, then
+ * the reserve pool is used to reduce device entitlement, the excess
+ * pool is used to increase the reserve pool toward the desired entitlement
+ * target, and then the remaining memory is returned to the pools.
+ *
+ * Whenever entitlement moved between pools, a balance operation is queued
+ * to run VIO_CMO_BALANCE_DELAY milliseconds later.
+ */
+static inline void vio_cmo_dealloc(struct vio_dev *viodev, size_t size)
+{
+	unsigned long flags;
+	size_t spare_needed = 0;
+	size_t excess_freed = 0;
+	size_t reserve_freed = size;
+	size_t tmp;
+	int balance = 0;
+
+	spin_lock_irqsave(&vio_cmo.lock, flags);
+	vio_cmo.curr -= size;
+
+	/* Amount of memory freed from the excess pool */
+	if (viodev->cmo.allocated > viodev->cmo.entitled) {
+		excess_freed = min(reserve_freed, (viodev->cmo.allocated -
+						   viodev->cmo.entitled));
+		reserve_freed -= excess_freed;
+	}
+
+	/* Remove allocation from device */
+	viodev->cmo.allocated -= (reserve_freed + excess_freed);
+
+	/* Spare is a subset of the reserve pool, replenish it first. */
+	spare_needed = VIO_CMO_MIN_ENT - vio_cmo.spare;
+
+	/*
+	 * Replenish the spare in the reserve pool from the excess pool.
+	 * This moves entitlement into the reserve pool.
+	 */
+	if (spare_needed && excess_freed) {
+		tmp = min(excess_freed, spare_needed);
+		vio_cmo.excess.size -= tmp;
+		vio_cmo.reserve.size += tmp;
+		vio_cmo.spare += tmp;
+		excess_freed -= tmp;
+		spare_needed -= tmp;
+		balance = 1;
+	}
+
+	/*
+	 * Replenish the spare in the reserve pool from the reserve pool.
+	 * This removes entitlement from the device down to VIO_CMO_MIN_ENT,
+	 * if needed, and gives it to the spare pool. The amount of used
+	 * memory in this pool does not change.
+	 */
+	if (spare_needed && reserve_freed) {
+		tmp = min3(spare_needed, reserve_freed, (viodev->cmo.entitled - VIO_CMO_MIN_ENT));
+
+		vio_cmo.spare += tmp;
+		viodev->cmo.entitled -= tmp;
+		reserve_freed -= tmp;
+		spare_needed -= tmp;
+		balance = 1;
+	}
+
+	/*
+	 * Increase the reserve pool until the desired allocation is met.
+	 * Move an allocation freed from the excess pool into the reserve
+	 * pool and schedule a balance operation.
+	 */
+	if (excess_freed && (vio_cmo.desired > vio_cmo.reserve.size)) {
+		tmp = min(excess_freed, (vio_cmo.desired - vio_cmo.reserve.size));
+
+		vio_cmo.excess.size -= tmp;
+		vio_cmo.reserve.size += tmp;
+		excess_freed -= tmp;
+		balance = 1;
+	}
+
+	/* Return memory from the excess pool to that pool */
+	if (excess_freed)
+		vio_cmo.excess.free += excess_freed;
+
+	/*
+	 * VIO_CMO_BALANCE_DELAY is expressed in milliseconds while
+	 * schedule_delayed_work() takes jiffies, so convert explicitly
+	 * instead of letting the delay scale with HZ.
+	 */
+	if (balance)
+		schedule_delayed_work(&vio_cmo.balance_q,
+				      msecs_to_jiffies(VIO_CMO_BALANCE_DELAY));
+	spin_unlock_irqrestore(&vio_cmo.lock, flags);
+}
+
+/**
+ * vio_cmo_entitlement_update - Manage system entitlement changes
+ *
+ * @new_entitlement: new system entitlement to attempt to accommodate
+ *
+ * Increases in entitlement will be used to fulfill the spare entitlement
+ * and the rest is given to the excess pool.  Decreases, if they are
+ * possible, come from the excess pool and from unused device entitlement
+ *
+ * A balance operation is scheduled immediately after any accepted change.
+ *
+ * Returns: 0 on success, -ENOMEM when change can not be made
+ */
+int vio_cmo_entitlement_update(size_t new_entitlement)
+{
+	struct vio_dev *viodev;
+	struct vio_cmo_dev_entry *dev_ent;
+	unsigned long flags;
+	size_t avail, delta, tmp;
+
+	spin_lock_irqsave(&vio_cmo.lock, flags);
+
+	/* Entitlement increases */
+	if (new_entitlement > vio_cmo.entitled) {
+		delta = new_entitlement - vio_cmo.entitled;
+
+		/*
+		 * Record the whole increase up front.  The part of it that
+		 * tops up the spare below lands in the reserve pool and the
+		 * rest in the excess pool, so the total must grow by the
+		 * full amount to keep entitled == reserve.size + excess.size.
+		 */
+		vio_cmo.entitled = new_entitlement;
+
+		/* Fulfill spare allocation */
+		if (vio_cmo.spare < VIO_CMO_MIN_ENT) {
+			tmp = min(delta, (VIO_CMO_MIN_ENT - vio_cmo.spare));
+			vio_cmo.spare += tmp;
+			vio_cmo.reserve.size += tmp;
+			delta -= tmp;
+		}
+
+		/* Remaining new allocation goes to the excess pool */
+		vio_cmo.excess.size += delta;
+		vio_cmo.excess.free += delta;
+
+		goto out;
+	}
+
+	/* Entitlement decreases */
+	delta = vio_cmo.entitled - new_entitlement;
+	avail = vio_cmo.excess.free;
+
+	/*
+	 * Need to check how much unused entitlement each device can
+	 * sacrifice to fulfill entitlement change.
+	 */
+	list_for_each_entry(dev_ent, &vio_cmo.device_list, list) {
+		if (avail >= delta)
+			break;
+
+		viodev = dev_ent->viodev;
+		if ((viodev->cmo.entitled > viodev->cmo.allocated) &&
+		    (viodev->cmo.entitled > VIO_CMO_MIN_ENT))
+			avail += viodev->cmo.entitled -
+				 max_t(size_t, viodev->cmo.allocated,
+				       VIO_CMO_MIN_ENT);
+	}
+
+	if (delta <= avail) {
+		vio_cmo.entitled -= delta;
+
+		/* Take entitlement from the excess pool first */
+		tmp = min(vio_cmo.excess.free, delta);
+		vio_cmo.excess.size -= tmp;
+		vio_cmo.excess.free -= tmp;
+		delta -= tmp;
+
+		/*
+		 * Remove all but VIO_CMO_MIN_ENT bytes from devices
+		 * until entitlement change is served
+		 */
+		list_for_each_entry(dev_ent, &vio_cmo.device_list, list) {
+			if (!delta)
+				break;
+
+			viodev = dev_ent->viodev;
+			tmp = 0;
+			if ((viodev->cmo.entitled > viodev->cmo.allocated) &&
+			    (viodev->cmo.entitled > VIO_CMO_MIN_ENT))
+				tmp = viodev->cmo.entitled -
+				      max_t(size_t, viodev->cmo.allocated,
+					    VIO_CMO_MIN_ENT);
+			viodev->cmo.entitled -= min(tmp, delta);
+			delta -= min(tmp, delta);
+		}
+	} else {
+		spin_unlock_irqrestore(&vio_cmo.lock, flags);
+		return -ENOMEM;
+	}
+
+out:
+	schedule_delayed_work(&vio_cmo.balance_q, 0);
+	spin_unlock_irqrestore(&vio_cmo.lock, flags);
+	return 0;
+}
+
+/**
+ * vio_cmo_balance - Balance entitlement among devices
+ *
+ * @work: work queue structure for this operation
+ *
+ * Any system entitlement above the minimum needed for devices, or
+ * already allocated to devices, can be distributed to the devices.
+ * The list of devices is iterated through to recalculate the desired
+ * entitlement level and to determine how much entitlement above the
+ * minimum entitlement is allocated to devices.
+ *
+ * Small chunks of the available entitlement are given to devices until
+ * their requirements are fulfilled or there is no entitlement left to give.
+ * Upon completion sizes of the reserve and excess pools are calculated.
+ *
+ * The system minimum entitlement level is also recalculated here.
+ * Entitlement will be reserved for devices even after vio_bus_remove to
+ * accommodate reloading the driver. The OF tree is walked to count the
+ * number of devices present and this will remove entitlement for devices
+ * that have actually left the system after having vio_bus_remove called.
+ *
+ * The whole recalculation runs with vio_cmo.lock held and interrupts
+ * disabled.
+ */
+static void vio_cmo_balance(struct work_struct *work)
+{
+ struct vio_cmo *cmo;
+ struct vio_dev *viodev;
+ struct vio_cmo_dev_entry *dev_ent;
+ unsigned long flags;
+ size_t avail = 0, level, chunk, need;
+ int devcount = 0, fulfilled;
+
+ cmo = container_of(work, struct vio_cmo, balance_q.work); /* the struct vio_cmo embedding @work */
+
+ spin_lock_irqsave(&vio_cmo.lock, flags);
+
+ /* Calculate minimum entitlement and fulfill spare */
+ cmo->min = vio_cmo_num_OF_devs() * VIO_CMO_MIN_ENT;
+ BUG_ON(cmo->min > cmo->entitled); /* entitlement must cover the minimum of every counted OF device */
+ cmo->spare = min_t(size_t, VIO_CMO_MIN_ENT, (cmo->entitled - cmo->min));
+ cmo->min += cmo->spare;
+ cmo->desired = cmo->min;
+
+ /*
+ * Determine how much entitlement is available and reset device
+ * entitlements
+ */
+ avail = cmo->entitled - cmo->spare;
+ list_for_each_entry(dev_ent, &vio_cmo.device_list, list) {
+ viodev = dev_ent->viodev;
+ devcount++;
+ viodev->cmo.entitled = VIO_CMO_MIN_ENT;
+ cmo->desired += (viodev->cmo.desired - VIO_CMO_MIN_ENT);
+ avail -= max_t(size_t, viodev->cmo.allocated, VIO_CMO_MIN_ENT);
+ }
+
+ /*
+ * Having provided each device with the minimum entitlement, loop
+ * over the devices portioning out the remaining entitlement
+ * until there is nothing left.
+ */
+ level = VIO_CMO_MIN_ENT;
+ while (avail) {
+ fulfilled = 0;
+ list_for_each_entry(dev_ent, &vio_cmo.device_list, list) {
+ viodev = dev_ent->viodev;
+
+ if (viodev->cmo.desired <= level) {
+ fulfilled++;
+ continue;
+ }
+
+ /*
+ * Give the device up to VIO_CMO_BALANCE_CHUNK
+ * bytes of entitlement, but do not exceed the
+ * desired level of entitlement for the device.
+ */
+ chunk = min_t(size_t, avail, VIO_CMO_BALANCE_CHUNK);
+ chunk = min(chunk, (viodev->cmo.desired -
+ viodev->cmo.entitled));
+ viodev->cmo.entitled += chunk;
+
+ /*
+ * If the memory for this entitlement increase was
+ * already allocated to the device it does not come
+ * from the available pool being portioned out.
+ */
+ need = max(viodev->cmo.allocated, viodev->cmo.entitled)-
+ max(viodev->cmo.allocated, level);
+ avail -= need;
+
+ }
+ if (fulfilled == devcount) /* every device already has its desired level */
+ break;
+ level += VIO_CMO_BALANCE_CHUNK;
+ }
+
+ /* Calculate new reserve and excess pool sizes */
+ cmo->reserve.size = cmo->min;
+ cmo->excess.free = 0;
+ cmo->excess.size = 0;
+ need = 0; /* reused below: total excess entitlement in use by devices */
+ list_for_each_entry(dev_ent, &vio_cmo.device_list, list) {
+ viodev = dev_ent->viodev;
+ /* Calculated reserve size above the minimum entitlement */
+ if (viodev->cmo.entitled)
+ cmo->reserve.size += (viodev->cmo.entitled -
+ VIO_CMO_MIN_ENT);
+ /* Calculated used excess entitlement */
+ if (viodev->cmo.allocated > viodev->cmo.entitled)
+ need += viodev->cmo.allocated - viodev->cmo.entitled;
+ }
+ cmo->excess.size = cmo->entitled - cmo->reserve.size;
+ cmo->excess.free = cmo->excess.size - need;
+
+ cancel_delayed_work(to_delayed_work(work)); /* cancel a queued run of this same work, if one is still pending */
+ spin_unlock_irqrestore(&vio_cmo.lock, flags);
+}
+
+/*
+ * Coherent allocation for CMO devices: charge the page-rounded size to
+ * the device's entitlement, then allocate through the IOMMU. A failure
+ * at either step bumps the device's allocs_failed counter and yields NULL.
+ */
+static void *vio_dma_iommu_alloc_coherent(struct device *dev, size_t size,
+					  dma_addr_t *dma_handle, gfp_t flag,
+					  unsigned long attrs)
+{
+	struct vio_dev *viodev = to_vio_dev(dev);
+	void *cpu_addr;
+
+	if (vio_cmo_alloc(viodev, roundup(size, PAGE_SIZE)))
+		goto count_failure;
+
+	cpu_addr = iommu_alloc_coherent(dev, get_iommu_table_base(dev), size,
+					dma_handle, dev->coherent_dma_mask, flag,
+					dev_to_node(dev));
+	if (likely(cpu_addr != NULL))
+		return cpu_addr;
+
+	/* IOMMU allocation failed: give the charged entitlement back */
+	vio_cmo_dealloc(viodev, roundup(size, PAGE_SIZE));
+
+count_failure:
+	atomic_inc(&viodev->cmo.allocs_failed);
+	return NULL;
+}
+
+/*
+ * Release a coherent buffer through the IOMMU and return its
+ * page-rounded size to the device's CMO accounting.
+ */
+static void vio_dma_iommu_free_coherent(struct device *dev, size_t size,
+					void *vaddr, dma_addr_t dma_handle,
+					unsigned long attrs)
+{
+	struct vio_dev *viodev = to_vio_dev(dev);
+	struct iommu_table *tbl = get_iommu_table_base(dev);
+
+	iommu_free_coherent(tbl, size, vaddr, dma_handle);
+	vio_cmo_dealloc(viodev, roundup(size, PAGE_SIZE));
+}
+
+/*
+ * Map a single page for a CMO device. The size, rounded up to the IOMMU
+ * page size of the device's table, is charged to the device before the
+ * mapping is attempted and refunded if the mapping fails. Any failure
+ * bumps allocs_failed and returns DMA_MAPPING_ERROR.
+ */
+static dma_addr_t vio_dma_iommu_map_page(struct device *dev, struct page *page,
+					 unsigned long offset, size_t size,
+					 enum dma_data_direction direction,
+					 unsigned long attrs)
+{
+	struct vio_dev *viodev = to_vio_dev(dev);
+	struct iommu_table *tbl = get_iommu_table_base(dev);
+	dma_addr_t dma_addr;
+
+	if (!vio_cmo_alloc(viodev, roundup(size, IOMMU_PAGE_SIZE(tbl)))) {
+		dma_addr = iommu_map_page(dev, tbl, page, offset, size,
+					  dma_get_mask(dev), direction, attrs);
+		if (likely(dma_addr != DMA_MAPPING_ERROR))
+			return dma_addr;
+
+		/* Mapping failed: hand the charged entitlement back */
+		vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE(tbl)));
+	}
+
+	atomic_inc(&viodev->cmo.allocs_failed);
+	return DMA_MAPPING_ERROR;
+}
+
+/*
+ * Undo vio_dma_iommu_map_page(): unmap through the IOMMU, then return
+ * the IOMMU-page-rounded size to the device's CMO accounting.
+ */
+static void vio_dma_iommu_unmap_page(struct device *dev, dma_addr_t dma_handle,
+				     size_t size,
+				     enum dma_data_direction direction,
+				     unsigned long attrs)
+{
+	struct iommu_table *tbl = get_iommu_table_base(dev);
+	struct vio_dev *viodev = to_vio_dev(dev);
+	size_t charged;
+
+	iommu_unmap_page(tbl, dma_handle, size, direction, attrs);
+
+	charged = roundup(size, IOMMU_PAGE_SIZE(tbl));
+	vio_cmo_dealloc(viodev, charged);
+}
+
+/*
+ * Map a scatterlist for a CMO device.
+ *
+ * The sum of all entry lengths, each rounded up to the IOMMU page size,
+ * is charged to the device first. After a successful mapping the charge
+ * is trimmed to what the mapped entries actually occupy (dma_length).
+ *
+ * Returns the positive value handed back by ppc_iommu_map_sg() on
+ * success. On failure the full charge is refunded, allocs_failed is
+ * incremented and the failing value is returned: -ENOMEM from
+ * vio_cmo_alloc(), or the zero/negative result of ppc_iommu_map_sg().
+ */
+static int vio_dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist,
+				int nelems, enum dma_data_direction direction,
+				unsigned long attrs)
+{
+	struct vio_dev *viodev = to_vio_dev(dev);
+	struct iommu_table *tbl = get_iommu_table_base(dev);
+	struct scatterlist *sgl;
+	int ret, count;
+	size_t alloc_size = 0;
+
+	for_each_sg(sglist, sgl, nelems, count)
+		alloc_size += roundup(sgl->length, IOMMU_PAGE_SIZE(tbl));
+
+	ret = vio_cmo_alloc(viodev, alloc_size);
+	if (ret)
+		goto out_fail;
+	ret = ppc_iommu_map_sg(dev, tbl, sglist, nelems, dma_get_mask(dev),
+			       direction, attrs);
+	/*
+	 * Treat a negative result as a failure too, not only zero, so that
+	 * an error code from the mapper is never used as a loop bound below
+	 * and is always counted in allocs_failed.
+	 */
+	if (unlikely(ret <= 0))
+		goto out_deallocate;
+
+	/* Keep only the entitlement the mapped entries really consume */
+	for_each_sg(sglist, sgl, ret, count)
+		alloc_size -= roundup(sgl->dma_length, IOMMU_PAGE_SIZE(tbl));
+	if (alloc_size)
+		vio_cmo_dealloc(viodev, alloc_size);
+	return ret;
+
+out_deallocate:
+	vio_cmo_dealloc(viodev, alloc_size);
+out_fail:
+	atomic_inc(&viodev->cmo.allocs_failed);
+	return ret;
+}
+
+/*
+ * Unmap a scatterlist and return its entitlement. The amount is summed
+ * from each entry's dma_length (rounded up to the IOMMU page size)
+ * before the list is handed to ppc_iommu_unmap_sg().
+ */
+static void vio_dma_iommu_unmap_sg(struct device *dev,
+				   struct scatterlist *sglist, int nelems,
+				   enum dma_data_direction direction,
+				   unsigned long attrs)
+{
+	struct vio_dev *viodev = to_vio_dev(dev);
+	struct iommu_table *tbl = get_iommu_table_base(dev);
+	struct scatterlist *entry;
+	size_t released = 0;
+	int i;
+
+	for_each_sg(sglist, entry, nelems, i)
+		released += roundup(entry->dma_length, IOMMU_PAGE_SIZE(tbl));
+
+	ppc_iommu_unmap_sg(tbl, sglist, nelems, direction, attrs);
+	vio_cmo_dealloc(viodev, released);
+}
+
+static const struct dma_map_ops vio_dma_mapping_ops = {
+ .alloc = vio_dma_iommu_alloc_coherent,
+ .free = vio_dma_iommu_free_coherent,
+ .map_sg = vio_dma_iommu_map_sg,
+ .unmap_sg = vio_dma_iommu_unmap_sg,
+ .map_page = vio_dma_iommu_map_page,
+ .unmap_page = vio_dma_iommu_unmap_page,
+ .dma_supported = dma_iommu_dma_supported,
+ .get_required_mask = dma_iommu_get_required_mask,
+ .mmap = dma_common_mmap,
+ .get_sgtable = dma_common_get_sgtable,
+ .alloc_pages = dma_common_alloc_pages,
+ .free_pages = dma_common_free_pages,
+};
+
+/**
+ * vio_cmo_set_dev_desired - Set desired entitlement for a device
+ *
+ * @viodev: struct vio_dev for device to alter
+ * @desired: new desired entitlement level in bytes
+ *
+ * For use by devices to request a change to their entitlement at runtime or
+ * through sysfs. The desired entitlement level is changed and a balancing
+ * of system resources is scheduled to run in the future.
+ *
+ * Values below VIO_CMO_MIN_ENT are raised to VIO_CMO_MIN_ENT. The call is
+ * a no-op when the firmware lacks the CMO feature or when @viodev is not
+ * on the CMO device list.
+ */
+void vio_cmo_set_dev_desired(struct vio_dev *viodev, size_t desired)
+{
+ unsigned long flags;
+ struct vio_cmo_dev_entry *dev_ent;
+ int found = 0;
+
+ if (!firmware_has_feature(FW_FEATURE_CMO))
+ return;
+
+ spin_lock_irqsave(&vio_cmo.lock, flags);
+ if (desired < VIO_CMO_MIN_ENT)
+ desired = VIO_CMO_MIN_ENT;
+
+ /*
+ * Changes will not be made for devices not in the device list.
+ * If it is not in the device list, then no driver is loaded
+ * for the device and it can not receive entitlement.
+ */
+ list_for_each_entry(dev_ent, &vio_cmo.device_list, list)
+ if (viodev == dev_ent->viodev) {
+ found = 1;
+ break;
+ }
+ if (!found) {
+ spin_unlock_irqrestore(&vio_cmo.lock, flags);
+ return;
+ }
+
+ /* Increase/decrease in desired device entitlement */
+ if (desired >= viodev->cmo.desired) {
+ /* Just bump the bus and device values prior to a balance */
+ vio_cmo.desired += desired - viodev->cmo.desired;
+ viodev->cmo.desired = desired;
+ } else {
+ /* Decrease bus and device values for desired entitlement */
+ vio_cmo.desired -= viodev->cmo.desired - desired;
+ viodev->cmo.desired = desired;
+ /*
+ * If less entitlement is desired than current entitlement, move
+ * any reserve memory in the change region to the excess pool.
+ */
+ if (viodev->cmo.entitled > desired) {
+ vio_cmo.reserve.size -= viodev->cmo.entitled - desired;
+ vio_cmo.excess.size += viodev->cmo.entitled - desired;
+ /*
+ * If entitlement moving from the reserve pool to the
+ * excess pool is currently unused, add to the excess
+ * free counter.
+ */
+ if (viodev->cmo.allocated < viodev->cmo.entitled)
+ vio_cmo.excess.free += viodev->cmo.entitled -
+ max(viodev->cmo.allocated, desired);
+ viodev->cmo.entitled = desired;
+ }
+ }
+ schedule_delayed_work(&vio_cmo.balance_q, 0); /* delay 0: queue the rebalance right away */
+ spin_unlock_irqrestore(&vio_cmo.lock, flags);
+}
+
+/**
+ * vio_cmo_bus_probe - Handle CMO specific bus probe activities
+ *
+ * @viodev: Pointer to struct vio_dev for device
+ *
+ * Determine the devices IO memory entitlement needs, attempting
+ * to satisfy the system minimum entitlement at first and scheduling
+ * a balance operation to take care of the rest at a later time.
+ *
+ * A DMA capable device is tracked on vio_cmo.device_list. If the probe
+ * fails after the device was added, the tracking entry is removed and
+ * freed again so that no stale entry is left behind.
+ *
+ * Returns: 0 on success, -EINVAL when device doesn't support CMO, and
+ *          -ENOMEM when entitlement is not available for device or
+ *          device entry.
+ *
+ */
+static int vio_cmo_bus_probe(struct vio_dev *viodev)
+{
+	struct vio_cmo_dev_entry *dev_ent = NULL;
+	struct device *dev = &viodev->dev;
+	struct iommu_table *tbl;
+	struct vio_driver *viodrv = to_vio_driver(dev->driver);
+	unsigned long flags;
+	size_t size;
+	bool dma_capable = false;
+
+	tbl = get_iommu_table_base(dev);
+
+	/* A device requires entitlement if it has a DMA window property */
+	switch (viodev->family) {
+	case VDEVICE:
+		if (of_get_property(viodev->dev.of_node,
+				    "ibm,my-dma-window", NULL))
+			dma_capable = true;
+		break;
+	case PFO:
+		dma_capable = false;
+		break;
+	default:
+		dev_warn(dev, "unknown device family: %d\n", viodev->family);
+		BUG();
+		break;
+	}
+
+	/* Configure entitlement for the device. */
+	if (dma_capable) {
+		/* Check that the driver is CMO enabled and get desired DMA */
+		if (!viodrv->get_desired_dma) {
+			dev_err(dev, "%s: device driver does not support CMO\n",
+				__func__);
+			return -EINVAL;
+		}
+
+		viodev->cmo.desired =
+			IOMMU_PAGE_ALIGN(viodrv->get_desired_dma(viodev), tbl);
+		if (viodev->cmo.desired < VIO_CMO_MIN_ENT)
+			viodev->cmo.desired = VIO_CMO_MIN_ENT;
+		size = VIO_CMO_MIN_ENT;
+
+		dev_ent = kmalloc(sizeof(struct vio_cmo_dev_entry),
+				  GFP_KERNEL);
+		if (!dev_ent)
+			return -ENOMEM;
+
+		dev_ent->viodev = viodev;
+		spin_lock_irqsave(&vio_cmo.lock, flags);
+		list_add(&dev_ent->list, &vio_cmo.device_list);
+	} else {
+		viodev->cmo.desired = 0;
+		size = 0;
+		spin_lock_irqsave(&vio_cmo.lock, flags);
+	}
+
+	/*
+	 * If the needs for vio_cmo.min have not changed since they
+	 * were last set, the number of devices in the OF tree has
+	 * been constant and the IO memory for this is already in
+	 * the reserve pool.
+	 */
+	if (vio_cmo.min == ((vio_cmo_num_OF_devs() + 1) *
+			    VIO_CMO_MIN_ENT)) {
+		/* Updated desired entitlement if device requires it */
+		if (size)
+			vio_cmo.desired += (viodev->cmo.desired -
+					    VIO_CMO_MIN_ENT);
+	} else {
+		size_t tmp;
+
+		tmp = vio_cmo.spare + vio_cmo.excess.free;
+		if (tmp < size) {
+			/* tmp already includes the spare; report it as is */
+			dev_err(dev, "%s: insufficient free "
+				"entitlement to add device. "
+				"Need %lu, have %lu\n", __func__,
+				size, tmp);
+			/*
+			 * Undo the list insertion done above, otherwise the
+			 * entry stays on the device list after the failed
+			 * probe and is never freed.
+			 */
+			if (dev_ent)
+				list_del(&dev_ent->list);
+			spin_unlock_irqrestore(&vio_cmo.lock, flags);
+			kfree(dev_ent);
+			return -ENOMEM;
+		}
+
+		/* Use excess pool first to fulfill request */
+		tmp = min(size, vio_cmo.excess.free);
+		vio_cmo.excess.free -= tmp;
+		vio_cmo.excess.size -= tmp;
+		vio_cmo.reserve.size += tmp;
+
+		/* Use spare if excess pool was insufficient */
+		vio_cmo.spare -= size - tmp;
+
+		/* Update bus accounting */
+		vio_cmo.min += size;
+		vio_cmo.desired += viodev->cmo.desired;
+	}
+	spin_unlock_irqrestore(&vio_cmo.lock, flags);
+	return 0;
+}
+
+/**
+ * vio_cmo_bus_remove - Handle CMO specific bus removal activities
+ *
+ * @viodev: Pointer to struct vio_dev for device
+ *
+ * Remove the device from the cmo device list. The minimum entitlement
+ * will be reserved for the device as long as it is in the system. The
+ * rest of the entitlement the device had been allocated will be returned
+ * to the system.
+ *
+ * It is a fatal error (BUG) for the device to still have IO memory
+ * allocated when this is called.
+ */
+static void vio_cmo_bus_remove(struct vio_dev *viodev)
+{
+ struct vio_cmo_dev_entry *dev_ent;
+ unsigned long flags;
+ size_t tmp;
+
+ spin_lock_irqsave(&vio_cmo.lock, flags);
+ if (viodev->cmo.allocated) {
+ dev_err(&viodev->dev, "%s: device had %lu bytes of IO "
+ "allocated after remove operation.\n",
+ __func__, viodev->cmo.allocated);
+ BUG();
+ }
+
+ /*
+ * Remove the device from the device list being maintained for
+ * CMO enabled devices.
+ */
+ list_for_each_entry(dev_ent, &vio_cmo.device_list, list)
+ if (viodev == dev_ent->viodev) {
+ list_del(&dev_ent->list);
+ kfree(dev_ent);
+ break; /* stop right away: dev_ent was just freed */
+ }
+
+ /*
+ * Devices may not require any entitlement and they do not need
+ * to be processed. Otherwise, return the device's entitlement
+ * back to the pools.
+ */
+ if (viodev->cmo.entitled) {
+ /*
+ * This device has not yet left the OF tree, its
+ * minimum entitlement remains in vio_cmo.min and
+ * vio_cmo.desired
+ */
+ vio_cmo.desired -= (viodev->cmo.desired - VIO_CMO_MIN_ENT);
+
+ /*
+ * Save min allocation for device in reserve as long
+ * as it exists in OF tree as determined by later
+ * balance operation
+ */
+ viodev->cmo.entitled -= VIO_CMO_MIN_ENT;
+
+ /* Replenish spare from freed reserve pool */
+ if (viodev->cmo.entitled && (vio_cmo.spare < VIO_CMO_MIN_ENT)) {
+ tmp = min(viodev->cmo.entitled, (VIO_CMO_MIN_ENT -
+ vio_cmo.spare));
+ vio_cmo.spare += tmp;
+ viodev->cmo.entitled -= tmp;
+ }
+
+ /* Remaining reserve goes to excess pool */
+ vio_cmo.excess.size += viodev->cmo.entitled;
+ vio_cmo.excess.free += viodev->cmo.entitled;
+ vio_cmo.reserve.size -= viodev->cmo.entitled;
+
+ /*
+ * Until the device is removed it will keep a
+ * minimum entitlement; this will guarantee that
+ * a module unload/load will result in a success.
+ */
+ viodev->cmo.entitled = VIO_CMO_MIN_ENT;
+ viodev->cmo.desired = VIO_CMO_MIN_ENT;
+ atomic_set(&viodev->cmo.allocs_failed, 0);
+ }
+
+ spin_unlock_irqrestore(&vio_cmo.lock, flags);
+}
+
+static void vio_cmo_set_dma_ops(struct vio_dev *viodev)
+{
+ set_dma_ops(&viodev->dev, &vio_dma_mapping_ops);
+}
+
+/**
+ * vio_cmo_bus_init - CMO entitlement initialization at bus init time
+ *
+ * Set up the reserve and excess entitlement pools based on available
+ * system entitlement and the number of devices in the OF tree that
+ * require entitlement in the reserve pool.
+ *
+ * The reserve pool starts as one VIO_CMO_MIN_ENT of spare plus one
+ * VIO_CMO_MIN_ENT per counted OF device; everything else becomes excess.
+ * Panics when the system entitlement cannot cover the reserve pool.
+ */
+static void vio_cmo_bus_init(void)
+{
+ struct hvcall_mpp_data mpp_data;
+ int err;
+
+ memset(&vio_cmo, 0, sizeof(struct vio_cmo));
+ spin_lock_init(&vio_cmo.lock);
+ INIT_LIST_HEAD(&vio_cmo.device_list);
+ INIT_DELAYED_WORK(&vio_cmo.balance_q, vio_cmo_balance);
+
+ /* Get current system entitlement */
+ err = h_get_mpp(&mpp_data);
+
+ /*
+ * On failure, continue with entitlement set to 0, will panic()
+ * later when spare is reserved.
+ */
+ if (err != H_SUCCESS) {
+ printk(KERN_ERR "%s: unable to determine system IO "\
+ "entitlement. (%d)\n", __func__, err);
+ vio_cmo.entitled = 0;
+ } else {
+ vio_cmo.entitled = mpp_data.entitled_mem;
+ }
+
+ /* Set reservation and check against entitlement */
+ vio_cmo.spare = VIO_CMO_MIN_ENT;
+ vio_cmo.reserve.size = vio_cmo.spare;
+ vio_cmo.reserve.size += (vio_cmo_num_OF_devs() *
+ VIO_CMO_MIN_ENT);
+ if (vio_cmo.reserve.size > vio_cmo.entitled) {
+ printk(KERN_ERR "%s: insufficient system entitlement\n",
+ __func__);
+ panic("%s: Insufficient system entitlement", __func__);
+ }
+
+ /* Set the remaining accounting variables */
+ vio_cmo.excess.size = vio_cmo.entitled - vio_cmo.reserve.size;
+ vio_cmo.excess.free = vio_cmo.excess.size;
+ vio_cmo.min = vio_cmo.reserve.size;
+ vio_cmo.desired = vio_cmo.reserve.size;
+}
+
+/* sysfs device functions and data structures for CMO */
+
+/*
+ * Generate cmo_<name>_show(), a sysfs show handler that prints the
+ * device's cmo.<name> value as an unsigned long followed by a newline.
+ * sysfs_emit() is used so the output is bounded to the sysfs page buffer.
+ */
+#define viodev_cmo_rd_attr(name)                                        \
+static ssize_t cmo_##name##_show(struct device *dev,                    \
+                                 struct device_attribute *attr,         \
+                                 char *buf)                             \
+{                                                                       \
+	return sysfs_emit(buf, "%lu\n", to_vio_dev(dev)->cmo.name);     \
+}
+
+/*
+ * sysfs show handler: print the device's count of failed CMO
+ * allocations. sysfs_emit() bounds the output to the sysfs page buffer.
+ */
+static ssize_t cmo_allocs_failed_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct vio_dev *viodev = to_vio_dev(dev);
+
+	return sysfs_emit(buf, "%d\n", atomic_read(&viodev->cmo.allocs_failed));
+}
+
+static ssize_t cmo_allocs_failed_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t count)
+{
+ struct vio_dev *viodev = to_vio_dev(dev);
+ atomic_set(&viodev->cmo.allocs_failed, 0);
+ return count;
+}
+
+static ssize_t cmo_desired_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t count)
+{
+ struct vio_dev *viodev = to_vio_dev(dev);
+ size_t new_desired;
+ int ret;
+
+ ret = kstrtoul(buf, 10, &new_desired);
+ if (ret)
+ return ret;
+
+ vio_cmo_set_dev_desired(viodev, new_desired);
+ return count;
+}
+
+viodev_cmo_rd_attr(desired);
+viodev_cmo_rd_attr(entitled);
+viodev_cmo_rd_attr(allocated);
+
+static ssize_t name_show(struct device *, struct device_attribute *, char *);
+static ssize_t devspec_show(struct device *, struct device_attribute *, char *);
+static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
+ char *buf);
+
+static struct device_attribute dev_attr_name;
+static struct device_attribute dev_attr_devspec;
+static struct device_attribute dev_attr_modalias;
+
+static DEVICE_ATTR_RO(cmo_entitled);
+static DEVICE_ATTR_RO(cmo_allocated);
+static DEVICE_ATTR_RW(cmo_desired);
+static DEVICE_ATTR_RW(cmo_allocs_failed);
+
+static struct attribute *vio_cmo_dev_attrs[] = {
+ &dev_attr_name.attr,
+ &dev_attr_devspec.attr,
+ &dev_attr_modalias.attr,
+ &dev_attr_cmo_entitled.attr,
+ &dev_attr_cmo_allocated.attr,
+ &dev_attr_cmo_desired.attr,
+ &dev_attr_cmo_allocs_failed.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(vio_cmo_dev);
+
+/* sysfs bus functions and data structures for CMO */
+
+/*
+ * Generate a read-only bus attribute "cmo_<name>" whose show handler
+ * prints vio_cmo.<name>. sysfs_emit() bounds the output to the sysfs
+ * page buffer.
+ */
+#define viobus_cmo_rd_attr(name)                                        \
+static ssize_t cmo_bus_##name##_show(const struct bus_type *bt, char *buf) \
+{                                                                       \
+	return sysfs_emit(buf, "%lu\n", vio_cmo.name);                  \
+}                                                                       \
+static struct bus_attribute bus_attr_cmo_bus_##name =                   \
+	__ATTR(cmo_##name, S_IRUGO, cmo_bus_##name##_show, NULL)
+
+/*
+ * Generate a read-only bus attribute "cmo_<name>_<var>" whose show
+ * handler prints vio_cmo.<name>.<var>. sysfs_emit() bounds the output
+ * to the sysfs page buffer.
+ */
+#define viobus_cmo_pool_rd_attr(name, var)                              \
+static ssize_t                                                          \
+cmo_##name##_##var##_show(const struct bus_type *bt, char *buf)         \
+{                                                                       \
+	return sysfs_emit(buf, "%lu\n", vio_cmo.name.var);              \
+}                                                                       \
+static BUS_ATTR_RO(cmo_##name##_##var)
+
+viobus_cmo_rd_attr(entitled);
+viobus_cmo_rd_attr(spare);
+viobus_cmo_rd_attr(min);
+viobus_cmo_rd_attr(desired);
+viobus_cmo_rd_attr(curr);
+viobus_cmo_pool_rd_attr(reserve, size);
+viobus_cmo_pool_rd_attr(excess, size);
+viobus_cmo_pool_rd_attr(excess, free);
+
+/*
+ * sysfs show handler: print the bus-wide high water mark (vio_cmo.high).
+ * sysfs_emit() bounds the output to the sysfs page buffer.
+ */
+static ssize_t cmo_high_show(const struct bus_type *bt, char *buf)
+{
+	return sysfs_emit(buf, "%lu\n", vio_cmo.high);
+}
+
+static ssize_t cmo_high_store(const struct bus_type *bt, const char *buf,
+ size_t count)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&vio_cmo.lock, flags);
+ vio_cmo.high = vio_cmo.curr;
+ spin_unlock_irqrestore(&vio_cmo.lock, flags);
+
+ return count;
+}
+static BUS_ATTR_RW(cmo_high);
+
+static struct attribute *vio_bus_attrs[] = {
+ &bus_attr_cmo_bus_entitled.attr,
+ &bus_attr_cmo_bus_spare.attr,
+ &bus_attr_cmo_bus_min.attr,
+ &bus_attr_cmo_bus_desired.attr,
+ &bus_attr_cmo_bus_curr.attr,
+ &bus_attr_cmo_high.attr,
+ &bus_attr_cmo_reserve_size.attr,
+ &bus_attr_cmo_excess_size.attr,
+ &bus_attr_cmo_excess_free.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(vio_bus);
+
+static void __init vio_cmo_sysfs_init(void)
+{
+ vio_bus_type.dev_groups = vio_cmo_dev_groups;
+ vio_bus_type.bus_groups = vio_bus_groups;
+}
+#else /* CONFIG_PPC_SMLPAR */
+int vio_cmo_entitlement_update(size_t new_entitlement) { return 0; }
+void vio_cmo_set_dev_desired(struct vio_dev *viodev, size_t desired) {}
+static int vio_cmo_bus_probe(struct vio_dev *viodev) { return 0; }
+static void vio_cmo_bus_remove(struct vio_dev *viodev) {}
+static void vio_cmo_set_dma_ops(struct vio_dev *viodev) {}
+static void vio_cmo_bus_init(void) {}
+static void __init vio_cmo_sysfs_init(void) { }
+#endif /* CONFIG_PPC_SMLPAR */
+EXPORT_SYMBOL(vio_cmo_entitlement_update);
+EXPORT_SYMBOL(vio_cmo_set_dev_desired);
+
+
+/*
+ * Platform Facilities Option (PFO) support
+ */
+
+/**
+ * vio_h_cop_sync - Perform a synchronous PFO co-processor operation
+ *
+ * @vdev: Pointer to a struct vio_dev for device
+ * @op: Pointer to a struct vio_pfo_op for the operation parameters
+ *
+ * Calls the hypervisor to synchronously perform the PFO operation
+ * described in @op. In the case of a busy response from the hypervisor
+ * (H_BUSY, H_RESOURCE or H_NOT_ENOUGH_RESOURCES), the operation will be
+ * re-submitted indefinitely unless a non-zero timeout is specified or an
+ * error occurs. The timeout (op->timeout, in milliseconds) places a limit
+ * on when to stop re-submitting an operation; the total time can be
+ * exceeded if an operation is in progress, because the deadline is only
+ * checked after each hcall returns.
+ *
+ * op->hcall_err is always set to the return value of the last H_COP hcall.
+ *
+ * Returns:
+ * 0 on success,
+ * -EINVAL if the h_call fails due to an invalid parameter,
+ * -E2BIG if the h_call can not be performed synchronously,
+ * -EBUSY if a timeout is specified and has elapsed,
+ * -EACCES if the memory area for data/status has been rescinded, or
+ * -EPERM if a hardware fault has been indicated
+ */
+int vio_h_cop_sync(struct vio_dev *vdev, struct vio_pfo_op *op)
+{
+	struct device *dev = &vdev->dev;
+	unsigned long deadline = 0;
+	long hret = 0;
+	int ret = 0;
+
+	if (op->timeout)
+		deadline = jiffies + msecs_to_jiffies(op->timeout);
+
+	while (true) {
+		hret = plpar_hcall_norets(H_COP, op->flags,
+				vdev->resource_id,
+				op->in, op->inlen, op->out,
+				op->outlen, op->csbcpb);
+
+		/*
+		 * Stop on success, on any non-busy error, or once the
+		 * deadline has passed. time_after(a, b) is true when a is
+		 * later than b, so the current time must be the first
+		 * argument for "deadline elapsed".
+		 */
+		if (hret == H_SUCCESS ||
+		    (hret != H_NOT_ENOUGH_RESOURCES &&
+		     hret != H_BUSY && hret != H_RESOURCE) ||
+		    (op->timeout && time_after(jiffies, deadline)))
+			break;
+
+		dev_dbg(dev, "%s: hcall ret(%ld), retrying.\n", __func__, hret);
+	}
+
+	/* Translate the final hcall status into an errno */
+	switch (hret) {
+	case H_SUCCESS:
+		ret = 0;
+		break;
+	case H_OP_MODE:
+	case H_TOO_BIG:
+		ret = -E2BIG;
+		break;
+	case H_RESCINDED:
+		ret = -EACCES;
+		break;
+	case H_HARDWARE:
+		ret = -EPERM;
+		break;
+	case H_NOT_ENOUGH_RESOURCES:
+	case H_RESOURCE:
+	case H_BUSY:
+		/* Only reachable when the timeout expired while still busy */
+		ret = -EBUSY;
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	if (ret)
+		dev_dbg(dev, "%s: Sync h_cop_op failure (ret:%d) (hret:%ld)\n",
+				__func__, ret, hret);
+
+	op->hcall_err = hret;
+	return ret;
+}
+EXPORT_SYMBOL(vio_h_cop_sync);
+
+/*
+ * Allocate and initialise an iommu_table for @dev from the node's
+ * "ibm,my-dma-window" property.
+ *
+ * Returns NULL when the property is absent or the allocation fails;
+ * otherwise returns whatever iommu_init_table() returns for the new table.
+ */
+static struct iommu_table *vio_build_iommu_table(struct vio_dev *dev)
+{
+	unsigned long win_offset, win_size;
+	struct iommu_table *table;
+	const __be32 *window;
+
+	window = of_get_property(dev->dev.of_node,
+				 "ibm,my-dma-window", NULL);
+	if (window == NULL)
+		return NULL;
+
+	table = kzalloc(sizeof(*table), GFP_KERNEL);
+	if (!table)
+		return NULL;
+
+	kref_init(&table->it_kref);
+
+	of_parse_dma_window(dev->dev.of_node, window,
+			    &table->it_index, &win_offset, &win_size);
+
+	/* Window size and offset are converted to counts of 4K TCE entries */
+	table->it_page_shift = IOMMU_PAGE_SHIFT_4K;
+	table->it_size = win_size >> table->it_page_shift;
+	/* offset for VIO should always be 0 */
+	table->it_offset = win_offset >> table->it_page_shift;
+	table->it_busno = 0;
+	table->it_type = TCE_VB;
+	table->it_blocksize = 16;
+
+	/* Pick the TCE operations depending on whether we run as an LPAR */
+	if (!firmware_has_feature(FW_FEATURE_LPAR))
+		table->it_ops = &iommu_table_pseries_ops;
+	else
+		table->it_ops = &iommu_table_lpar_multi_ops;
+
+	return iommu_init_table(table, -1, 0, 0);
+}
+
+/**
+ * vio_match_device - Find the id table entry matching a VIO device.
+ * @ids: array of VIO device id structures to search in, terminated by an
+ *       entry whose type string is empty
+ * @dev: the VIO device structure to match against
+ *
+ * An entry matches when its type string is a prefix of the device's type
+ * and the device's OF node is compatible with the entry's compat string.
+ *
+ * Returns the first matching vio_device_id structure or NULL if there is
+ * no match.
+ */
+static const struct vio_device_id *vio_match_device(
+		const struct vio_device_id *ids, const struct vio_dev *dev)
+{
+	const struct vio_device_id *entry;
+
+	for (entry = ids; entry->type[0] != '\0'; entry++) {
+		if (strncmp(dev->type, entry->type, strlen(entry->type)) != 0)
+			continue;
+		if (of_device_is_compatible(dev->dev.of_node, entry->compat))
+			return entry;
+	}
+
+	return NULL;
+}
+
+/*
+ * Bus probe hook: translate the generic struct device into a struct
+ * vio_dev and hand it to the bound driver's probe routine. dev->driver has
+ * already been set by generic code because vio_bus_match succeeded.
+ *
+ * Returns -ENODEV when the driver has no probe routine or no id table
+ * entry matches; otherwise the error from the CMO probe step, or the
+ * driver's own probe result.
+ */
+static int vio_bus_probe(struct device *dev)
+{
+	struct vio_driver *viodrv = to_vio_driver(dev->driver);
+	struct vio_dev *viodev = to_vio_dev(dev);
+	const struct vio_device_id *match;
+	int rc;
+
+	if (!viodrv->probe)
+		return -ENODEV;
+
+	match = vio_match_device(viodrv->id_table, viodev);
+	if (!match)
+		return -ENODEV;
+
+	/* Start from clean CMO state for every probe attempt */
+	memset(&viodev->cmo, 0, sizeof(viodev->cmo));
+	if (firmware_has_feature(FW_FEATURE_CMO)) {
+		rc = vio_cmo_bus_probe(viodev);
+		if (rc)
+			return rc;
+	}
+
+	rc = viodrv->probe(viodev, match);
+	/* Undo the CMO probe step if the driver refused the device */
+	if (rc && firmware_has_feature(FW_FEATURE_CMO))
+		vio_cmo_bus_remove(viodev);
+
+	return rc;
+}
+
+/*
+ * Bus remove hook: convert from struct device to struct vio_dev, call the
+ * driver's remove routine (if any) and then undo the CMO accounting.
+ */
+static void vio_bus_remove(struct device *dev)
+{
+	struct vio_driver *viodrv = to_vio_driver(dev->driver);
+	struct vio_dev *viodev = to_vio_dev(dev);
+	struct device *held;
+
+	/*
+	 * Keep the device pinned across the driver's remove callback so it
+	 * is still valid for the CMO accounting cleanup that follows.
+	 */
+	held = get_device(dev);
+
+	if (viodrv->remove)
+		viodrv->remove(viodev);
+
+	if (firmware_has_feature(FW_FEATURE_CMO))
+		vio_cmo_bus_remove(viodev);
+
+	put_device(held);
+}
+
+/*
+ * Bus shutdown hook. Does nothing for devices without a bound driver.
+ * Otherwise the driver's shutdown routine is preferred; when the driver
+ * has none and a kexec is in progress, the device goes through the normal
+ * remove path instead.
+ */
+static void vio_bus_shutdown(struct device *dev)
+{
+	struct vio_dev *viodev = to_vio_dev(dev);
+	struct vio_driver *viodrv;
+
+	if (!dev->driver)
+		return;
+
+	viodrv = to_vio_driver(dev->driver);
+	if (viodrv->shutdown) {
+		viodrv->shutdown(viodev);
+		return;
+	}
+
+	if (kexec_in_progress)
+		vio_bus_remove(dev);
+}
+
+/**
+ * __vio_register_driver - Register a new vio driver
+ * @viodrv: The vio_driver structure to be registered.
+ * @owner: Module recorded as the owner of the embedded struct device_driver.
+ * @mod_name: Module name recorded in the embedded struct device_driver.
+ *
+ * Returns -ENODEV when not running on a pseries machine, otherwise the
+ * result of driver_register().
+ */
+int __vio_register_driver(struct vio_driver *viodrv, struct module *owner,
+			  const char *mod_name)
+{
+	struct device_driver *drv = &viodrv->driver;
+
+	/* vio_bus_type is only initialised for pseries */
+	if (!machine_is(pseries))
+		return -ENODEV;
+
+	pr_debug("%s: driver %s registering\n", __func__, viodrv->name);
+
+	/* Populate the embedded generic driver from the vio-level fields */
+	drv->bus = &vio_bus_type;
+	drv->name = viodrv->name;
+	drv->pm = viodrv->pm;
+	drv->owner = owner;
+	drv->mod_name = mod_name;
+
+	return driver_register(drv);
+}
+EXPORT_SYMBOL(__vio_register_driver);
+
+/**
+ * vio_unregister_driver - Remove registration of vio driver.
+ * @viodrv: The vio_driver struct to be removed from registration
+ *
+ * Unregisters the struct device_driver embedded in @viodrv.
+ */
+void vio_unregister_driver(struct vio_driver *viodrv)
+{
+ driver_unregister(&viodrv->driver);
+}
+EXPORT_SYMBOL(vio_unregister_driver);
+
+/*
+ * Release callback for vio devices, run once the vio_dev refcount hits 0:
+ * drops the iommu table reference (if one is attached), drops the OF node
+ * reference and frees the vio_dev itself.
+ */
+static void vio_dev_release(struct device *dev)
+{
+	struct vio_dev *viodev = to_vio_dev(dev);
+	struct iommu_table *table = get_iommu_table_base(dev);
+
+	if (table != NULL)
+		iommu_tce_table_put(table);
+
+	of_node_put(dev->of_node);
+	kfree(viodev);
+}
+
+/**
+ * vio_register_device_node: - Register a new vio device.
+ * @of_node: The OF node for this device.
+ *
+ * Creates and initializes a vio_dev structure from the data in
+ * of_node and registers it on the vio bus.
+ *
+ * Returns a pointer to the created vio_dev, or NULL if the parent node is
+ * missing or not recognized, the node is an interrupt controller under
+ * ibm,platform-facilities, allocation fails, a vdevice node lacks the
+ * 'device_type' or 'reg' property, or device registration fails.
+ */
+struct vio_dev *vio_register_device_node(struct device_node *of_node)
+{
+ struct vio_dev *viodev;
+ struct device_node *parent_node;
+ const __be32 *prop;
+ enum vio_dev_family family;
+
+ /*
+ * Determine if this node is under the /vdevice node or under the
+ * /ibm,platform-facilities node. This decides the device's family.
+ */
+ parent_node = of_get_parent(of_node);
+ if (parent_node) {
+ if (of_node_is_type(parent_node, "ibm,platform-facilities"))
+ family = PFO;
+ else if (of_node_is_type(parent_node, "vdevice"))
+ family = VDEVICE;
+ else {
+ pr_warn("%s: parent(%pOF) of %pOFn not recognized.\n",
+ __func__,
+ parent_node,
+ of_node);
+ of_node_put(parent_node);
+ return NULL;
+ }
+ of_node_put(parent_node);
+ } else {
+ pr_warn("%s: could not determine the parent of node %pOFn.\n",
+ __func__, of_node);
+ return NULL;
+ }
+
+ /* Interrupt controllers under platform-facilities are not vio devices */
+ if (family == PFO) {
+ if (of_property_read_bool(of_node, "interrupt-controller")) {
+ pr_debug("%s: Skipping the interrupt controller %pOFn.\n",
+ __func__, of_node);
+ return NULL;
+ }
+ }
+
+ /* allocate a vio_dev for this node */
+ viodev = kzalloc(sizeof(struct vio_dev), GFP_KERNEL);
+ if (viodev == NULL) {
+ pr_warn("%s: allocation failure for VIO device.\n", __func__);
+ return NULL;
+ }
+
+ /* we need the 'device_type' property, in order to match with drivers */
+ viodev->family = family;
+ if (viodev->family == VDEVICE) {
+ unsigned int unit_address;
+
+ viodev->type = of_node_get_device_type(of_node);
+ if (!viodev->type) {
+ pr_warn("%s: node %pOFn is missing the 'device_type' "
+ "property.\n", __func__, of_node);
+ goto out;
+ }
+
+ prop = of_get_property(of_node, "reg", NULL);
+ if (prop == NULL) {
+ pr_warn("%s: node %pOFn missing 'reg'\n",
+ __func__, of_node);
+ goto out;
+ }
+ /* The first 'reg' cell is the unit address; its hex form names the device */
+ unit_address = of_read_number(prop, 1);
+ dev_set_name(&viodev->dev, "%x", unit_address);
+ viodev->irq = irq_of_parse_and_map(of_node, 0);
+ viodev->unit_address = unit_address;
+ } else {
+ /* PFO devices need their resource_id for submitting COP_OPs
+ * This is an optional field for devices, but is required when
+ * performing synchronous ops */
+ prop = of_get_property(of_node, "ibm,resource-id", NULL);
+ if (prop != NULL)
+ viodev->resource_id = of_read_number(prop, 1);
+
+ /* PFO devices are named after their node; the name doubles as the type */
+ dev_set_name(&viodev->dev, "%pOFn", of_node);
+ viodev->type = dev_name(&viodev->dev);
+ viodev->irq = 0;
+ }
+
+ viodev->name = of_node->name;
+ /* Reference taken here is dropped in vio_dev_release() */
+ viodev->dev.of_node = of_node_get(of_node);
+
+ set_dev_node(&viodev->dev, of_node_to_nid(of_node));
+
+ /* init generic 'struct device' fields: */
+ viodev->dev.parent = &vio_bus_device.dev;
+ viodev->dev.bus = &vio_bus_type;
+ viodev->dev.release = vio_dev_release;
+
+ /* DMA setup is only done for nodes that advertise a DMA window */
+ if (of_property_present(viodev->dev.of_node, "ibm,my-dma-window")) {
+ if (firmware_has_feature(FW_FEATURE_CMO))
+ vio_cmo_set_dma_ops(viodev);
+ else
+ set_dma_ops(&viodev->dev, &dma_iommu_ops);
+
+ set_iommu_table_base(&viodev->dev,
+ vio_build_iommu_table(viodev));
+
+ /* needed to ensure proper operation of coherent allocations
+ * later, in case driver doesn't set it explicitly */
+ viodev->dev.coherent_dma_mask = DMA_BIT_MASK(64);
+ viodev->dev.dma_mask = &viodev->dev.coherent_dma_mask;
+ }
+
+ /* register with generic device framework */
+ if (device_register(&viodev->dev)) {
+ printk(KERN_ERR "%s: failed to register device %s\n",
+ __func__, dev_name(&viodev->dev));
+ /*
+ * Drop the reference instead of calling kfree(): freeing is left
+ * to the release callback installed above (vio_dev_release).
+ */
+ put_device(&viodev->dev);
+ return NULL;
+ }
+
+ return viodev;
+
+out: /* Use this exit point for any return prior to device_register */
+ kfree(viodev);
+
+ return NULL;
+}
+EXPORT_SYMBOL(vio_register_device_node);
+
+/*
+ * vio_bus_scan_register_devices - Scan OF and register each child device
+ * @root_name - OF node name for the root of the subtree to search.
+ *		A NULL name makes this a no-op.
+ *
+ * Looks up the node named @root_name and calls
+ * vio_register_device_node() for each of its direct children. Drivers
+ * will associate with the created devices later.
+ */
+static void __init vio_bus_scan_register_devices(char *root_name)
+{
+	struct device_node *root, *child;
+
+	if (!root_name)
+		return;
+
+	root = of_find_node_by_name(NULL, root_name);
+	if (!root)
+		return;
+
+	for (child = of_get_next_child(root, NULL); child;
+	     child = of_get_next_child(root, child))
+		vio_register_device_node(child);
+
+	of_node_put(root);
+}
+
+/**
+ * vio_bus_init: - Initialize the virtual IO bus
+ *
+ * Installs the CMO sysfs groups (when the firmware has the CMO feature),
+ * registers vio_bus_type and the vio_bus_device parent device, and then
+ * runs the CMO bus initialisation.
+ *
+ * Returns 0 on success or the error from bus_register()/device_register().
+ */
+static int __init vio_bus_init(void)
+{
+	int rc;
+
+	/* The attribute groups have to be in place before bus_register() */
+	if (firmware_has_feature(FW_FEATURE_CMO))
+		vio_cmo_sysfs_init();
+
+	rc = bus_register(&vio_bus_type);
+	if (rc != 0) {
+		printk(KERN_ERR "failed to register VIO bus\n");
+		return rc;
+	}
+
+	/*
+	 * The fake parent of all vio devices, just to give us
+	 * a nice directory
+	 */
+	rc = device_register(&vio_bus_device.dev);
+	if (rc != 0) {
+		printk(KERN_WARNING "%s: device_register returned %i\n",
+		       __func__, rc);
+		return rc;
+	}
+
+	if (firmware_has_feature(FW_FEATURE_CMO))
+		vio_cmo_bus_init();
+
+	return 0;
+}
+machine_postcore_initcall(pseries, vio_bus_init);
+
+/*
+ * Register a vio device for every child of the "vdevice" and
+ * "ibm,platform-facilities" nodes. Always returns 0.
+ */
+static int __init vio_device_init(void)
+{
+ vio_bus_scan_register_devices("vdevice");
+ vio_bus_scan_register_devices("ibm,platform-facilities");
+
+ return 0;
+}
+machine_device_initcall(pseries, vio_device_init);
+
+/* Show callback for the "name" device attribute: prints the vio_dev name. */
+static ssize_t name_show(struct device *dev,
+			 struct device_attribute *attr, char *buf)
+{
+	const struct vio_dev *viodev = to_vio_dev(dev);
+
+	return sprintf(buf, "%s\n", viodev->name);
+}
+static DEVICE_ATTR_RO(name);
+
+/*
+ * Show callback for the "devspec" device attribute: prints the device's OF
+ * node using the %pOF format.
+ */
+static ssize_t devspec_show(struct device *dev,
+			    struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%pOF\n", dev->of_node);
+}
+static DEVICE_ATTR_RO(devspec);
+
+/*
+ * Show callback for the "modalias" device attribute. Prints
+ * "vio:T<type>S<compatible>"; when the device has no OF node or no
+ * "compatible" property, only a newline is emitted.
+ */
+static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
+			     char *buf)
+{
+	const struct vio_dev *vio_dev = to_vio_dev(dev);
+	struct device_node *node = dev->of_node;
+	const char *compat = NULL;
+
+	if (node)
+		compat = of_get_property(node, "compatible", NULL);
+
+	if (!compat) {
+		strcpy(buf, "\n");
+		return strlen(buf);
+	}
+
+	return sprintf(buf, "vio:T%sS%s\n", vio_dev->type, compat);
+}
+static DEVICE_ATTR_RO(modalias);
+
+/*
+ * NULL-terminated list of the default per-device attributes. The group
+ * built from it (vio_dev_groups) is the initial dev_groups of vio_bus_type.
+ */
+static struct attribute *vio_dev_attrs[] = {
+ &dev_attr_name.attr,
+ &dev_attr_devspec.attr,
+ &dev_attr_modalias.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(vio_dev);
+
+/*
+ * vio_unregister_device - unregister a vio device and dispose of its IRQ
+ * mapping.
+ * @viodev: the device to unregister; must not be dereferenced by the caller
+ *	    afterwards unless the caller holds its own reference.
+ *
+ * The family and irq are sampled before device_unregister() because that
+ * call may drop the last reference to the device, in which case
+ * vio_dev_release() frees @viodev. The mapping is still disposed of only
+ * after the device has been unregistered, as before.
+ */
+void vio_unregister_device(struct vio_dev *viodev)
+{
+	const enum vio_dev_family family = viodev->family;
+	const unsigned int irq = viodev->irq;
+
+	device_unregister(&viodev->dev);
+	if (family == VDEVICE)
+		irq_dispose_mapping(irq);
+}
+EXPORT_SYMBOL(vio_unregister_device);
+
+/*
+ * Bus match hook: returns non-zero when @drv has an id table and one of its
+ * entries matches @dev, 0 otherwise.
+ */
+static int vio_bus_match(struct device *dev, struct device_driver *drv)
+{
+	const struct vio_device_id *table = to_vio_driver(drv)->id_table;
+
+	if (table == NULL)
+		return 0;
+
+	return vio_match_device(table, to_vio_dev(dev)) != NULL;
+}
+
+/*
+ * Bus uevent hook: adds MODALIAS=vio:T<type>S<compatible> to the event
+ * environment. Returns -ENODEV when the device has no OF node or no
+ * "compatible" property, 0 otherwise.
+ */
+static int vio_hotplug(const struct device *dev, struct kobj_uevent_env *env)
+{
+	const struct vio_dev *vio_dev = to_vio_dev(dev);
+	const struct device_node *node = dev->of_node;
+	const char *compat = NULL;
+
+	if (node)
+		compat = of_get_property(node, "compatible", NULL);
+	if (!compat)
+		return -ENODEV;
+
+	add_uevent_var(env, "MODALIAS=vio:T%sS%s", vio_dev->type, compat);
+	return 0;
+}
+
+/*
+ * The "vio" bus. dev_groups starts out as the default device attributes;
+ * vio_cmo_sysfs_init() replaces it (and sets bus_groups) when called.
+ */
+struct bus_type vio_bus_type = {
+ .name = "vio",
+ .dev_groups = vio_dev_groups,
+ .uevent = vio_hotplug,
+ .match = vio_bus_match,
+ .probe = vio_bus_probe,
+ .remove = vio_bus_remove,
+ .shutdown = vio_bus_shutdown,
+};
+
+/**
+ * vio_get_attribute: - get attribute for virtual device
+ * @vdev: The vio device to get property.
+ * @which: The property/attribute to be extracted.
+ * @length: Pointer to length of returned data size (unused if NULL).
+ *
+ * Calls of_get_property() on the device's OF node and returns the value of
+ * the property named by @which, exactly as of_get_property() returns it.
+*/
+const void *vio_get_attribute(struct vio_dev *vdev, char *which, int *length)
+{
+ return of_get_property(vdev->dev.of_node, which, length);
+}
+EXPORT_SYMBOL(vio_get_attribute);
+
+/*
+ * vio_find_name() - look up a vio device on the vio bus by device name.
+ * Internal because only vio.c knows how we formatted the kobject name.
+ * Returns the containing vio_dev, or NULL when no device has that name.
+ */
+static struct vio_dev *vio_find_name(const char *name)
+{
+	struct device *match = bus_find_device_by_name(&vio_bus_type, NULL, name);
+
+	return match ? to_vio_dev(match) : NULL;
+}
+
+/**
+ * vio_find_node - find an already-registered vio_dev
+ * @vnode: device_node of the virtual device we're looking for
+ *
+ * Rebuilds the device name the same way registration does (hex "reg"
+ * value for children of a vdevice node, node name for children of an
+ * ibm,platform-facilities node) and looks it up on the vio bus.
+ *
+ * Takes a reference to the embedded struct device which needs to be dropped
+ * after use. Returns NULL when @vnode has no parent, the parent type is
+ * not recognised, a vdevice child has no "reg" property, or no such device
+ * is registered.
+ */
+struct vio_dev *vio_find_node(struct device_node *vnode)
+{
+	struct device_node *parent = of_get_parent(vnode);
+	char kobj_name[20];
+	bool have_name = false;
+
+	if (parent == NULL)
+		return NULL;
+
+	/* construct the kobject name from the device node */
+	if (of_node_is_type(parent, "vdevice")) {
+		const __be32 *reg = of_get_property(vnode, "reg", NULL);
+
+		if (reg) {
+			snprintf(kobj_name, sizeof(kobj_name), "%x",
+				 (uint32_t)of_read_number(reg, 1));
+			have_name = true;
+		}
+	} else if (of_node_is_type(parent, "ibm,platform-facilities")) {
+		snprintf(kobj_name, sizeof(kobj_name), "%pOFn", vnode);
+		have_name = true;
+	}
+
+	/* The parent was only needed to classify the node */
+	of_node_put(parent);
+
+	return have_name ? vio_find_name(kobj_name) : NULL;
+}
+EXPORT_SYMBOL(vio_find_node);
+
+/*
+ * Ask the hypervisor (h_vio_signal) to enable interrupts for the device's
+ * unit address. Logs an error on failure and returns the hcall status.
+ */
+int vio_enable_interrupts(struct vio_dev *dev)
+{
+	int rc;
+
+	rc = h_vio_signal(dev->unit_address, VIO_IRQ_ENABLE);
+	if (rc == H_SUCCESS)
+		return rc;
+
+	printk(KERN_ERR "vio: Error 0x%x enabling interrupts\n", rc);
+	return rc;
+}
+EXPORT_SYMBOL(vio_enable_interrupts);
+
+/*
+ * Ask the hypervisor (h_vio_signal) to disable interrupts for the device's
+ * unit address. Logs an error on failure and returns the hcall status.
+ */
+int vio_disable_interrupts(struct vio_dev *dev)
+{
+	int rc;
+
+	rc = h_vio_signal(dev->unit_address, VIO_IRQ_DISABLE);
+	if (rc == H_SUCCESS)
+		return rc;
+
+	printk(KERN_ERR "vio: Error 0x%x disabling interrupts\n", rc);
+	return rc;
+}
+EXPORT_SYMBOL(vio_disable_interrupts);
+
+/* Passes vio_bus_type to dma_debug_add_bus(). Always returns 0. */
+static int __init vio_init(void)
+{
+ dma_debug_add_bus(&vio_bus_type);
+ return 0;
+}
+machine_fs_initcall(pseries, vio_init);
diff --git a/arch/powerpc/platforms/pseries/vphn.c b/arch/powerpc/platforms/pseries/vphn.c
new file mode 100644
index 000000000..3f85ece3c
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/vphn.c
@@ -0,0 +1,90 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <asm/byteorder.h>
+#include <asm/vphn.h>
+
+/*
+ * The associativity domain numbers are returned from the hypervisor as a
+ * stream of mixed 16-bit and 32-bit fields. The stream is terminated by the
+ * special value of "all ones" (aka. 0xffff) and its size may not exceed 48
+ * bytes.
+ *
+ * --- 16-bit fields -->
+ * _________________________
+ * | 0 | 1 | 2 | 3 | be_packed[0]
+ * ------+-----+-----+------
+ * _________________________
+ * | 4 | 5 | 6 | 7 | be_packed[1]
+ * -------------------------
+ * ...
+ * _________________________
+ * | 20 | 21 | 22 | 23 | be_packed[5]
+ * -------------------------
+ *
+ * Convert to the sequence they would appear in the ibm,associativity property.
+ */
+/*
+ * Unpack the hypervisor's packed associativity registers into @unpacked.
+ *
+ * @packed:   VPHN_REGISTER_COUNT register values as returned by the hcall
+ * @unpacked: output cells; cell 0 receives the number of domains and the
+ *            domains follow from cell 1 onwards, all big-endian
+ *
+ * Returns the number of associativity domains written.
+ */
+static int vphn_unpack_associativity(const long *packed, __be32 *unpacked)
+{
+	__be64 be_packed[VPHN_REGISTER_COUNT];
+	const __be16 *cursor = (const __be16 *) be_packed;
+	int i, count = 0;
+	u16 high_half = 0;
+	bool pending_low_half = false;
+
+#define VPHN_FIELD_UNUSED (0xffff)
+#define VPHN_FIELD_MSB (0x8000)
+#define VPHN_FIELD_MASK (~VPHN_FIELD_MSB)
+
+	/* Let's fix the values returned by plpar_hcall9() */
+	for (i = 0; i < VPHN_REGISTER_COUNT; i++)
+		be_packed[i] = cpu_to_be64(packed[i]);
+
+	for (i = 1; i < VPHN_ASSOC_BUFSIZE; i++) {
+		u16 field = be16_to_cpup(cursor++);
+
+		if (pending_low_half) {
+			/*
+			 * Second half of a 32-bit value: the previous field
+			 * supplies the upper bits, this one the lower 16.
+			 */
+			unpacked[++count] =
+				cpu_to_be32(high_half << 16 | field);
+			pending_low_half = false;
+			continue;
+		}
+
+		/* This is the list terminator */
+		if (field == VPHN_FIELD_UNUSED)
+			break;
+
+		if (field & VPHN_FIELD_MSB) {
+			/* Data is in the lower 15 bits of this field */
+			unpacked[++count] =
+				cpu_to_be32(field & VPHN_FIELD_MASK);
+			continue;
+		}
+
+		/*
+		 * MSB clear: the lower 15 bits of this field are to be
+		 * concatenated with the next 16 bit field
+		 */
+		high_half = field;
+		pending_low_half = true;
+	}
+
+	/* The first cell contains the length of the property */
+	unpacked[0] = cpu_to_be32(count);
+
+	return count;
+}
+
+/* NOTE: This file is included by a selftest and built in userspace. */
+#ifdef __KERNEL__
+#include <asm/hvcall.h>
+
+/*
+ * Issue H_HOME_NODE_ASSOCIATIVITY for @cpu with @flags and, on H_SUCCESS,
+ * unpack the returned registers into @associativity. Returns the hcall
+ * status; @associativity is left untouched on failure.
+ */
+long hcall_vphn(unsigned long cpu, u64 flags, __be32 *associativity)
+{
+	long retbuf[PLPAR_HCALL9_BUFSIZE] = {0};
+	long rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, retbuf, flags, cpu);
+
+	if (rc != H_SUCCESS)
+		return rc;
+
+	vphn_unpack_associativity(retbuf, associativity);
+	return rc;
+}
+#endif